//===- MachineScheduler.cpp - Machine Instruction Scheduler --------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// MachineScheduler schedules machine instructions after phi elimination. It
// preserves LiveIntervals so it can be invoked before register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "misched"

#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/PriorityQueue.h"

#include <queue>

using namespace llvm;

namespace llvm {
cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
                           cl::desc("Force top-down list scheduling"));
cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
                            cl::desc("Force bottom-up list scheduling"));
}

#ifndef NDEBUG
static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
  cl::desc("Pop up a window to show MISched dags after they are processed"));

static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
#else
static bool ViewMISchedDAGs = false;
#endif // NDEBUG

// Threshold to very roughly model an out-of-order processor's instruction
// buffers. If the actual value of this threshold matters much in practice, then
// it can be specified by the machine model. For now, it's an experimental
// tuning knob to determine when and if it matters.
static cl::opt<unsigned> ILPWindow("ilp-window", cl::Hidden,
  cl::desc("Allow expected latency to exceed the critical path by N cycles "
           "before attempting to balance ILP"),
  cl::init(10U));

// Experimental heuristics
static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
  cl::desc("Enable load clustering."), cl::init(true));

// Experimental heuristics
static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
  cl::desc("Enable scheduling for macro fusion."), cl::init(true));

//===----------------------------------------------------------------------===//
// Machine Instruction Scheduling Pass and Registry
//===----------------------------------------------------------------------===//

MachineSchedContext::MachineSchedContext():
  MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) {
  RegClassInfo = new RegisterClassInfo();
}

MachineSchedContext::~MachineSchedContext() {
  delete RegClassInfo;
}

namespace {
/// MachineScheduler runs after coalescing and before register allocation.
class MachineScheduler : public MachineSchedContext,
                         public MachineFunctionPass {
public:
  MachineScheduler();

  virtual void getAnalysisUsage(AnalysisUsage &AU) const;

  virtual void releaseMemory() {}

  virtual bool runOnMachineFunction(MachineFunction&);

  virtual void print(raw_ostream &O, const Module* = 0) const;

  static char ID; // Class identification, replacement for typeinfo
};
} // namespace

char MachineScheduler::ID = 0;

char &llvm::MachineSchedulerID = MachineScheduler::ID;

INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
                      "Machine Instruction Scheduler", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(MachineScheduler, "misched",
                    "Machine Instruction Scheduler", false, false)

MachineScheduler::MachineScheduler()
: MachineFunctionPass(ID) {
  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
}

void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequiredID(MachineDominatorsID);
  AU.addRequired<MachineLoopInfo>();
  AU.addRequired<AliasAnalysis>();
  AU.addRequired<TargetPassConfig>();
  AU.addRequired<SlotIndexes>();
  AU.addPreserved<SlotIndexes>();
  AU.addRequired<LiveIntervals>();
  AU.addPreserved<LiveIntervals>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

MachinePassRegistry MachineSchedRegistry::Registry;

/// A dummy default scheduler factory indicates whether the scheduler
/// is overridden on the command line.
static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
  return 0;
}

/// MachineSchedOpt allows command line selection of the scheduler.
static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
               RegisterPassParser<MachineSchedRegistry> >
MachineSchedOpt("misched",
                cl::init(&useDefaultMachineSched), cl::Hidden,
                cl::desc("Machine instruction scheduler to use"));

static MachineSchedRegistry
DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
                     useDefaultMachineSched);

/// Forward declare the standard machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C);


/// Decrement this iterator until reaching the top or a non-debug instr.
static MachineBasicBlock::iterator
priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) {
  assert(I != Beg && "reached the top of the region, cannot decrement");
  while (--I != Beg) {
    if (!I->isDebugValue())
      break;
  }
  return I;
}

/// If this iterator is a debug value, increment until reaching the End or a
/// non-debug instruction.
static MachineBasicBlock::iterator
nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
  for(; I != End; ++I) {
    if (!I->isDebugValue())
      break;
  }
  return I;
}

/// Top-level MachineScheduler pass driver.
///
/// Visit blocks in function order. Divide each block into scheduling regions
/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
/// consistent with the DAG builder, which traverses the interior of the
/// scheduling regions bottom-up.
///
/// This design avoids exposing scheduling boundaries to the DAG builder,
/// simplifying the DAG builder's support for "special" target instructions.
/// At the same time the design allows target schedulers to operate across
/// scheduling boundaries, for example to bundle the boundary instructions
/// without reordering them. This creates complexity, because the target
/// scheduler must update the RegionBegin and RegionEnd positions cached by
/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
/// design would be to split blocks at scheduling boundaries, but LLVM has a
/// general bias against block splitting purely for implementation simplicity.
bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
  DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));

  // Initialize the context of the pass.
  MF = &mf;
  MLI = &getAnalysis<MachineLoopInfo>();
  MDT = &getAnalysis<MachineDominatorTree>();
  PassConfig = &getAnalysis<TargetPassConfig>();
  AA = &getAnalysis<AliasAnalysis>();

  LIS = &getAnalysis<LiveIntervals>();
  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();

  RegClassInfo->runOnMachineFunction(*MF);

  // Select the scheduler, or set the default.
  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
  if (Ctor == useDefaultMachineSched) {
    // Get the default scheduler set by the target.
    Ctor = MachineSchedRegistry::getDefault();
    if (!Ctor) {
      Ctor = createConvergingSched;
      MachineSchedRegistry::setDefault(Ctor);
    }
  }
  // Instantiate the selected scheduler.
  OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));

  // Visit all machine basic blocks.
  //
  // TODO: Visit blocks in global postorder or postorder within the bottom-up
  // loop tree. Then we can optionally compute global RegPressure.
  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
       MBB != MBBEnd; ++MBB) {

    Scheduler->startBlock(MBB);

    // Break the block into scheduling regions [I, RegionEnd), and schedule each
    // region as soon as it is discovered. RegionEnd points to the scheduling
    // boundary at the bottom of the region. The DAG does not include RegionEnd,
    // but the region does (i.e. the next RegionEnd is above the previous
    // RegionBegin). If the current block has no terminator then RegionEnd ==
    // MBB->end() for the bottom region.
    //
    // The Scheduler may insert instructions during either schedule() or
    // exitRegion(), even for empty regions. So the local iterators 'I' and
    // 'RegionEnd' are invalid across these calls.
    unsigned RemainingInstrs = MBB->size();
    for(MachineBasicBlock::iterator RegionEnd = MBB->end();
        RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {

      // Avoid decrementing RegionEnd for blocks with no terminator.
      if (RegionEnd != MBB->end()
          || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
        --RegionEnd;
        // Count the boundary instruction.
        --RemainingInstrs;
      }

      // The next region starts above the previous region. Look backward in the
      // instruction stream until we find the nearest boundary.
      MachineBasicBlock::iterator I = RegionEnd;
      for(;I != MBB->begin(); --I, --RemainingInstrs) {
        if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
          break;
      }
      // Notify the scheduler of the region, even if we may skip scheduling
      // it. Perhaps it still needs to be bundled.
      Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs);

      // Skip empty scheduling regions (0 or 1 schedulable instructions).
      if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
        // Close the current region. Bundle the terminator if needed.
        // This invalidates 'RegionEnd' and 'I'.
        Scheduler->exitRegion();
        continue;
      }
      DEBUG(dbgs() << "********** MI Scheduling **********\n");
      DEBUG(dbgs() << MF->getName()
            << ":BB#" << MBB->getNumber() << "\n From: " << *I << " To: ";
            if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
            else dbgs() << "End";
            dbgs() << " Remaining: " << RemainingInstrs << "\n");

      // Schedule a region: possibly reorder instructions.
      // This invalidates 'RegionEnd' and 'I'.
      Scheduler->schedule();

      // Close the current region.
      Scheduler->exitRegion();

      // Scheduling has invalidated the current iterator 'I'. Ask the
      // scheduler for the top of its scheduled region.
      RegionEnd = Scheduler->begin();
    }
    assert(RemainingInstrs == 0 && "Instruction count mismatch!");
    Scheduler->finishBlock();
  }
  Scheduler->finalizeSchedule();
  DEBUG(LIS->print(dbgs()));
  return true;
}

void MachineScheduler::print(raw_ostream &O, const Module* m) const {
  // unimplemented
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ReadyQueue::dump() {
  dbgs() << Name << ": ";
  for (unsigned i = 0, e = Queue.size(); i < e; ++i)
    dbgs() << Queue[i]->NodeNum << " ";
  dbgs() << "\n";
}
#endif

//===----------------------------------------------------------------------===//
// ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
// preservation.
//===----------------------------------------------------------------------===//

bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
  if (SuccSU != &ExitSU) {
    // Do not use WillCreateCycle, it assumes SD scheduling.
    // If Pred is reachable from Succ, then the edge creates a cycle.
    if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
      return false;
    Topo.AddPred(SuccSU, PredDep.getSUnit());
  }
  SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
  // Return true regardless of whether a new edge needed to be inserted.
  return true;
}

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
/// NumPredsLeft reaches zero, release the successor node.
///
/// FIXME: Adjust SuccSU height based on MinLatency.
void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

  if (SuccEdge->isWeak()) {
    --SuccSU->WeakPredsLeft;
    if (SuccEdge->isCluster())
      NextClusterSucc = SuccSU;
    return;
  }
#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --SuccSU->NumPredsLeft;
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    SchedImpl->releaseTopNode(SuccSU);
}

/// releaseSuccessors - Call releaseSucc on each of SU's successors.
void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    releaseSucc(SU, &*I);
  }
}

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
/// NumSuccsLeft reaches zero, release the predecessor node.
///
/// FIXME: Adjust PredSU height based on MinLatency.
void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

  if (PredEdge->isWeak()) {
    --PredSU->WeakSuccsLeft;
    if (PredEdge->isCluster())
      NextClusterPred = PredSU;
    return;
  }
#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --PredSU->NumSuccsLeft;
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
    SchedImpl->releaseBottomNode(PredSU);
}

/// releasePredecessors - Call releasePred on each of SU's predecessors.
void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    releasePred(SU, &*I);
  }
}

void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
                                    MachineBasicBlock::iterator InsertPos) {
  // Advance RegionBegin if the first instruction moves down.
  if (&*RegionBegin == MI)
    ++RegionBegin;

  // Update the instruction stream.
  BB->splice(InsertPos, BB, MI);

  // Update LiveIntervals
  LIS->handleMove(MI, /*UpdateFlags=*/true);

  // Recede RegionBegin if an instruction moves above the first.
  if (RegionBegin == InsertPos)
    RegionBegin = MI;
}

bool ScheduleDAGMI::checkSchedLimit() {
#ifndef NDEBUG
  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
    CurrentTop = CurrentBottom;
    return false;
  }
  ++NumInstrsScheduled;
#endif
  return true;
}

/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
/// crossing a scheduling boundary. [begin, end) includes all instructions in
/// the region, including the boundary itself and single-instruction regions
/// that don't get scheduled.
void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
                                MachineBasicBlock::iterator begin,
                                MachineBasicBlock::iterator end,
                                unsigned endcount)
{
  ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);

  // For convenience remember the end of the liveness region.
  LiveRegionEnd =
    (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd);
}

// Set up the register pressure trackers for the top and bottom scheduled
// regions.
void ScheduleDAGMI::initRegPressure() {
  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);

  // Close the RPTracker to finalize live ins.
  RPTracker.closeRegion();

  DEBUG(RPTracker.getPressure().dump(TRI));

  // Initialize the live ins and live outs.
  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);

  // Close one end of the tracker so we can call
  // getMaxUpward/DownwardPressureDelta before advancing across any
  // instructions. This converts currently live regs into live ins/outs.
  TopRPTracker.closeTop();
  BotRPTracker.closeBottom();

  // Account for liveness generated by the region boundary.
  if (LiveRegionEnd != RegionEnd)
    BotRPTracker.recede();

  assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");

  // Cache the list of excess pressure sets in this region. This will also track
  // the max pressure in the scheduled code for these sets.
  RegionCriticalPSets.clear();
  std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure;
  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
    unsigned Limit = TRI->getRegPressureSetLimit(i);
    DEBUG(dbgs() << TRI->getRegPressureSetName(i)
          << "Limit " << Limit
          << " Actual " << RegionPressure[i] << "\n");
    if (RegionPressure[i] > Limit)
      RegionCriticalPSets.push_back(PressureElement(i, 0));
  }
  DEBUG(dbgs() << "Excess PSets: ";
        for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
          dbgs() << TRI->getRegPressureSetName(
            RegionCriticalPSets[i].PSetID) << " ";
        dbgs() << "\n");
}

// FIXME: When the pressure tracker deals in pressure differences then we won't
// iterate over all RegionCriticalPSets[i].
void ScheduleDAGMI::
updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
  for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
    unsigned ID = RegionCriticalPSets[i].PSetID;
    int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
    if ((int)NewMaxPressure[ID] > MaxUnits)
      MaxUnits = NewMaxPressure[ID];
  }
}

/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
///
/// This is a skeletal driver, with all the functionality pushed into helpers,
/// so that it can be easily extended by experimental schedulers. Generally,
/// implementing MachineSchedStrategy should be sufficient to implement a new
/// scheduling algorithm. However, if a scheduler further subclasses
/// ScheduleDAGMI then it will want to override this virtual method in order to
/// update any specialized state.
void ScheduleDAGMI::schedule() {
  buildDAGWithRegPressure();

  Topo.InitDAGTopologicalSorting();

  postprocessDAG();

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));

  if (ViewMISchedDAGs) viewGraph();

  initQueues();

  bool IsTopNode = false;
  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
    assert(!SU->isScheduled && "Node already scheduled");
    if (!checkSchedLimit())
      break;

    scheduleMI(SU, IsTopNode);

    updateQueues(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  placeDebugValues();

  DEBUG({
      unsigned BBNum = begin()->getParent()->getNumber();
      dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
      dumpSchedule();
      dbgs() << '\n';
    });
}

/// Build the DAG and set up three register pressure trackers.
void ScheduleDAGMI::buildDAGWithRegPressure() {
  // Initialize the register pressure tracker used by buildSchedGraph.
  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);

  // Account for liveness generated by the region boundary.
  if (LiveRegionEnd != RegionEnd)
    RPTracker.recede();

  // Build the DAG, and compute current register pressure.
  buildSchedGraph(AA, &RPTracker);
  if (ViewMISchedDAGs) viewGraph();

  // Initialize top/bottom trackers after computing region pressure.
  initRegPressure();
}

/// Apply each ScheduleDAGMutation step in order.
void ScheduleDAGMI::postprocessDAG() {
  for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
    Mutations[i]->apply(this);
  }
}

// Release all DAG roots for scheduling.
//
// Nodes with unreleased weak edges can still be roots.
void ScheduleDAGMI::releaseRoots() {
  SmallVector<SUnit*, 16> BotRoots;

  for (std::vector<SUnit>::iterator
         I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
    SUnit *SU = &(*I);
    // A SUnit is ready to top schedule if it has no predecessors.
    if (!I->NumPredsLeft && SU != &EntrySU)
      SchedImpl->releaseTopNode(SU);
    // A SUnit is ready to bottom schedule if it has no successors.
    if (!I->NumSuccsLeft && SU != &ExitSU)
      BotRoots.push_back(SU);
  }
  // Release bottom roots in reverse order so the higher priority nodes appear
  // first. This is more natural and slightly more efficient.
  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
    SchedImpl->releaseBottomNode(*I);
}

/// Identify DAG roots and set up scheduler queues.
void ScheduleDAGMI::initQueues() {
  NextClusterSucc = NULL;
  NextClusterPred = NULL;

  // Initialize the strategy before modifying the DAG.
  SchedImpl->initialize(this);

  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
  releaseRoots();

  releaseSuccessors(&EntrySU);
  releasePredecessors(&ExitSU);

  SchedImpl->registerRoots();

  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
  CurrentBottom = RegionEnd;
}

/// Move an instruction and update register pressure.
void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) {
  // Move the instruction to its new location in the instruction stream.
  MachineInstr *MI = SU->getInstr();

  if (IsTopNode) {
    assert(SU->isTopReady() && "node still has unscheduled dependencies");
    if (&*CurrentTop == MI)
      CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
    else {
      moveInstruction(MI, CurrentTop);
      TopRPTracker.setPos(MI);
    }

    // Update top scheduled pressure.
    TopRPTracker.advance();
    assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
    updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
  }
  else {
    assert(SU->isBottomReady() && "node still has unscheduled dependencies");
    MachineBasicBlock::iterator priorII =
      priorNonDebug(CurrentBottom, CurrentTop);
    if (&*priorII == MI)
      CurrentBottom = priorII;
    else {
      if (&*CurrentTop == MI) {
        CurrentTop = nextIfDebug(++CurrentTop, priorII);
        TopRPTracker.setPos(CurrentTop);
      }
      moveInstruction(MI, CurrentBottom);
      CurrentBottom = MI;
    }
    // Update bottom scheduled pressure.
    BotRPTracker.recede();
    assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
    updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
  }
}

/// Update scheduler queues after scheduling an instruction.
void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
  // Release dependent instructions for scheduling.
  if (IsTopNode)
    releaseSuccessors(SU);
  else
    releasePredecessors(SU);

  SU->isScheduled = true;

  // Notify the scheduling strategy after updating the DAG.
  SchedImpl->schedNode(SU, IsTopNode);
}

/// Reinsert any remaining debug_values, just like the PostRA scheduler.
void ScheduleDAGMI::placeDebugValues() {
  // If first instruction was a DBG_VALUE then put it back.
  if (FirstDbgValue) {
    BB->splice(RegionBegin, BB, FirstDbgValue);
    RegionBegin = FirstDbgValue;
  }

  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
    std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
    MachineInstr *DbgValue = P.first;
    MachineBasicBlock::iterator OrigPrevMI = P.second;
    if (&*RegionBegin == DbgValue)
      ++RegionBegin;
    BB->splice(++OrigPrevMI, BB, DbgValue);
    if (OrigPrevMI == llvm::prior(RegionEnd))
      RegionEnd = DbgValue;
  }
  DbgValues.clear();
  FirstDbgValue = NULL;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ScheduleDAGMI::dumpSchedule() const {
  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
    if (SUnit *SU = getSUnit(&(*MI)))
      SU->dump(this);
    else
      dbgs() << "Missing SUnit\n";
  }
}
#endif

//===----------------------------------------------------------------------===//
// LoadClusterMutation - DAG post-processing to cluster loads.
//===----------------------------------------------------------------------===//

namespace {
/// \brief Post-process the DAG to create cluster edges between neighboring
/// loads.
class LoadClusterMutation : public ScheduleDAGMutation {
  struct LoadInfo {
    SUnit *SU;
    unsigned BaseReg;
    unsigned Offset;
    LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
      : SU(su), BaseReg(reg), Offset(ofs) {}
  };
  static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS,
                           const LoadClusterMutation::LoadInfo &RHS);

  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
public:
  LoadClusterMutation(const TargetInstrInfo *tii,
                      const TargetRegisterInfo *tri)
    : TII(tii), TRI(tri) {}

  virtual void apply(ScheduleDAGMI *DAG);
protected:
  void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
};
} // anonymous

bool LoadClusterMutation::LoadInfoLess(
  const LoadClusterMutation::LoadInfo &LHS,
  const LoadClusterMutation::LoadInfo &RHS) {
  if (LHS.BaseReg != RHS.BaseReg)
    return LHS.BaseReg < RHS.BaseReg;
  return LHS.Offset < RHS.Offset;
}

void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
                                                  ScheduleDAGMI *DAG) {
  SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords;
  for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
    SUnit *SU = Loads[Idx];
    unsigned BaseReg;
    unsigned Offset;
    if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
      LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
  }
  if (LoadRecords.size() < 2)
    return;
  std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess);
  unsigned ClusterLength = 1;
  for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
    if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
      ClusterLength = 1;
      continue;
    }

    SUnit *SUa = LoadRecords[Idx].SU;
    SUnit *SUb = LoadRecords[Idx+1].SU;
    if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength)
        && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {

      DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
            << SUb->NodeNum << ")\n");
      // Copy successor edges from SUa to SUb. Interleaving computation
      // dependent on SUa can prevent load combining due to register reuse.
      // Predecessor edges do not need to be copied from SUb to SUa since nearby
      // loads should have effectively the same inputs.
      for (SUnit::const_succ_iterator
             SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
        if (SI->getSUnit() == SUb)
          continue;
        DEBUG(dbgs() << " Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
        DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
      }
      ++ClusterLength;
    }
    else
      ClusterLength = 1;
  }
}

/// \brief Callback from DAG postProcessing to create cluster edges for loads.
void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
  // Map DAG NodeNum to store chain ID.
  DenseMap<unsigned, unsigned> StoreChainIDs;
  // Map each store chain to a set of dependent loads.
  SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
  for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
    SUnit *SU = &DAG->SUnits[Idx];
    if (!SU->getInstr()->mayLoad())
      continue;
    unsigned ChainPredID = DAG->SUnits.size();
    for (SUnit::const_pred_iterator
           PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
      if (PI->isCtrl()) {
        ChainPredID = PI->getSUnit()->NodeNum;
        break;
      }
    }
    // Check if this chain-like pred has been seen
    // before. ChainPredID==MaxNodeID for loads at the top of the schedule.
    unsigned NumChains = StoreChainDependents.size();
    std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
      StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
    if (Result.second)
      StoreChainDependents.resize(NumChains + 1);
    StoreChainDependents[Result.first->second].push_back(SU);
  }
  // Iterate over the store chains.
  for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
    clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
}

//===----------------------------------------------------------------------===//
// MacroFusion - DAG post-processing to encourage fusion of macro ops.
//===----------------------------------------------------------------------===//

namespace {
/// \brief Post-process the DAG to create cluster edges between instructions
/// that may be fused by the processor into a single operation.
class MacroFusion : public ScheduleDAGMutation {
  const TargetInstrInfo *TII;
public:
  MacroFusion(const TargetInstrInfo *tii): TII(tii) {}

  virtual void apply(ScheduleDAGMI *DAG);
};
} // anonymous

/// \brief Callback from DAG postProcessing to create cluster edges to encourage
/// fused operations.
void MacroFusion::apply(ScheduleDAGMI *DAG) {
  // For now, assume targets can only fuse with the branch.
  MachineInstr *Branch = DAG->ExitSU.getInstr();
  if (!Branch)
    return;

  for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
    SUnit *SU = &DAG->SUnits[--Idx];
    if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
      continue;

    // Create a single weak edge from SU to ExitSU. The only effect is to cause
    // bottom-up scheduling to heavily prioritize the clustered SU. There is no
    // need to copy predecessor edges from ExitSU to SU, since top-down
    // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
    // of SU, we could create an artificial edge from the deepest root, but it
    // hasn't been needed yet.
    bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
    (void)Success;
    assert(Success && "No DAG nodes should be reachable from ExitSU");

    DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
    break;
  }
}

//===----------------------------------------------------------------------===//
// ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
//===----------------------------------------------------------------------===//

namespace {
/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
/// the schedule.
class ConvergingScheduler : public MachineSchedStrategy {
public:
  /// Represent the type of SchedCandidate found within a single queue.
  /// pickNodeBidirectional depends on these listed by decreasing priority.
  enum CandReason {
    NoCand, SingleExcess, SingleCritical, Cluster,
    ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
    TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse,
    NodeOrder};

#ifndef NDEBUG
  static const char *getReasonStr(ConvergingScheduler::CandReason Reason);
#endif

  /// Policy for scheduling the next instruction in the candidate's zone.
  struct CandPolicy {
    bool ReduceLatency;
    unsigned ReduceResIdx;
    unsigned DemandResIdx;

    CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
  };

  /// Status of an instruction's critical resource consumption.
  struct SchedResourceDelta {
    // Count critical resources in the scheduled region required by SU.
    unsigned CritResources;

    // Count critical resources from another region consumed by SU.
    unsigned DemandedResources;

    SchedResourceDelta(): CritResources(0), DemandedResources(0) {}

    bool operator==(const SchedResourceDelta &RHS) const {
      return CritResources == RHS.CritResources
        && DemandedResources == RHS.DemandedResources;
    }
    bool operator!=(const SchedResourceDelta &RHS) const {
      return !operator==(RHS);
    }
  };

  /// Store the state used by ConvergingScheduler heuristics, required for the
  /// lifetime of one invocation of pickNode().
  struct SchedCandidate {
    CandPolicy Policy;

    // The best SUnit candidate.
    SUnit *SU;

    // The reason for this candidate.
    CandReason Reason;

    // Register pressure values for the best candidate.
    RegPressureDelta RPDelta;

    // Critical resource consumption of the best candidate.
    SchedResourceDelta ResDelta;

    SchedCandidate(const CandPolicy &policy)
      : Policy(policy), SU(NULL), Reason(NoCand) {}

    bool isValid() const { return SU; }

    // Copy the status of another candidate without changing policy.
    void setBest(SchedCandidate &Best) {
      assert(Best.Reason != NoCand && "uninitialized Sched candidate");
      SU = Best.SU;
      Reason = Best.Reason;
      RPDelta = Best.RPDelta;
      ResDelta = Best.ResDelta;
    }

    void initResourceDelta(const ScheduleDAGMI *DAG,
                           const TargetSchedModel *SchedModel);
  };

  /// Summarize the unscheduled region.
  struct SchedRemainder {
    // Critical path through the DAG in expected latency.
    unsigned CriticalPath;

    // Unscheduled resources
    SmallVector<unsigned, 16> RemainingCounts;
    // Critical resource for the unscheduled zone.
    unsigned CritResIdx;
    // Number of micro-ops left to schedule.
    unsigned RemainingMicroOps;
    // Is the unscheduled zone resource limited.
    bool IsResourceLimited;

    unsigned MaxRemainingCount;

    void reset() {
      CriticalPath = 0;
      RemainingCounts.clear();
      CritResIdx = 0;
      RemainingMicroOps = 0;
      IsResourceLimited = false;
      MaxRemainingCount = 0;
    }

    SchedRemainder() { reset(); }

    void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
  };

  /// Each Scheduling boundary is associated with ready queues. It tracks the
  /// current cycle in the direction of movement, and maintains the state
  /// of "hazards" and other interlocks at the current cycle.
  struct SchedBoundary {
    ScheduleDAGMI *DAG;
    const TargetSchedModel *SchedModel;
    SchedRemainder *Rem;

    ReadyQueue Available;
    ReadyQueue Pending;
    bool CheckPending;

    // For heuristics, keep a list of the nodes that immediately depend on the
    // most recently scheduled node.
    SmallPtrSet<const SUnit*, 8> NextSUs;

    ScheduleHazardRecognizer *HazardRec;

    unsigned CurrCycle;
    unsigned IssueCount;

    /// MinReadyCycle - Cycle of the soonest available instruction.
    unsigned MinReadyCycle;

    // The expected latency of the critical path in this scheduled zone.
    unsigned ExpectedLatency;

    // Resources used in the scheduled zone beyond this boundary.
    SmallVector<unsigned, 16> ResourceCounts;

    // Cache the critical resources ID in this scheduled zone.
    unsigned CritResIdx;

    // Is the scheduled region resource limited vs. latency limited.
    bool IsResourceLimited;

    unsigned ExpectedCount;

    // Policy flag: attempt to find ILP until expected latency is covered.
    bool ShouldIncreaseILP;

#ifndef NDEBUG
    // Remember the greatest min operand latency.
    unsigned MaxMinLatency;
#endif

    void reset() {
      Available.clear();
      Pending.clear();
      CheckPending = false;
      NextSUs.clear();
      HazardRec = 0;
      CurrCycle = 0;
      IssueCount = 0;
      MinReadyCycle = UINT_MAX;
      ExpectedLatency = 0;
      ResourceCounts.resize(1);
      assert(!ResourceCounts[0] && "nonzero count for bad resource");
      CritResIdx = 0;
      IsResourceLimited = false;
      ExpectedCount = 0;
      ShouldIncreaseILP = false;
#ifndef NDEBUG
      MaxMinLatency = 0;
#endif
      // Reserve a zero-count for invalid CritResIdx.
      ResourceCounts.resize(1);
    }

    /// Pending queues extend the ready queues with the same ID and the
    /// PendingFlag set.
    SchedBoundary(unsigned ID, const Twine &Name):
      DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
      Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P") {
      reset();
    }

    ~SchedBoundary() { delete HazardRec; }

    void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
              SchedRemainder *rem);

    bool isTop() const {
      return Available.getID() == ConvergingScheduler::TopQID;
    }

    unsigned getUnscheduledLatency(SUnit *SU) const {
      if (isTop())
        return SU->getHeight();
      return SU->getDepth();
    }

    unsigned getCriticalCount() const {
      return ResourceCounts[CritResIdx];
    }

    bool checkHazard(SUnit *SU);

    void checkILPPolicy();

    void releaseNode(SUnit *SU, unsigned ReadyCycle);

    void bumpCycle();

    void countResource(unsigned PIdx, unsigned Cycles);

    void bumpNode(SUnit *SU);

    void releasePending();

    void removeReady(SUnit *SU);

    SUnit *pickOnlyChoice();
  };

private:
  ScheduleDAGMI *DAG;
  const TargetSchedModel *SchedModel;
  const TargetRegisterInfo *TRI;

  // State of the top and bottom scheduled instruction boundaries.
  SchedRemainder Rem;
  SchedBoundary Top;
  SchedBoundary Bot;

public:
  /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
  enum {
    TopQID = 1,
    BotQID = 2,
    LogMaxQID = 2
  };

  ConvergingScheduler():
    DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}

  virtual void initialize(ScheduleDAGMI *dag);

  virtual SUnit *pickNode(bool &IsTopNode);

  virtual void schedNode(SUnit *SU, bool IsTopNode);

  virtual void releaseTopNode(SUnit *SU);

  virtual void releaseBottomNode(SUnit *SU);

  virtual void registerRoots();

protected:
  void balanceZones(
    ConvergingScheduler::SchedBoundary &CriticalZone,
    ConvergingScheduler::SchedCandidate &CriticalCand,
    ConvergingScheduler::SchedBoundary &OppositeZone,
    ConvergingScheduler::SchedCandidate &OppositeCand);

  void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand,
                           ConvergingScheduler::SchedCandidate &BotCand);

  void tryCandidate(SchedCandidate &Cand,
                    SchedCandidate &TryCand,
                    SchedBoundary &Zone,
                    const RegPressureTracker &RPTracker,
                    RegPressureTracker &TempTracker);

  SUnit *pickNodeBidirectional(bool &IsTopNode);

  void pickNodeFromQueue(SchedBoundary &Zone,
                         const RegPressureTracker &RPTracker,
                         SchedCandidate &Candidate);

#ifndef NDEBUG
  void traceCandidate(const SchedCandidate &Cand, const SchedBoundary &Zone);
#endif
};
} // namespace

void ConvergingScheduler::SchedRemainder::
init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
  reset();
  if (!SchedModel->hasInstrSchedModel())
    return;
  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
  for (std::vector<SUnit>::iterator
         I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
    const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
    RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC);
    for (TargetSchedModel::ProcResIter
           PI = SchedModel->getWriteProcResBegin(SC),
           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
      unsigned PIdx = PI->ProcResourceIdx;
      unsigned Factor = SchedModel->getResourceFactor(PIdx);
      RemainingCounts[PIdx] += (Factor * PI->Cycles);
    }
  }
}

void ConvergingScheduler::SchedBoundary::
init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
  reset();
  DAG = dag;
  SchedModel = smodel;
  Rem = rem;
  if (SchedModel->hasInstrSchedModel())
    ResourceCounts.resize(SchedModel->getNumProcResourceKinds());
}

void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
  DAG = dag;
  SchedModel = DAG->getSchedModel();
  TRI = DAG->TRI;
  Rem.init(DAG, SchedModel);
  Top.init(DAG, SchedModel, &Rem);
  Bot.init(DAG, SchedModel, &Rem);

  // Initialize resource counts.

  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
  // are disabled, then these HazardRecs will be disabled.
  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
  const TargetMachine &TM = DAG->MF.getTarget();
  Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
  Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);

  assert((!ForceTopDown || !ForceBottomUp) &&
         "-misched-topdown incompatible with -misched-bottomup");
}

void ConvergingScheduler::releaseTopNode(SUnit *SU) {
  if (SU->isScheduled)
    return;

  for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
    unsigned MinLatency = I->getMinLatency();
#ifndef NDEBUG
    Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
#endif
    if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
      SU->TopReadyCycle = PredReadyCycle + MinLatency;
  }
  Top.releaseNode(SU, SU->TopReadyCycle);
}

void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
  if (SU->isScheduled)
    return;

  assert(SU->getInstr() && "Scheduled SUnit must have instr");

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isWeak())
      continue;
    unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
    unsigned MinLatency = I->getMinLatency();
#ifndef NDEBUG
    Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
#endif
    if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
      SU->BotReadyCycle = SuccReadyCycle + MinLatency;
  }
  Bot.releaseNode(SU, SU->BotReadyCycle);
}

void ConvergingScheduler::registerRoots() {
  Rem.CriticalPath = DAG->ExitSU.getDepth();
  // Some roots may not feed into ExitSU. Check all of them in case.
  for (std::vector<SUnit*>::const_iterator
         I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
    if ((*I)->getDepth() > Rem.CriticalPath)
      Rem.CriticalPath = (*I)->getDepth();
  }
  DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
}

/// Does this SU have a hazard within the current instruction group.
///
/// The scheduler supports two modes of hazard recognition. The first is the
/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
/// supports highly complicated in-order reservation tables
/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
///
/// The second is a streamlined mechanism that checks for hazards based on
/// simple counters that the scheduler itself maintains. It explicitly checks
/// for instruction dispatch limitations, including the number of micro-ops that
/// can dispatch per cycle.
///
/// TODO: Also check whether the SU must start a new group.
bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
  if (HazardRec->isEnabled())
    return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;

  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
  if ((IssueCount > 0) && (IssueCount + uops > SchedModel->getIssueWidth())) {
    DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
          << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
    return true;
  }
  return false;
}

/// If expected latency is covered, disable ILP policy.
void ConvergingScheduler::SchedBoundary::checkILPPolicy() {
  if (ShouldIncreaseILP
      && (IsResourceLimited || ExpectedLatency <= CurrCycle)) {
    ShouldIncreaseILP = false;
    DEBUG(dbgs() << "Disable ILP: " << Available.getName() << '\n');
  }
}

void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
                                                     unsigned ReadyCycle) {

  if (ReadyCycle < MinReadyCycle)
    MinReadyCycle = ReadyCycle;

  // Check for interlocks first. For the purpose of other heuristics, an
  // instruction that cannot issue appears as if it's not in the ReadyQueue.
  if (ReadyCycle > CurrCycle || checkHazard(SU))
    Pending.push(SU);
  else
    Available.push(SU);

  // Record this node as an immediate dependent of the scheduled node.
  NextSUs.insert(SU);

  // If CriticalPath has been computed, then check if the unscheduled nodes
  // exceed the ILP window. Before registerRoots, CriticalPath==0.
  if (Rem->CriticalPath && (ExpectedLatency + getUnscheduledLatency(SU)
                            > Rem->CriticalPath + ILPWindow)) {
    ShouldIncreaseILP = true;
    DEBUG(dbgs() << "Increase ILP: " << Available.getName() << " "
          << ExpectedLatency << " + " << getUnscheduledLatency(SU) << '\n');
  }
}

/// Move the boundary of scheduled code by one cycle.
void ConvergingScheduler::SchedBoundary::bumpCycle() {
  unsigned Width = SchedModel->getIssueWidth();
  IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;

  unsigned NextCycle = CurrCycle + 1;
  assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
  if (MinReadyCycle > NextCycle) {
    IssueCount = 0;
    NextCycle = MinReadyCycle;
  }

  if (!HazardRec->isEnabled()) {
    // Bypass HazardRec virtual calls.
    CurrCycle = NextCycle;
  }
  else {
    // Bypass getHazardType calls in case of long latency.
    for (; CurrCycle != NextCycle; ++CurrCycle) {
      if (isTop())
        HazardRec->AdvanceCycle();
      else
        HazardRec->RecedeCycle();
    }
  }
  CheckPending = true;
  IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);

  DEBUG(dbgs() << " *** " << Available.getName() << " cycle "
        << CurrCycle << '\n');
}

/// Add the given processor resource to this scheduled zone.
void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx,
                                                       unsigned Cycles) {
  unsigned Factor = SchedModel->getResourceFactor(PIdx);
  DEBUG(dbgs() << " " << SchedModel->getProcResource(PIdx)->Name
        << " +(" << Cycles << "x" << Factor
        << ") / " << SchedModel->getLatencyFactor() << '\n');

  unsigned Count = Factor * Cycles;
  ResourceCounts[PIdx] += Count;
  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
  Rem->RemainingCounts[PIdx] -= Count;

  // Reset MaxRemainingCount for sanity.
  Rem->MaxRemainingCount = 0;

  // Check if this resource exceeds the current critical resource by a full
  // cycle. If so, it becomes the critical resource.
  if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx])
      >= (int)SchedModel->getLatencyFactor()) {
    CritResIdx = PIdx;
    DEBUG(dbgs() << " *** Critical resource "
          << SchedModel->getProcResource(PIdx)->Name << " x"
          << ResourceCounts[PIdx] << '\n');
  }
}

/// Move the boundary of scheduled code by one SUnit.
void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
  // Update the reservation table.
  if (HazardRec->isEnabled()) {
    if (!isTop() && SU->isCall) {
      // Calls are scheduled with their preceding instructions. For bottom-up
      // scheduling, clear the pipeline state before emitting.
      HazardRec->Reset();
    }
    HazardRec->EmitInstruction(SU);
  }
  // Update resource counts and critical resource.
  if (SchedModel->hasInstrSchedModel()) {
    const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
    Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC);
    for (TargetSchedModel::ProcResIter
           PI = SchedModel->getWriteProcResBegin(SC),
           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
      countResource(PI->ProcResourceIdx, PI->Cycles);
    }
  }
  if (isTop()) {
    if (SU->getDepth() > ExpectedLatency)
      ExpectedLatency = SU->getDepth();
  }
  else {
    if (SU->getHeight() > ExpectedLatency)
      ExpectedLatency = SU->getHeight();
  }

  IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);

  // Check the instruction group dispatch limit.
  // TODO: Check if this SU must end a dispatch group.
  IssueCount += SchedModel->getNumMicroOps(SU->getInstr());

  // checkHazard prevents scheduling multiple instructions per cycle that exceed
  // issue width. However, we commonly reach the maximum. In this case
  // opportunistically bump the cycle to avoid uselessly checking everything in
  // the readyQ. Furthermore, a single instruction may produce more than one
  // cycle's worth of micro-ops.
  if (IssueCount >= SchedModel->getIssueWidth()) {
    DEBUG(dbgs() << " *** Max instrs at cycle " << CurrCycle << '\n');
    bumpCycle();
  }
}

/// Release pending ready nodes into the available queue. This makes them
/// visible to heuristics.
void ConvergingScheduler::SchedBoundary::releasePending() {
  // If the available queue is empty, it is safe to reset MinReadyCycle.
  if (Available.empty())
    MinReadyCycle = UINT_MAX;

  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
    SUnit *SU = *(Pending.begin()+i);
    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;

    if (ReadyCycle < MinReadyCycle)
      MinReadyCycle = ReadyCycle;

    if (ReadyCycle > CurrCycle)
      continue;

    if (checkHazard(SU))
      continue;

    Available.push(SU);
    Pending.remove(Pending.begin()+i);
    --i; --e;
  }
  DEBUG(if (!Pending.empty()) Pending.dump());
  CheckPending = false;
}

/// Remove SU from the ready set for this boundary.
void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) {
  if (Available.isInQueue(SU))
    Available.remove(Available.find(SU));
  else {
    assert(Pending.isInQueue(SU) && "bad ready count");
    Pending.remove(Pending.find(SU));
  }
}

/// If this queue only has one ready candidate, return it. As a side effect,
/// defer any nodes that now hit a hazard, and advance the cycle until at least
/// one node is ready. If multiple instructions are ready, return NULL.
SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
  if (CheckPending)
    releasePending();

  if (IssueCount > 0) {
    // Defer any ready instrs that now have a hazard.
    for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
      if (checkHazard(*I)) {
        Pending.push(*I);
        I = Available.remove(I);
        continue;
      }
      ++I;
    }
  }
  for (unsigned i = 0; Available.empty(); ++i) {
    assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
           "permanent hazard"); (void)i;
    bumpCycle();
    releasePending();
  }
  if (Available.size() == 1)
    return *Available.begin();
  return NULL;
}

/// Record the candidate policy for opposite zones with different critical
/// resources.
///
/// If the CriticalZone is latency limited, don't force a policy for the
/// candidates here. Instead, when releasing each candidate, releaseNode
/// compares the region's critical path to the candidate's height or depth and
/// the scheduled zone's expected latency then sets ShouldIncreaseILP.
void ConvergingScheduler::balanceZones(
  ConvergingScheduler::SchedBoundary &CriticalZone,
  ConvergingScheduler::SchedCandidate &CriticalCand,
  ConvergingScheduler::SchedBoundary &OppositeZone,
  ConvergingScheduler::SchedCandidate &OppositeCand) {

  if (!CriticalZone.IsResourceLimited)
    return;

  SchedRemainder *Rem = CriticalZone.Rem;

  // If the critical zone is overconsuming a resource relative to the
  // remainder, try to reduce it.
  unsigned RemainingCritCount =
    Rem->RemainingCounts[CriticalZone.CritResIdx];
  if ((int)(Rem->MaxRemainingCount - RemainingCritCount)
      > (int)SchedModel->getLatencyFactor()) {
    CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
    DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce "
          << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
          << '\n');
  }
  // If the other zone is underconsuming a resource relative to the full zone,
  // try to increase it.
  unsigned OppositeCount =
    OppositeZone.ResourceCounts[CriticalZone.CritResIdx];
  if ((int)(OppositeZone.ExpectedCount - OppositeCount)
      > (int)SchedModel->getLatencyFactor()) {
    OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx;
    DEBUG(dbgs() << "Balance " << OppositeZone.Available.getName() << " demand "
          << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name
          << '\n');
  }
}

/// Determine if the scheduled zones exceed resource limits or critical path and
/// set each candidate's ReduceHeight policy accordingly.
void ConvergingScheduler::checkResourceLimits(
  ConvergingScheduler::SchedCandidate &TopCand,
  ConvergingScheduler::SchedCandidate &BotCand) {

  Bot.checkILPPolicy();
  Top.checkILPPolicy();
  if (Bot.ShouldIncreaseILP)
    BotCand.Policy.ReduceLatency = true;
  if (Top.ShouldIncreaseILP)
    TopCand.Policy.ReduceLatency = true;

  // Handle resource-limited regions.
  if (Top.IsResourceLimited && Bot.IsResourceLimited
      && Top.CritResIdx == Bot.CritResIdx) {
    // If the scheduled critical resource in both zones is no longer the
    // critical remaining resource, attempt to reduce resource height both ways.
    if (Top.CritResIdx != Rem.CritResIdx) {
      TopCand.Policy.ReduceResIdx = Top.CritResIdx;
      BotCand.Policy.ReduceResIdx = Bot.CritResIdx;
      DEBUG(dbgs() << "Reduce scheduled "
            << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n');
    }
    return;
  }
  // Handle latency-limited regions.
  if (!Top.IsResourceLimited && !Bot.IsResourceLimited) {
    // If the total scheduled expected latency exceeds the region's critical
    // path then reduce latency both ways.
    //
    // Just because a zone is not resource limited does not mean it is latency
    // limited. Unbuffered resources, such as max micro-ops, may cause CurrCycle
    // to exceed expected latency.
    if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath)
        && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) {
      TopCand.Policy.ReduceLatency = true;
      BotCand.Policy.ReduceLatency = true;
      DEBUG(dbgs() << "Reduce scheduled latency " << Top.ExpectedLatency
            << " + " << Bot.ExpectedLatency << '\n');
    }
    return;
  }
  // The critical resource is different in each zone, so request balancing.

  // Compute the cost of each zone.
  Rem.MaxRemainingCount = std::max(
    Rem.RemainingMicroOps * SchedModel->getMicroOpFactor(),
    Rem.RemainingCounts[Rem.CritResIdx]);
  Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle);
  Top.ExpectedCount = std::max(
    Top.getCriticalCount(),
    Top.ExpectedCount * SchedModel->getLatencyFactor());
  Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle);
  Bot.ExpectedCount = std::max(
    Bot.getCriticalCount(),
    Bot.ExpectedCount * SchedModel->getLatencyFactor());

  balanceZones(Top, TopCand, Bot, BotCand);
  balanceZones(Bot, BotCand, Top, TopCand);
}

void ConvergingScheduler::SchedCandidate::
initResourceDelta(const ScheduleDAGMI *DAG,
                  const TargetSchedModel *SchedModel) {
  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
    return;

  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
  for (TargetSchedModel::ProcResIter
         PI = SchedModel->getWriteProcResBegin(SC),
         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
    if (PI->ProcResourceIdx == Policy.ReduceResIdx)
      ResDelta.CritResources += PI->Cycles;
    if (PI->ProcResourceIdx == Policy.DemandResIdx)
      ResDelta.DemandedResources += PI->Cycles;
  }
}

/// Return true if this heuristic determines order.
static bool tryLess(unsigned TryVal, unsigned CandVal,
                    ConvergingScheduler::SchedCandidate &TryCand,
                    ConvergingScheduler::SchedCandidate &Cand,
                    ConvergingScheduler::CandReason Reason) {
  if (TryVal < CandVal) {
    TryCand.Reason = Reason;
    return true;
  }
  if (TryVal > CandVal) {
    if (Cand.Reason > Reason)
      Cand.Reason = Reason;
    return true;
  }
  return false;
}

static bool tryGreater(unsigned TryVal, unsigned CandVal,
                       ConvergingScheduler::SchedCandidate &TryCand,
                       ConvergingScheduler::SchedCandidate &Cand,
                       ConvergingScheduler::CandReason Reason) {
  if (TryVal > CandVal) {
    TryCand.Reason = Reason;
    return true;
  }
  if (TryVal < CandVal) {
    if (Cand.Reason > Reason)
      Cand.Reason = Reason;
    return true;
  }
  return false;
}

static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
}

/// Apply a set of heuristics to a new candidate. Heuristics are currently
/// hierarchical. This may be more efficient than a graduated cost model
/// because we don't need to evaluate all aspects of the model for each node
/// in the queue. But it's really done to make the heuristics easier to debug
/// and statistically analyze.
///
/// \param Cand provides the policy and current best candidate.
/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
/// \param Zone describes the scheduled zone that we are extending.
/// \param RPTracker describes reg pressure within the scheduled zone.
/// \param TempTracker is a scratch pressure tracker to reuse in queries.
void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
                                       SchedCandidate &TryCand,
                                       SchedBoundary &Zone,
                                       const RegPressureTracker &RPTracker,
                                       RegPressureTracker &TempTracker) {

  // Always initialize TryCand's RPDelta.
  TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta,
                                  DAG->getRegionCriticalPSets(),
                                  DAG->getRegPressure().MaxSetPressure);

  // Initialize the candidate if needed.
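  // An invalid Cand has no reason yet; the first valid TryCand wins by
  // default and is recorded in original node order.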
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return;
  }
  // Avoid exceeding the target's limit.
  if (tryLess(TryCand.RPDelta.Excess.UnitIncrease,
              Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess))
    return;
  if (Cand.Reason == SingleExcess)
    Cand.Reason = MultiPressure;

  // Avoid increasing the max critical pressure in the scheduled region.
  if (tryLess(TryCand.RPDelta.CriticalMax.UnitIncrease,
              Cand.RPDelta.CriticalMax.UnitIncrease,
              TryCand, Cand, SingleCritical))
    return;
  if (Cand.Reason == SingleCritical)
    Cand.Reason = MultiPressure;

  // Keep clustered nodes together to encourage downstream peephole
  // optimizations which may reduce resource requirements.
  //
  // This is a best effort to set things up for a post-RA pass. Optimizations
  // like generating loads of multiple registers should ideally be done within
  // the scheduler pass by combining the loads during DAG postprocessing.
  const SUnit *NextClusterSU =
    Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
                 TryCand, Cand, Cluster))
    return;
  // Currently, weak edges are for clustering, so we hard-code that reason.
  // However, deferring the current TryCand will not change Cand's reason.
  CandReason OrigReason = Cand.Reason;
  if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
              getWeakLeft(Cand.SU, Zone.isTop()),
              TryCand, Cand, Cluster)) {
    Cand.Reason = OrigReason;
    return;
  }
  // Avoid critical resource consumption and balance the schedule.
  TryCand.initResourceDelta(DAG, SchedModel);
  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
              TryCand, Cand, ResourceReduce))
    return;
  if (tryGreater(TryCand.ResDelta.DemandedResources,
                 Cand.ResDelta.DemandedResources,
                 TryCand, Cand, ResourceDemand))
    return;

  // Avoid serializing long latency dependence chains.
  if (Cand.Policy.ReduceLatency) {
    if (Zone.isTop()) {
      if (Cand.SU->getDepth() * SchedModel->getLatencyFactor()
          > Zone.ExpectedCount) {
        if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
                    TryCand, Cand, TopDepthReduce))
          return;
      }
      if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
                     TryCand, Cand, TopPathReduce))
        return;
    }
    else {
      if (Cand.SU->getHeight() * SchedModel->getLatencyFactor()
          > Zone.ExpectedCount) {
        if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
                    TryCand, Cand, BotHeightReduce))
          return;
      }
      if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
                     TryCand, Cand, BotPathReduce))
        return;
    }
  }

  // Avoid increasing the max pressure of the entire region.
  if (tryLess(TryCand.RPDelta.CurrentMax.UnitIncrease,
              Cand.RPDelta.CurrentMax.UnitIncrease, TryCand, Cand, SingleMax))
    return;
  if (Cand.Reason == SingleMax)
    Cand.Reason = MultiPressure;

  // Prefer immediate defs/users of the last scheduled instruction. This is a
  // nice pressure avoidance strategy that also conserves the processor's
  // register renaming resources and keeps the machine code readable.
  if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU),
                 TryCand, Cand, NextDefUse))
    return;

  // Fall through to original instruction order.
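  // Prefer the lower NodeNum when extending the top zone and the higher
  // NodeNum when extending the bottom zone, so ties keep the instructions in
  // their original order from either direction.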
  if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
      || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
    TryCand.Reason = NodeOrder;
  }
}

/// pickNodeFromQueue helper that returns true if the LHS reg pressure effect
/// is more desirable than RHS from a scheduling standpoint.
static bool compareRPDelta(const RegPressureDelta &LHS,
                           const RegPressureDelta &RHS) {
  // Compare each component of pressure in decreasing order of importance
  // without checking if any are valid. Invalid PressureElements are assumed to
  // have UnitIncrease==0, so are neutral.

  // Avoid exceeding the target's limit.
  if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) {
    DEBUG(dbgs() << "RP excess top - bot: "
          << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n');
    return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease;
  }
  // Avoid increasing the max critical pressure in the scheduled region.
  if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) {
    DEBUG(dbgs() << "RP critical top - bot: "
          << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease)
          << '\n');
    return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease;
  }
  // Avoid increasing the max pressure of the entire region.
  if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) {
    DEBUG(dbgs() << "RP current top - bot: "
          << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease)
          << '\n');
    return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease;
  }
  return false;
}

#ifndef NDEBUG
const char *ConvergingScheduler::getReasonStr(
  ConvergingScheduler::CandReason Reason) {
  switch (Reason) {
  case NoCand:         return "NOCAND    ";
  case SingleExcess:   return "REG-EXCESS";
  case SingleCritical: return "REG-CRIT  ";
  case Cluster:        return "CLUSTER   ";
  case SingleMax:      return "REG-MAX   ";
  case MultiPressure:  return "REG-MULTI ";
  case ResourceReduce: return "RES-REDUCE";
  case ResourceDemand: return "RES-DEMAND";
  case TopDepthReduce: return "TOP-DEPTH ";
  case TopPathReduce:  return "TOP-PATH  ";
  case BotHeightReduce:return "BOT-HEIGHT";
  case BotPathReduce:  return "BOT-PATH  ";
  case NextDefUse:     return "DEF-USE   ";
  case NodeOrder:      return "ORDER     ";
  }
  llvm_unreachable("Unknown reason!");
}

void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand,
                                         const SchedBoundary &Zone) {
  const char *Label = getReasonStr(Cand.Reason);
  PressureElement P;
  unsigned ResIdx = 0;
  unsigned Latency = 0;
  switch (Cand.Reason) {
  default:
    break;
  case SingleExcess:
    P = Cand.RPDelta.Excess;
    break;
  case SingleCritical:
    P = Cand.RPDelta.CriticalMax;
    break;
  case SingleMax:
    P = Cand.RPDelta.CurrentMax;
    break;
  case ResourceReduce:
    ResIdx = Cand.Policy.ReduceResIdx;
    break;
  case ResourceDemand:
    ResIdx = Cand.Policy.DemandResIdx;
    break;
  case TopDepthReduce:
    Latency = Cand.SU->getDepth();
    break;
  case TopPathReduce:
    Latency = Cand.SU->getHeight();
    break;
  case BotHeightReduce:
    Latency = Cand.SU->getHeight();
    break;
  case BotPathReduce:
    Latency = Cand.SU->getDepth();
    break;
  }
  dbgs() << Label << " " << Zone.Available.getName() << " ";
  if (P.isValid())
    dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
           << " ";
  else
    dbgs() << "          ";
  if (ResIdx)
    dbgs() << SchedModel->getProcResource(ResIdx)->Name << " ";
  else
    dbgs() << "          ";
  if (Latency)
    dbgs() << Latency << " cycles ";
  else
    dbgs() << "          ";
  Cand.SU->dump(DAG);
}
#endif

/// Pick the best candidate from the given zone's ready queue.
///
/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit
/// during DAG building. To adjust for the current scheduling location we need
/// to maintain the number of vreg uses remaining to be top-scheduled.
void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
                                            const RegPressureTracker &RPTracker,
                                            SchedCandidate &Cand) {
  ReadyQueue &Q = Zone.Available;

  DEBUG(Q.dump());

  // getMaxPressureDelta temporarily modifies the tracker.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {

    SchedCandidate TryCand(Cand.Policy);
    TryCand.SU = *I;
    tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
    if (TryCand.Reason != NoCand) {
      // Initialize resource delta if needed in case future heuristics query
      // it.
      if (TryCand.ResDelta == SchedResourceDelta())
        TryCand.initResourceDelta(DAG, SchedModel);
      Cand.setBest(TryCand);
      DEBUG(traceCandidate(Cand, Zone));
    }
  }
}

static void tracePick(const ConvergingScheduler::SchedCandidate &Cand,
                      bool IsTop) {
  DEBUG(dbgs() << "Pick " << (IsTop ? "top" : "bot")
        << " SU(" << Cand.SU->NodeNum << ") "
        << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
}

/// Pick the best candidate node from either the top or bottom queue.
SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  CandPolicy NoPolicy;
  SchedCandidate BotCand(NoPolicy);
  SchedCandidate TopCand(NoPolicy);
  checkResourceLimits(TopCand, BotCand);

  // Prefer bottom scheduling when heuristics are silent.
  pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
  assert(BotCand.Reason != NoCand && "failed to find the first candidate");

  // If either Q has a single candidate that provides the least increase in
  // Excess pressure, we can immediately schedule from that Q.
  //
  // RegionCriticalPSets summarizes the pressure within the scheduled region
  // and affects picking from either Q. If scheduling in one direction must
  // increase pressure for one of the excess PSets, then schedule in that
  // direction first to provide more freedom in the other direction.
  if (BotCand.Reason == SingleExcess || BotCand.Reason == SingleCritical) {
    IsTopNode = false;
    tracePick(BotCand, IsTopNode);
    return BotCand.SU;
  }
  // Check if the top Q has a better candidate.
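  // BotCand is already chosen; from here on the top candidate wins only with
  // a strictly better reason or a favorable pressure comparison below.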
  pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
  assert(TopCand.Reason != NoCand && "failed to find the first candidate");

  // If either Q has a single candidate that minimizes pressure above the
  // original region's pressure, pick it.
  if (TopCand.Reason <= SingleMax || BotCand.Reason <= SingleMax) {
    if (TopCand.Reason < BotCand.Reason) {
      IsTopNode = true;
      tracePick(TopCand, IsTopNode);
      return TopCand.SU;
    }
    IsTopNode = false;
    tracePick(BotCand, IsTopNode);
    return BotCand.SU;
  }
  // Check for a salient pressure difference and pick the best from either
  // side.
  if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) {
    IsTopNode = true;
    tracePick(TopCand, IsTopNode);
    return TopCand.SU;
  }
  // Otherwise prefer the bottom candidate, in node order if all else failed.
  if (TopCand.Reason < BotCand.Reason) {
    IsTopNode = true;
    tracePick(TopCand, IsTopNode);
    return TopCand.SU;
  }
  IsTopNode = false;
  tracePick(BotCand, IsTopNode);
  return BotCand.SU;
}

/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return NULL;
  }
  SUnit *SU;
  do {
    if (ForceTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        SchedCandidate TopCand(NoPolicy);
        pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
        assert(TopCand.Reason != NoCand && "failed to find the first candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    }
    else if (ForceBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        SchedCandidate BotCand(NoPolicy);
        pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
        assert(BotCand.Reason != NoCand && "failed to find the first candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    }
    else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
        << " Scheduling Instruction in cycle "
        << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
        SU->dump(DAG));
  return SU;
}

/// Update the scheduler's state after scheduling a node. This is the same node
/// that was just returned by pickNode(). However, ScheduleDAGMI needs to
/// update its state based on the current cycle before MachineSchedStrategy
/// does.
void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
  if (IsTopNode) {
    SU->TopReadyCycle = Top.CurrCycle;
    Top.bumpNode(SU);
  }
  else {
    SU->BotReadyCycle = Bot.CurrCycle;
    Bot.bumpNode(SU);
  }
}

/// Create the standard converging machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
  assert((!ForceTopDown || !ForceBottomUp) &&
         "-misched-topdown incompatible with -misched-bottomup");
  ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler());
  // Register DAG post-processors.
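  // These mutations run after the DAG is built; they add weak cluster edges so
  // that loads of nearby addresses, or an instruction fusable with the branch
  // (macro fusion), tend to be scheduled adjacently.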
  if (EnableLoadCluster)
    DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
  if (EnableMacroFusion)
    DAG->addMutation(new MacroFusion(DAG->TII));
  return DAG;
}
static MachineSchedRegistry
ConvergingSchedRegistry("converge", "Standard converging scheduler.",
                        createConvergingSched);

//===----------------------------------------------------------------------===//
// ILP Scheduler. Currently for experimental analysis of heuristics.
//===----------------------------------------------------------------------===//

namespace {
/// \brief Order nodes by the ILP metric.
struct ILPOrder {
  SchedDFSResult *DFSResult;
  BitVector *ScheduledTrees;
  bool MaximizeILP;

  ILPOrder(SchedDFSResult *dfs, BitVector *schedtrees, bool MaxILP)
    : DFSResult(dfs), ScheduledTrees(schedtrees), MaximizeILP(MaxILP) {}

  /// \brief Apply a less-than relation on node priority.
  ///
  /// (Return true if A comes after B in the Q.)
  bool operator()(const SUnit *A, const SUnit *B) const {
    unsigned SchedTreeA = DFSResult->getSubtreeID(A);
    unsigned SchedTreeB = DFSResult->getSubtreeID(B);
    if (SchedTreeA != SchedTreeB) {
      // Unscheduled trees have lower priority.
      if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
        return ScheduledTrees->test(SchedTreeB);

      // Trees with shallower connections have lower priority.
      if (DFSResult->getSubtreeLevel(SchedTreeA)
          != DFSResult->getSubtreeLevel(SchedTreeB)) {
        return DFSResult->getSubtreeLevel(SchedTreeA)
          < DFSResult->getSubtreeLevel(SchedTreeB);
      }
    }
    if (MaximizeILP)
      return DFSResult->getILP(A) < DFSResult->getILP(B);
    else
      return DFSResult->getILP(A) > DFSResult->getILP(B);
  }
};

/// \brief Schedule based on the ILP metric.
class ILPScheduler : public MachineSchedStrategy {
  /// In case all subtrees are eventually connected to a common root through
  /// data dependence (e.g. reduction), place an upper limit on their size.
  ///
  /// FIXME: A subtree limit is generally good, but in the situation commented
  /// above, where multiple similar subtrees feed a common root, we should
  /// only split at a point where the resulting subtrees will be balanced
  /// (a motivating test case must be found).
  static const unsigned SubtreeLimit = 16;

  SchedDFSResult DFSResult;
  BitVector ScheduledTrees;
  ILPOrder Cmp;

  std::vector<SUnit*> ReadyQ;
public:
  ILPScheduler(bool MaximizeILP)
    : DFSResult(/*BottomUp=*/true, SubtreeLimit),
      Cmp(&DFSResult, &ScheduledTrees, MaximizeILP) {}

  virtual void initialize(ScheduleDAGMI *DAG) {
    ReadyQ.clear();
    DFSResult.clear();
    DFSResult.resize(DAG->SUnits.size());
    ScheduledTrees.clear();
  }

  virtual void registerRoots() {
    DFSResult.compute(ReadyQ);
    ScheduledTrees.resize(DFSResult.getNumSubtrees());
    // Restore the heap in ReadyQ with the updated DFS results.
    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  }

  /// Implement MachineSchedStrategy interface.
  /// -----------------------------------------

  /// Callback to select the highest priority node from the ready Q.
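  /// ReadyQ is maintained as a heap ordered by ILPOrder, so the front element
  /// is the best node under the current min/max ILP policy.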
  virtual SUnit *pickNode(bool &IsTopNode) {
    if (ReadyQ.empty()) return NULL;
    std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
    SUnit *SU = ReadyQ.back();
    ReadyQ.pop_back();
    IsTopNode = false;
    DEBUG(dbgs() << "*** Scheduling " << "SU(" << SU->NodeNum << "): "
          << *SU->getInstr()
          << " ILP: " << DFSResult.getILP(SU)
          << " Tree: " << DFSResult.getSubtreeID(SU) << " @"
          << DFSResult.getSubtreeLevel(DFSResult.getSubtreeID(SU)) << '\n');
    return SU;
  }

  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
  /// DFSResults, and resort the priority Q.
  virtual void schedNode(SUnit *SU, bool IsTopNode) {
    assert(!IsTopNode && "SchedDFSResult needs bottom-up");
    if (!ScheduledTrees.test(DFSResult.getSubtreeID(SU))) {
      ScheduledTrees.set(DFSResult.getSubtreeID(SU));
      DFSResult.scheduleTree(DFSResult.getSubtreeID(SU));
      std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
    }
  }

  virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }

  virtual void releaseBottomNode(SUnit *SU) {
    ReadyQ.push_back(SU);
    std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  }
};
} // namespace

static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMI(C, new ILPScheduler(true));
}
static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMI(C, new ILPScheduler(false));
}
static MachineSchedRegistry ILPMaxRegistry(
  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
static MachineSchedRegistry ILPMinRegistry(
  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);

//===----------------------------------------------------------------------===//
// Machine Instruction Shuffler for Correctness Testing
//===----------------------------------------------------------------------===//

#ifndef NDEBUG
namespace {
/// Apply a less-than relation on the node order, which corresponds to the
/// instruction order prior to scheduling. IsReverse implements greater-than.
template<bool IsReverse>
struct SUnitOrder {
  bool operator()(SUnit *A, SUnit *B) const {
    if (IsReverse)
      return A->NodeNum > B->NodeNum;
    else
      return A->NodeNum < B->NodeNum;
  }
};

/// Reorder instructions as much as possible.
class InstructionShuffler : public MachineSchedStrategy {
  bool IsAlternating;
  bool IsTopDown;

  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
  // gives nodes with a higher number higher priority, causing the latest
  // instructions to be scheduled first.
  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
    TopQ;
  // When scheduling bottom-up, use greater-than as the queue priority.
  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
    BottomQ;
public:
  InstructionShuffler(bool alternate, bool topdown)
    : IsAlternating(alternate), IsTopDown(topdown) {}

  virtual void initialize(ScheduleDAGMI *) {
    TopQ.clear();
    BottomQ.clear();
  }

  /// Implement MachineSchedStrategy interface.
  /// -----------------------------------------

  virtual SUnit *pickNode(bool &IsTopNode) {
    SUnit *SU;
    if (IsTopDown) {
      do {
        if (TopQ.empty()) return NULL;
        SU = TopQ.top();
        TopQ.pop();
      } while (SU->isScheduled);
      IsTopNode = true;
    }
    else {
      do {
        if (BottomQ.empty()) return NULL;
        SU = BottomQ.top();
        BottomQ.pop();
      } while (SU->isScheduled);
      IsTopNode = false;
    }
    if (IsAlternating)
      IsTopDown = !IsTopDown;
    return SU;
  }

  virtual void schedNode(SUnit *SU, bool IsTopNode) {}

  virtual void releaseTopNode(SUnit *SU) {
    TopQ.push(SU);
  }
  virtual void releaseBottomNode(SUnit *SU) {
    BottomQ.push(SU);
  }
};
} // namespace

static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
  bool Alternate = !ForceTopDown && !ForceBottomUp;
  bool TopDown = !ForceBottomUp;
  assert((TopDown || !ForceTopDown) &&
         "-misched-topdown incompatible with -misched-bottomup");
  return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown));
}
static MachineSchedRegistry ShufflerRegistry(
  "shuffle", "Shuffle machine instructions alternating directions",
  createInstructionShuffler);
#endif // !NDEBUG