//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// MachineScheduler schedules machine instructions after phi elimination. It
// preserves LiveIntervals so it can be invoked before register allocation.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "misched"

#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDAGILP.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/PriorityQueue.h"

#include <queue>

using namespace llvm;

namespace llvm {
cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
                           cl::desc("Force top-down list scheduling"));
cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
                            cl::desc("Force bottom-up list scheduling"));
}

#ifndef NDEBUG
static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
  cl::desc("Pop up a window to show MISched dags after they are processed"));

static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
#else
static bool ViewMISchedDAGs = false;
#endif // NDEBUG

// Threshold to very roughly model an out-of-order processor's instruction
// buffers. If the actual value of this threshold matters much in practice, then
// it can be specified by the machine model. For now, it's an experimental
// tuning knob to determine when and if it matters.
static cl::opt<unsigned> ILPWindow("ilp-window", cl::Hidden,
  cl::desc("Allow expected latency to exceed the critical path by N cycles "
           "before attempting to balance ILP"),
  cl::init(10U));

// Experimental heuristics
static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
  cl::desc("Enable load clustering."), cl::init(true));

// Experimental heuristics
static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
  cl::desc("Enable scheduling for macro fusion."), cl::init(true));

//===----------------------------------------------------------------------===//
// Machine Instruction Scheduling Pass and Registry
//===----------------------------------------------------------------------===//

MachineSchedContext::MachineSchedContext():
  MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) {
  RegClassInfo = new RegisterClassInfo();
}

MachineSchedContext::~MachineSchedContext() {
  delete RegClassInfo;
}

namespace {
/// MachineScheduler runs after coalescing and before register allocation.
class MachineScheduler : public MachineSchedContext,
                         public MachineFunctionPass {
public:
  MachineScheduler();

  virtual void getAnalysisUsage(AnalysisUsage &AU) const;

  virtual void releaseMemory() {}

  virtual bool runOnMachineFunction(MachineFunction&);

  virtual void print(raw_ostream &O, const Module* = 0) const;

  static char ID; // Class identification, replacement for typeinfo
};
} // namespace

char MachineScheduler::ID = 0;

char &llvm::MachineSchedulerID = MachineScheduler::ID;

INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
                      "Machine Instruction Scheduler", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(MachineScheduler, "misched",
                    "Machine Instruction Scheduler", false, false)

MachineScheduler::MachineScheduler()
: MachineFunctionPass(ID) {
  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
}

void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequiredID(MachineDominatorsID);
  AU.addRequired<MachineLoopInfo>();
  AU.addRequired<AliasAnalysis>();
  AU.addRequired<TargetPassConfig>();
  AU.addRequired<SlotIndexes>();
  AU.addPreserved<SlotIndexes>();
  AU.addRequired<LiveIntervals>();
  AU.addPreserved<LiveIntervals>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

MachinePassRegistry MachineSchedRegistry::Registry;

/// A dummy default scheduler factory indicates whether the scheduler
/// is overridden on the command line.
static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
  return 0;
}

/// MachineSchedOpt allows command line selection of the scheduler.
static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
               RegisterPassParser<MachineSchedRegistry> >
MachineSchedOpt("misched",
                cl::init(&useDefaultMachineSched), cl::Hidden,
                cl::desc("Machine instruction scheduler to use"));

static MachineSchedRegistry
DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
                     useDefaultMachineSched);

/// Forward declare the standard machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C);


/// Decrement this iterator until reaching the top or a non-debug instr.
static MachineBasicBlock::iterator
priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) {
  assert(I != Beg && "reached the top of the region, cannot decrement");
  while (--I != Beg) {
    if (!I->isDebugValue())
      break;
  }
  return I;
}

/// If this iterator is a debug value, increment until reaching the End or a
/// non-debug instruction.
static MachineBasicBlock::iterator
nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
  for(; I != End; ++I) {
    if (!I->isDebugValue())
      break;
  }
  return I;
}

/// Top-level MachineScheduler pass driver.
///
/// Visit blocks in function order. Divide each block into scheduling regions
/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
/// consistent with the DAG builder, which traverses the interior of the
/// scheduling regions bottom-up.
///
/// This design avoids exposing scheduling boundaries to the DAG builder,
/// simplifying the DAG builder's support for "special" target instructions.
/// At the same time the design allows target schedulers to operate across
/// scheduling boundaries, for example to bundle the boundary instructions
/// without reordering them. This creates complexity, because the target
/// scheduler must update the RegionBegin and RegionEnd positions cached by
/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
/// design would be to split blocks at scheduling boundaries, but LLVM has a
/// general bias against block splitting purely for implementation simplicity.
bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
  DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));

  // Initialize the context of the pass.
  MF = &mf;
  MLI = &getAnalysis<MachineLoopInfo>();
  MDT = &getAnalysis<MachineDominatorTree>();
  PassConfig = &getAnalysis<TargetPassConfig>();
  AA = &getAnalysis<AliasAnalysis>();

  LIS = &getAnalysis<LiveIntervals>();
  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();

  RegClassInfo->runOnMachineFunction(*MF);

  // Select the scheduler, or set the default.
  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
  if (Ctor == useDefaultMachineSched) {
    // Get the default scheduler set by the target.
    Ctor = MachineSchedRegistry::getDefault();
    if (!Ctor) {
      Ctor = createConvergingSched;
      MachineSchedRegistry::setDefault(Ctor);
    }
  }
  // Instantiate the selected scheduler.
  OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));

  // Visit all machine basic blocks.
  //
  // TODO: Visit blocks in global postorder or postorder within the bottom-up
  // loop tree. Then we can optionally compute global RegPressure.
  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
       MBB != MBBEnd; ++MBB) {

    Scheduler->startBlock(MBB);

    // Break the block into scheduling regions [I, RegionEnd), and schedule each
    // region as soon as it is discovered. RegionEnd points to the scheduling
    // boundary at the bottom of the region. The DAG does not include RegionEnd,
    // but the region does (i.e. the next RegionEnd is above the previous
    // RegionBegin). If the current block has no terminator then RegionEnd ==
    // MBB->end() for the bottom region.
    //
    // The Scheduler may insert instructions during either schedule() or
    // exitRegion(), even for empty regions. So the local iterators 'I' and
    // 'RegionEnd' are invalid across these calls.
    unsigned RemainingInstrs = MBB->size();
    for(MachineBasicBlock::iterator RegionEnd = MBB->end();
        RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {

      // Avoid decrementing RegionEnd for blocks with no terminator.
      if (RegionEnd != MBB->end()
          || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
        --RegionEnd;
        // Count the boundary instruction.
        --RemainingInstrs;
      }

      // The next region starts above the previous region. Look backward in the
      // instruction stream until we find the nearest boundary.
      MachineBasicBlock::iterator I = RegionEnd;
      for(;I != MBB->begin(); --I, --RemainingInstrs) {
        if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
          break;
      }
      // Notify the scheduler of the region, even if we may skip scheduling
      // it. Perhaps it still needs to be bundled.
      Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs);

      // Skip empty scheduling regions (0 or 1 schedulable instructions).
      if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
        // Close the current region. Bundle the terminator if needed.
        // This invalidates 'RegionEnd' and 'I'.
        Scheduler->exitRegion();
        continue;
      }
      DEBUG(dbgs() << "********** MI Scheduling **********\n");
      DEBUG(dbgs() << MF->getName()
            << ":BB#" << MBB->getNumber() << "\n From: " << *I << " To: ";
            if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
            else dbgs() << "End";
            dbgs() << " Remaining: " << RemainingInstrs << "\n");

      // Schedule a region: possibly reorder instructions.
      // This invalidates 'RegionEnd' and 'I'.
      Scheduler->schedule();

      // Close the current region.
      Scheduler->exitRegion();

      // Scheduling has invalidated the current iterator 'I'. Ask the
      // scheduler for the top of its scheduled region.
      RegionEnd = Scheduler->begin();
    }
    assert(RemainingInstrs == 0 && "Instruction count mismatch!");
    Scheduler->finishBlock();
  }
  Scheduler->finalizeSchedule();
  DEBUG(LIS->print(dbgs()));
  return true;
}

void MachineScheduler::print(raw_ostream &O, const Module* m) const {
  // unimplemented
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ReadyQueue::dump() {
  dbgs() << Name << ": ";
  for (unsigned i = 0, e = Queue.size(); i < e; ++i)
    dbgs() << Queue[i]->NodeNum << " ";
  dbgs() << "\n";
}
#endif

//===----------------------------------------------------------------------===//
// ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
// preservation.
//===----------------------------------------------------------------------===//

bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
  if (SuccSU != &ExitSU) {
    // Do not use WillCreateCycle, it assumes SD scheduling.
    // If Pred is reachable from Succ, then the edge creates a cycle.
    if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
      return false;
    Topo.AddPred(SuccSU, PredDep.getSUnit());
  }
  SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
  // Return true regardless of whether a new edge needed to be inserted.
  return true;
}

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
/// NumPredsLeft reaches zero, release the successor node.
///
/// FIXME: Adjust SuccSU height based on MinLatency.
void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

  if (SuccEdge->isWeak()) {
    --SuccSU->WeakPredsLeft;
    if (SuccEdge->isCluster())
      NextClusterSucc = SuccSU;
    return;
  }
#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --SuccSU->NumPredsLeft;
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    SchedImpl->releaseTopNode(SuccSU);
}

/// releaseSuccessors - Call releaseSucc on each of SU's successors.
void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    releaseSucc(SU, &*I);
  }
}

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
/// NumSuccsLeft reaches zero, release the predecessor node.
///
/// FIXME: Adjust PredSU height based on MinLatency.
void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

  if (PredEdge->isWeak()) {
    --PredSU->WeakSuccsLeft;
    if (PredEdge->isCluster())
      NextClusterPred = PredSU;
    return;
  }
#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --PredSU->NumSuccsLeft;
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
    SchedImpl->releaseBottomNode(PredSU);
}

/// releasePredecessors - Call releasePred on each of SU's predecessors.
void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    releasePred(SU, &*I);
  }
}

void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
                                    MachineBasicBlock::iterator InsertPos) {
  // Advance RegionBegin if the first instruction moves down.
  if (&*RegionBegin == MI)
    ++RegionBegin;

  // Update the instruction stream.
  BB->splice(InsertPos, BB, MI);

  // Update LiveIntervals
  LIS->handleMove(MI, /*UpdateFlags=*/true);

  // Recede RegionBegin if an instruction moves above the first.
  if (RegionBegin == InsertPos)
    RegionBegin = MI;
}

bool ScheduleDAGMI::checkSchedLimit() {
#ifndef NDEBUG
  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
    CurrentTop = CurrentBottom;
    return false;
  }
  ++NumInstrsScheduled;
#endif
  return true;
}

/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
/// crossing a scheduling boundary. [begin, end) includes all instructions in
/// the region, including the boundary itself and single-instruction regions
/// that don't get scheduled.
void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
                                MachineBasicBlock::iterator begin,
                                MachineBasicBlock::iterator end,
                                unsigned endcount)
{
  ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);

  // For convenience remember the end of the liveness region.
  LiveRegionEnd =
    (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd);
}

// Setup the register pressure trackers for the top scheduled and bottom
// scheduled regions.
void ScheduleDAGMI::initRegPressure() {
  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);

  // Close the RPTracker to finalize live ins.
  RPTracker.closeRegion();

  DEBUG(RPTracker.getPressure().dump(TRI));

  // Initialize the live ins and live outs.
  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);

  // Close one end of the tracker so we can call
  // getMaxUpward/DownwardPressureDelta before advancing across any
  // instructions. This converts currently live regs into live ins/outs.
  TopRPTracker.closeTop();
  BotRPTracker.closeBottom();

  // Account for liveness generated by the region boundary.
  if (LiveRegionEnd != RegionEnd)
    BotRPTracker.recede();

  assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");

  // Cache the list of excess pressure sets in this region. This will also track
  // the max pressure in the scheduled code for these sets.
  RegionCriticalPSets.clear();
  std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure;
  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
    unsigned Limit = TRI->getRegPressureSetLimit(i);
    DEBUG(dbgs() << TRI->getRegPressureSetName(i)
          << "Limit " << Limit
          << " Actual " << RegionPressure[i] << "\n");
    if (RegionPressure[i] > Limit)
      RegionCriticalPSets.push_back(PressureElement(i, 0));
  }
  DEBUG(dbgs() << "Excess PSets: ";
        for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
          dbgs() << TRI->getRegPressureSetName(
            RegionCriticalPSets[i].PSetID) << " ";
        dbgs() << "\n");
}

// FIXME: When the pressure tracker deals in pressure differences then we won't
// iterate over all RegionCriticalPSets[i].
void ScheduleDAGMI::
updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
  for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
    unsigned ID = RegionCriticalPSets[i].PSetID;
    int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
    if ((int)NewMaxPressure[ID] > MaxUnits)
      MaxUnits = NewMaxPressure[ID];
  }
}

/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
///
/// This is a skeletal driver, with all the functionality pushed into helpers,
/// so that it can be easily extended by experimental schedulers. Generally,
/// implementing MachineSchedStrategy should be sufficient to implement a new
/// scheduling algorithm. However, if a scheduler further subclasses
/// ScheduleDAGMI then it will want to override this virtual method in order to
/// update any specialized state.
void ScheduleDAGMI::schedule() {
  buildDAGWithRegPressure();

  Topo.InitDAGTopologicalSorting();

  postprocessDAG();

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));

  if (ViewMISchedDAGs) viewGraph();

  initQueues();

  bool IsTopNode = false;
  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
    assert(!SU->isScheduled && "Node already scheduled");
    if (!checkSchedLimit())
      break;

    scheduleMI(SU, IsTopNode);

    updateQueues(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  placeDebugValues();

  DEBUG({
      unsigned BBNum = top()->getParent()->getNumber();
      dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
      dumpSchedule();
      dbgs() << '\n';
    });
}

/// Build the DAG and setup three register pressure trackers.
void ScheduleDAGMI::buildDAGWithRegPressure() {
  // Initialize the register pressure tracker used by buildSchedGraph.
  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);

  // Account for liveness generated by the region boundary.
  if (LiveRegionEnd != RegionEnd)
    RPTracker.recede();

  // Build the DAG, and compute current register pressure.
  buildSchedGraph(AA, &RPTracker);
  if (ViewMISchedDAGs) viewGraph();

  // Initialize top/bottom trackers after computing region pressure.
  initRegPressure();
}

/// Apply each ScheduleDAGMutation step in order.
void ScheduleDAGMI::postprocessDAG() {
  for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
    Mutations[i]->apply(this);
  }
}

// Release all DAG roots for scheduling.
//
// Nodes with unreleased weak edges can still be roots.
void ScheduleDAGMI::releaseRoots() {
  SmallVector<SUnit*, 16> BotRoots;

  for (std::vector<SUnit>::iterator
         I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
    SUnit *SU = &(*I);
    // A SUnit is ready to top schedule if it has no predecessors.
    if (!I->NumPredsLeft && SU != &EntrySU)
      SchedImpl->releaseTopNode(SU);
    // A SUnit is ready to bottom schedule if it has no successors.
    if (!I->NumSuccsLeft && SU != &ExitSU)
      BotRoots.push_back(SU);
  }
  // Release bottom roots in reverse order so the higher priority nodes appear
  // first. This is more natural and slightly more efficient.
  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
    SchedImpl->releaseBottomNode(*I);
}

/// Identify DAG roots and setup scheduler queues.
void ScheduleDAGMI::initQueues() {
  NextClusterSucc = NULL;
  NextClusterPred = NULL;

  // Initialize the strategy before modifying the DAG.
  SchedImpl->initialize(this);

  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
  releaseRoots();

  releaseSuccessors(&EntrySU);
  releasePredecessors(&ExitSU);

  SchedImpl->registerRoots();

  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
  CurrentBottom = RegionEnd;
}

/// Move an instruction and update register pressure.
void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) {
  // Move the instruction to its new location in the instruction stream.
  MachineInstr *MI = SU->getInstr();

  if (IsTopNode) {
    assert(SU->isTopReady() && "node still has unscheduled dependencies");
    if (&*CurrentTop == MI)
      CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
    else {
      moveInstruction(MI, CurrentTop);
      TopRPTracker.setPos(MI);
    }

    // Update top scheduled pressure.
    TopRPTracker.advance();
    assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
    updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
  }
  else {
    assert(SU->isBottomReady() && "node still has unscheduled dependencies");
    MachineBasicBlock::iterator priorII =
      priorNonDebug(CurrentBottom, CurrentTop);
    if (&*priorII == MI)
      CurrentBottom = priorII;
    else {
      if (&*CurrentTop == MI) {
        CurrentTop = nextIfDebug(++CurrentTop, priorII);
        TopRPTracker.setPos(CurrentTop);
      }
      moveInstruction(MI, CurrentBottom);
      CurrentBottom = MI;
    }
    // Update bottom scheduled pressure.
    BotRPTracker.recede();
    assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
    updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
  }
}

/// Update scheduler queues after scheduling an instruction.
void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
  // Release dependent instructions for scheduling.
  if (IsTopNode)
    releaseSuccessors(SU);
  else
    releasePredecessors(SU);

  SU->isScheduled = true;

  // Notify the scheduling strategy after updating the DAG.
  SchedImpl->schedNode(SU, IsTopNode);
}

/// Reinsert any remaining debug_values, just like the PostRA scheduler.
void ScheduleDAGMI::placeDebugValues() {
  // If first instruction was a DBG_VALUE then put it back.
  if (FirstDbgValue) {
    BB->splice(RegionBegin, BB, FirstDbgValue);
    RegionBegin = FirstDbgValue;
  }

  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
    std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
    MachineInstr *DbgValue = P.first;
    MachineBasicBlock::iterator OrigPrevMI = P.second;
    BB->splice(++OrigPrevMI, BB, DbgValue);
    if (OrigPrevMI == llvm::prior(RegionEnd))
      RegionEnd = DbgValue;
  }
  DbgValues.clear();
  FirstDbgValue = NULL;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ScheduleDAGMI::dumpSchedule() const {
  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
    if (SUnit *SU = getSUnit(&(*MI)))
      SU->dump(this);
    else
      dbgs() << "Missing SUnit\n";
  }
}
#endif

//===----------------------------------------------------------------------===//
// LoadClusterMutation - DAG post-processing to cluster loads.
//===----------------------------------------------------------------------===//

namespace {
/// \brief Post-process the DAG to create cluster edges between neighboring
/// loads.
class LoadClusterMutation : public ScheduleDAGMutation {
  struct LoadInfo {
    SUnit *SU;
    unsigned BaseReg;
    unsigned Offset;
    LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
      : SU(su), BaseReg(reg), Offset(ofs) {}
  };
  static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS,
                           const LoadClusterMutation::LoadInfo &RHS);

  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
public:
  LoadClusterMutation(const TargetInstrInfo *tii,
                      const TargetRegisterInfo *tri)
    : TII(tii), TRI(tri) {}

  virtual void apply(ScheduleDAGMI *DAG);
protected:
  void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
};
} // anonymous

bool LoadClusterMutation::LoadInfoLess(
  const LoadClusterMutation::LoadInfo &LHS,
  const LoadClusterMutation::LoadInfo &RHS) {
  if (LHS.BaseReg != RHS.BaseReg)
    return LHS.BaseReg < RHS.BaseReg;
  return LHS.Offset < RHS.Offset;
}

void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
                                                  ScheduleDAGMI *DAG) {
  SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords;
  for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
    SUnit *SU = Loads[Idx];
    unsigned BaseReg;
    unsigned Offset;
    if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
      LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
  }
  if (LoadRecords.size() < 2)
    return;
  std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess);
  unsigned ClusterLength = 1;
  for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
    if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
      ClusterLength = 1;
      continue;
    }

    SUnit *SUa = LoadRecords[Idx].SU;
    SUnit *SUb = LoadRecords[Idx+1].SU;
    if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength)
        && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {

      DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
            << SUb->NodeNum << ")\n");
      // Copy successor edges from SUa to SUb. Interleaving computation
      // dependent on SUa can prevent load combining due to register reuse.
      // Predecessor edges do not need to be copied from SUb to SUa since nearby
      // loads should have effectively the same inputs.
      for (SUnit::const_succ_iterator
             SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
        if (SI->getSUnit() == SUb)
          continue;
        DEBUG(dbgs() << " Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
        DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
      }
      ++ClusterLength;
    }
    else
      ClusterLength = 1;
  }
}

/// \brief Callback from DAG postProcessing to create cluster edges for loads.
void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
  // Map DAG NodeNum to store chain ID.
  DenseMap<unsigned, unsigned> StoreChainIDs;
  // Map each store chain to a set of dependent loads.
  SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
  for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
    SUnit *SU = &DAG->SUnits[Idx];
    if (!SU->getInstr()->mayLoad())
      continue;
    unsigned ChainPredID = DAG->SUnits.size();
    for (SUnit::const_pred_iterator
           PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
      if (PI->isCtrl()) {
        ChainPredID = PI->getSUnit()->NodeNum;
        break;
      }
    }
    // Check if this chain-like pred has been seen
    // before. ChainPredID==MaxNodeID for loads at the top of the schedule.
    unsigned NumChains = StoreChainDependents.size();
    std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
      StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
    if (Result.second)
      StoreChainDependents.resize(NumChains + 1);
    StoreChainDependents[Result.first->second].push_back(SU);
  }
  // Iterate over the store chains.
  for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
    clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
}

//===----------------------------------------------------------------------===//
// MacroFusion - DAG post-processing to encourage fusion of macro ops.
//===----------------------------------------------------------------------===//

namespace {
/// \brief Post-process the DAG to create cluster edges between instructions
/// that may be fused by the processor into a single operation.
class MacroFusion : public ScheduleDAGMutation {
  const TargetInstrInfo *TII;
public:
  MacroFusion(const TargetInstrInfo *tii): TII(tii) {}

  virtual void apply(ScheduleDAGMI *DAG);
};
} // anonymous

/// \brief Callback from DAG postProcessing to create cluster edges to encourage
/// fused operations.
void MacroFusion::apply(ScheduleDAGMI *DAG) {
  // For now, assume targets can only fuse with the branch.
  MachineInstr *Branch = DAG->ExitSU.getInstr();
  if (!Branch)
    return;

  for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
    SUnit *SU = &DAG->SUnits[--Idx];
    if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
      continue;

    // Create a single weak edge from SU to ExitSU. The only effect is to cause
    // bottom-up scheduling to heavily prioritize the clustered SU. There is no
    // need to copy predecessor edges from ExitSU to SU, since top-down
    // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
    // of SU, we could create an artificial edge from the deepest root, but it
    // hasn't been needed yet.
    bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
    (void)Success;
    assert(Success && "No DAG nodes should be reachable from ExitSU");

    DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
    break;
  }
}

//===----------------------------------------------------------------------===//
// ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
//===----------------------------------------------------------------------===//

namespace {
/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
/// the schedule.
class ConvergingScheduler : public MachineSchedStrategy {
public:
  /// Represent the type of SchedCandidate found within a single queue.
  /// pickNodeBidirectional depends on these listed by decreasing priority.
  enum CandReason {
    NoCand, SingleExcess, SingleCritical, Cluster,
    ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
    TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse,
    NodeOrder};

#ifndef NDEBUG
  static const char *getReasonStr(ConvergingScheduler::CandReason Reason);
#endif

  /// Policy for scheduling the next instruction in the candidate's zone.
  struct CandPolicy {
    bool ReduceLatency;
    unsigned ReduceResIdx;
    unsigned DemandResIdx;

    CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
  };

  /// Status of an instruction's critical resource consumption.
  struct SchedResourceDelta {
    // Count critical resources in the scheduled region required by SU.
    unsigned CritResources;

    // Count critical resources from another region consumed by SU.
    unsigned DemandedResources;

    SchedResourceDelta(): CritResources(0), DemandedResources(0) {}

    bool operator==(const SchedResourceDelta &RHS) const {
      return CritResources == RHS.CritResources
        && DemandedResources == RHS.DemandedResources;
    }
    bool operator!=(const SchedResourceDelta &RHS) const {
      return !operator==(RHS);
    }
  };

  /// Store the state used by ConvergingScheduler heuristics, required for the
  /// lifetime of one invocation of pickNode().
  struct SchedCandidate {
    CandPolicy Policy;

    // The best SUnit candidate.
    SUnit *SU;

    // The reason for this candidate.
    CandReason Reason;

    // Register pressure values for the best candidate.
    RegPressureDelta RPDelta;

    // Critical resource consumption of the best candidate.
    SchedResourceDelta ResDelta;

    SchedCandidate(const CandPolicy &policy)
      : Policy(policy), SU(NULL), Reason(NoCand) {}

    bool isValid() const { return SU; }

    // Copy the status of another candidate without changing policy.
    void setBest(SchedCandidate &Best) {
      assert(Best.Reason != NoCand && "uninitialized Sched candidate");
      SU = Best.SU;
      Reason = Best.Reason;
      RPDelta = Best.RPDelta;
      ResDelta = Best.ResDelta;
    }

    void initResourceDelta(const ScheduleDAGMI *DAG,
                           const TargetSchedModel *SchedModel);
  };

  /// Summarize the unscheduled region.
  struct SchedRemainder {
    // Critical path through the DAG in expected latency.
    unsigned CriticalPath;

    // Unscheduled resources
    SmallVector<unsigned, 16> RemainingCounts;
    // Critical resource for the unscheduled zone.
    unsigned CritResIdx;
    // Number of micro-ops left to schedule.
    unsigned RemainingMicroOps;
    // Is the unscheduled zone resource limited.
    bool IsResourceLimited;

    unsigned MaxRemainingCount;

    void reset() {
      CriticalPath = 0;
      RemainingCounts.clear();
      CritResIdx = 0;
      RemainingMicroOps = 0;
      IsResourceLimited = false;
      MaxRemainingCount = 0;
    }

    SchedRemainder() { reset(); }

    void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
  };

  /// Each Scheduling boundary is associated with ready queues. It tracks the
  /// current cycle in the direction of movement, and maintains the state
  /// of "hazards" and other interlocks at the current cycle.
  struct SchedBoundary {
    ScheduleDAGMI *DAG;
    const TargetSchedModel *SchedModel;
    SchedRemainder *Rem;

    ReadyQueue Available;
    ReadyQueue Pending;
    bool CheckPending;

    // For heuristics, keep a list of the nodes that immediately depend on the
    // most recently scheduled node.
    SmallPtrSet<const SUnit*, 8> NextSUs;

    ScheduleHazardRecognizer *HazardRec;

    unsigned CurrCycle;
    unsigned IssueCount;

    /// MinReadyCycle - Cycle of the soonest available instruction.
    unsigned MinReadyCycle;

    // The expected latency of the critical path in this scheduled zone.
    unsigned ExpectedLatency;

    // Resources used in the scheduled zone beyond this boundary.
    SmallVector<unsigned, 16> ResourceCounts;

    // Cache the critical resources ID in this scheduled zone.
    unsigned CritResIdx;

    // Is the scheduled region resource limited vs. latency limited.
    bool IsResourceLimited;

    unsigned ExpectedCount;

    // Policy flag: attempt to find ILP until expected latency is covered.
    bool ShouldIncreaseILP;

#ifndef NDEBUG
    // Remember the greatest min operand latency.
    unsigned MaxMinLatency;
#endif

    void reset() {
      Available.clear();
      Pending.clear();
      CheckPending = false;
      NextSUs.clear();
      HazardRec = 0;
      CurrCycle = 0;
      IssueCount = 0;
      MinReadyCycle = UINT_MAX;
      ExpectedLatency = 0;
      ResourceCounts.resize(1);
      assert(!ResourceCounts[0] && "nonzero count for bad resource");
      CritResIdx = 0;
      IsResourceLimited = false;
      ExpectedCount = 0;
      ShouldIncreaseILP = false;
#ifndef NDEBUG
      MaxMinLatency = 0;
#endif
      // Reserve a zero-count for invalid CritResIdx.
      ResourceCounts.resize(1);
    }

    /// Pending queues extend the ready queues with the same ID and the
    /// PendingFlag set.
    SchedBoundary(unsigned ID, const Twine &Name):
      DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
      Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P") {
      reset();
    }

    ~SchedBoundary() { delete HazardRec; }

    void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
              SchedRemainder *rem);

    bool isTop() const {
      return Available.getID() == ConvergingScheduler::TopQID;
    }

    unsigned getUnscheduledLatency(SUnit *SU) const {
      if (isTop())
        return SU->getHeight();
      return SU->getDepth();
    }

    unsigned getCriticalCount() const {
      return ResourceCounts[CritResIdx];
    }

    bool checkHazard(SUnit *SU);

    void checkILPPolicy();

    void releaseNode(SUnit *SU, unsigned ReadyCycle);

    void bumpCycle();

    void countResource(unsigned PIdx, unsigned Cycles);

    void bumpNode(SUnit *SU);

    void releasePending();

    void removeReady(SUnit *SU);

    SUnit *pickOnlyChoice();
  };

private:
  ScheduleDAGMI *DAG;
  const TargetSchedModel *SchedModel;
  const TargetRegisterInfo *TRI;

  // State of the top and bottom scheduled instruction boundaries.
  SchedRemainder Rem;
  SchedBoundary Top;
  SchedBoundary Bot;

public:
  /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
  enum {
    TopQID = 1,
    BotQID = 2,
    LogMaxQID = 2
  };

  ConvergingScheduler():
    DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}

  virtual void initialize(ScheduleDAGMI *dag);

  virtual SUnit *pickNode(bool &IsTopNode);

  virtual void schedNode(SUnit *SU, bool IsTopNode);

  virtual void releaseTopNode(SUnit *SU);

  virtual void releaseBottomNode(SUnit *SU);

  virtual void registerRoots();

protected:
  void balanceZones(
    ConvergingScheduler::SchedBoundary &CriticalZone,
    ConvergingScheduler::SchedCandidate &CriticalCand,
    ConvergingScheduler::SchedBoundary &OppositeZone,
    ConvergingScheduler::SchedCandidate &OppositeCand);

  void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand,
                           ConvergingScheduler::SchedCandidate &BotCand);

  void tryCandidate(SchedCandidate &Cand,
                    SchedCandidate &TryCand,
                    SchedBoundary &Zone,
                    const RegPressureTracker &RPTracker,
                    RegPressureTracker &TempTracker);

  SUnit *pickNodeBidirectional(bool &IsTopNode);

  void pickNodeFromQueue(SchedBoundary &Zone,
                         const RegPressureTracker &RPTracker,
                         SchedCandidate &Candidate);

#ifndef NDEBUG
  void traceCandidate(const SchedCandidate &Cand, const SchedBoundary &Zone);
#endif
};
} // namespace

void ConvergingScheduler::SchedRemainder::
init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
  reset();
  if (!SchedModel->hasInstrSchedModel())
    return;
  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
  for (std::vector<SUnit>::iterator
         I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
    const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
    RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC);
    for (TargetSchedModel::ProcResIter
           PI = SchedModel->getWriteProcResBegin(SC),
           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
      unsigned PIdx = PI->ProcResourceIdx;
      unsigned Factor = SchedModel->getResourceFactor(PIdx);
      RemainingCounts[PIdx] += (Factor * PI->Cycles);
    }
  }
}

void ConvergingScheduler::SchedBoundary::
init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
  reset();
  DAG = dag;
  SchedModel = smodel;
  Rem = rem;
  if (SchedModel->hasInstrSchedModel())
    ResourceCounts.resize(SchedModel->getNumProcResourceKinds());
}

void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
  DAG = dag;
  SchedModel = DAG->getSchedModel();
  TRI = DAG->TRI;
  Rem.init(DAG, SchedModel);
  Top.init(DAG, SchedModel, &Rem);
  Bot.init(DAG, SchedModel, &Rem);

  // Initialize resource counts.

  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
  // are disabled, then these HazardRecs will be disabled.
  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
  const TargetMachine &TM = DAG->MF.getTarget();
  Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
  Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);

  assert((!ForceTopDown || !ForceBottomUp) &&
         "-misched-topdown incompatible with -misched-bottomup");
}

void ConvergingScheduler::releaseTopNode(SUnit *SU) {
  if (SU->isScheduled)
    return;

  for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
    unsigned MinLatency = I->getMinLatency();
#ifndef NDEBUG
    Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
#endif
    if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
      SU->TopReadyCycle = PredReadyCycle + MinLatency;
  }
  Top.releaseNode(SU, SU->TopReadyCycle);
}

void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
  if (SU->isScheduled)
    return;

  assert(SU->getInstr() && "Scheduled SUnit must have instr");

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isWeak())
      continue;
    unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
    unsigned MinLatency = I->getMinLatency();
#ifndef NDEBUG
    Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
#endif
    if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
      SU->BotReadyCycle = SuccReadyCycle + MinLatency;
  }
  Bot.releaseNode(SU, SU->BotReadyCycle);
}

void ConvergingScheduler::registerRoots() {
  Rem.CriticalPath = DAG->ExitSU.getDepth();
  // Some roots may not feed into ExitSU. Check all of them in case.
  for (std::vector<SUnit*>::const_iterator
         I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
    if ((*I)->getDepth() > Rem.CriticalPath)
      Rem.CriticalPath = (*I)->getDepth();
  }
  DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
}

/// Does this SU have a hazard within the current instruction group.
///
/// The scheduler supports two modes of hazard recognition. The first is the
/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
/// supports highly complicated in-order reservation tables
/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
///
/// The second is a streamlined mechanism that checks for hazards based on
/// simple counters that the scheduler itself maintains. It explicitly checks
/// for instruction dispatch limitations, including the number of micro-ops that
/// can dispatch per cycle.
///
/// TODO: Also check whether the SU must start a new group.
bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
  if (HazardRec->isEnabled())
    return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;

  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
  if ((IssueCount > 0) && (IssueCount + uops > SchedModel->getIssueWidth())) {
    DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
          << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
    return true;
  }
  return false;
}

/// If expected latency is covered, disable ILP policy.
void ConvergingScheduler::SchedBoundary::checkILPPolicy() {
  if (ShouldIncreaseILP
      && (IsResourceLimited || ExpectedLatency <= CurrCycle)) {
    ShouldIncreaseILP = false;
    DEBUG(dbgs() << "Disable ILP: " << Available.getName() << '\n');
  }
}

void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
                                                     unsigned ReadyCycle) {

  if (ReadyCycle < MinReadyCycle)
    MinReadyCycle = ReadyCycle;

  // Check for interlocks first. For the purpose of other heuristics, an
  // instruction that cannot issue appears as if it's not in the ReadyQueue.
  if (ReadyCycle > CurrCycle || checkHazard(SU))
    Pending.push(SU);
  else
    Available.push(SU);

  // Record this node as an immediate dependent of the scheduled node.
  NextSUs.insert(SU);

  // If CriticalPath has been computed, then check if the unscheduled nodes
  // exceed the ILP window. Before registerRoots, CriticalPath==0.
  if (Rem->CriticalPath && (ExpectedLatency + getUnscheduledLatency(SU)
                            > Rem->CriticalPath + ILPWindow)) {
    ShouldIncreaseILP = true;
    DEBUG(dbgs() << "Increase ILP: " << Available.getName() << " "
          << ExpectedLatency << " + " << getUnscheduledLatency(SU) << '\n');
  }
}

/// Move the boundary of scheduled code by one cycle.
void ConvergingScheduler::SchedBoundary::bumpCycle() {
  unsigned Width = SchedModel->getIssueWidth();
  IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;

  unsigned NextCycle = CurrCycle + 1;
  assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
  if (MinReadyCycle > NextCycle) {
    IssueCount = 0;
    NextCycle = MinReadyCycle;
  }

  if (!HazardRec->isEnabled()) {
    // Bypass HazardRec virtual calls.
    CurrCycle = NextCycle;
  }
  else {
    // Bypass getHazardType calls in case of long latency.
    for (; CurrCycle != NextCycle; ++CurrCycle) {
      if (isTop())
        HazardRec->AdvanceCycle();
      else
        HazardRec->RecedeCycle();
    }
  }
  CheckPending = true;
  IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);

  DEBUG(dbgs() << " *** " << Available.getName() << " cycle "
        << CurrCycle << '\n');
}

/// Add the given processor resource to this scheduled zone.
void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx,
                                                       unsigned Cycles) {
  unsigned Factor = SchedModel->getResourceFactor(PIdx);
  DEBUG(dbgs() << " " << SchedModel->getProcResource(PIdx)->Name
        << " +(" << Cycles << "x" << Factor
        << ") / " << SchedModel->getLatencyFactor() << '\n');

  unsigned Count = Factor * Cycles;
  ResourceCounts[PIdx] += Count;
  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
  Rem->RemainingCounts[PIdx] -= Count;

  // Reset MaxRemainingCount for sanity.
  Rem->MaxRemainingCount = 0;

  // Check if this resource exceeds the current critical resource by a full
  // cycle. If so, it becomes the critical resource.
  if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx])
      >= (int)SchedModel->getLatencyFactor()) {
    CritResIdx = PIdx;
    DEBUG(dbgs() << " *** Critical resource "
          << SchedModel->getProcResource(PIdx)->Name << " x"
          << ResourceCounts[PIdx] << '\n');
  }
}

/// Move the boundary of scheduled code by one SUnit.
void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
  // Update the reservation table.
  if (HazardRec->isEnabled()) {
    if (!isTop() && SU->isCall) {
      // Calls are scheduled with their preceding instructions. For bottom-up
      // scheduling, clear the pipeline state before emitting.
      HazardRec->Reset();
    }
    HazardRec->EmitInstruction(SU);
  }
  // Update resource counts and critical resource.
  if (SchedModel->hasInstrSchedModel()) {
    const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
    Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC);
    for (TargetSchedModel::ProcResIter
           PI = SchedModel->getWriteProcResBegin(SC),
           PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
      countResource(PI->ProcResourceIdx, PI->Cycles);
    }
  }
  if (isTop()) {
    if (SU->getDepth() > ExpectedLatency)
      ExpectedLatency = SU->getDepth();
  }
  else {
    if (SU->getHeight() > ExpectedLatency)
      ExpectedLatency = SU->getHeight();
  }

  IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle);

  // Check the instruction group dispatch limit.
  // TODO: Check if this SU must end a dispatch group.
  IssueCount += SchedModel->getNumMicroOps(SU->getInstr());

  // checkHazard prevents scheduling multiple instructions per cycle that exceed
  // issue width. However, we commonly reach the maximum. In this case
  // opportunistically bump the cycle to avoid uselessly checking everything in
  // the readyQ. Furthermore, a single instruction may produce more than one
  // cycle's worth of micro-ops.
  if (IssueCount >= SchedModel->getIssueWidth()) {
    DEBUG(dbgs() << " *** Max instrs at cycle " << CurrCycle << '\n');
    bumpCycle();
  }
}

/// Release pending ready nodes into the available queue. This makes them
/// visible to heuristics.
void ConvergingScheduler::SchedBoundary::releasePending() {
  // If the available queue is empty, it is safe to reset MinReadyCycle.
  if (Available.empty())
    MinReadyCycle = UINT_MAX;

  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
    SUnit *SU = *(Pending.begin()+i);
    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;

    if (ReadyCycle < MinReadyCycle)
      MinReadyCycle = ReadyCycle;

    if (ReadyCycle > CurrCycle)
      continue;

    if (checkHazard(SU))
      continue;

    Available.push(SU);
    Pending.remove(Pending.begin()+i);
    --i; --e;
  }
  DEBUG(if (!Pending.empty()) Pending.dump());
  CheckPending = false;
}

/// Remove SU from the ready set for this boundary.
void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) {
  if (Available.isInQueue(SU))
    Available.remove(Available.find(SU));
  else {
    assert(Pending.isInQueue(SU) && "bad ready count");
    Pending.remove(Pending.find(SU));
  }
}

/// If this queue only has one ready candidate, return it. As a side effect,
/// defer any nodes that now hit a hazard, and advance the cycle until at least
/// one node is ready. If multiple instructions are ready, return NULL.
SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
  if (CheckPending)
    releasePending();

  if (IssueCount > 0) {
    // Defer any ready instrs that now have a hazard.
    for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
      if (checkHazard(*I)) {
        Pending.push(*I);
        I = Available.remove(I);
        continue;
      }
      ++I;
    }
  }
  for (unsigned i = 0; Available.empty(); ++i) {
    assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
           "permanent hazard"); (void)i;
    bumpCycle();
    releasePending();
  }
  if (Available.size() == 1)
    return *Available.begin();
  return NULL;
}

/// Record the candidate policy for opposite zones with different critical
/// resources.
///
/// If the CriticalZone is latency limited, don't force a policy for the
/// candidates here. Instead, when releasing each candidate, releaseNode
/// compares the region's critical path to the candidate's height or depth and
/// the scheduled zone's expected latency, then sets ShouldIncreaseILP.
void ConvergingScheduler::balanceZones(
  ConvergingScheduler::SchedBoundary &CriticalZone,
  ConvergingScheduler::SchedCandidate &CriticalCand,
  ConvergingScheduler::SchedBoundary &OppositeZone,
  ConvergingScheduler::SchedCandidate &OppositeCand) {

  if (!CriticalZone.IsResourceLimited)
    return;

  SchedRemainder *Rem = CriticalZone.Rem;

  // If the critical zone is overconsuming a resource relative to the
  // remainder, try to reduce it.
  unsigned RemainingCritCount =
    Rem->RemainingCounts[CriticalZone.CritResIdx];
  if ((int)(Rem->MaxRemainingCount - RemainingCritCount)
      > (int)SchedModel->getLatencyFactor()) {
    CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
    DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce "
          << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
          << '\n');
  }
  // If the other zone is underconsuming a resource relative to the full zone,
  // try to increase it.
  unsigned OppositeCount =
    OppositeZone.ResourceCounts[CriticalZone.CritResIdx];
  if ((int)(OppositeZone.ExpectedCount - OppositeCount)
      > (int)SchedModel->getLatencyFactor()) {
    OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx;
    DEBUG(dbgs() << "Balance " << OppositeZone.Available.getName() << " demand "
          << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name
          << '\n');
  }
}

/// Determine if the scheduled zones exceed resource limits or the critical path
/// and set each candidate's ReduceHeight policy accordingly.
void ConvergingScheduler::checkResourceLimits(
  ConvergingScheduler::SchedCandidate &TopCand,
  ConvergingScheduler::SchedCandidate &BotCand) {

  Bot.checkILPPolicy();
  Top.checkILPPolicy();
  if (Bot.ShouldIncreaseILP)
    BotCand.Policy.ReduceLatency = true;
  if (Top.ShouldIncreaseILP)
    TopCand.Policy.ReduceLatency = true;

  // Handle resource-limited regions.
  if (Top.IsResourceLimited && Bot.IsResourceLimited
      && Top.CritResIdx == Bot.CritResIdx) {
    // If the scheduled critical resource in both zones is no longer the
    // critical remaining resource, attempt to reduce resource height both ways.
    if (Top.CritResIdx != Rem.CritResIdx) {
      TopCand.Policy.ReduceResIdx = Top.CritResIdx;
      BotCand.Policy.ReduceResIdx = Bot.CritResIdx;
      DEBUG(dbgs() << "Reduce scheduled "
            << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n');
    }
    return;
  }
  // Handle latency-limited regions.
  if (!Top.IsResourceLimited && !Bot.IsResourceLimited) {
    // If the total scheduled expected latency exceeds the region's critical
    // path then reduce latency both ways.
    //
    // Just because a zone is not resource limited does not mean it is latency
    // limited. Unbuffered resources, such as max micro-ops, may cause CurrCycle
    // to exceed expected latency.
    if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath)
        && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) {
      TopCand.Policy.ReduceLatency = true;
      BotCand.Policy.ReduceLatency = true;
      DEBUG(dbgs() << "Reduce scheduled latency " << Top.ExpectedLatency
            << " + " << Bot.ExpectedLatency << '\n');
    }
    return;
  }
  // The critical resource is different in each zone, so request balancing.

  // Compute the cost of each zone.
  Rem.MaxRemainingCount = std::max(
    Rem.RemainingMicroOps * SchedModel->getMicroOpFactor(),
    Rem.RemainingCounts[Rem.CritResIdx]);
  Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle);
  Top.ExpectedCount = std::max(
    Top.getCriticalCount(),
    Top.ExpectedCount * SchedModel->getLatencyFactor());
  Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle);
  Bot.ExpectedCount = std::max(
    Bot.getCriticalCount(),
    Bot.ExpectedCount * SchedModel->getLatencyFactor());

  balanceZones(Top, TopCand, Bot, BotCand);
  balanceZones(Bot, BotCand, Top, TopCand);
}

void ConvergingScheduler::SchedCandidate::
initResourceDelta(const ScheduleDAGMI *DAG,
                  const TargetSchedModel *SchedModel) {
  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
    return;

  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
  for (TargetSchedModel::ProcResIter
         PI = SchedModel->getWriteProcResBegin(SC),
         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
    if (PI->ProcResourceIdx == Policy.ReduceResIdx)
      ResDelta.CritResources += PI->Cycles;
    if (PI->ProcResourceIdx == Policy.DemandResIdx)
      ResDelta.DemandedResources += PI->Cycles;
  }
}

/// Return true if this heuristic determines order.
static bool tryLess(unsigned TryVal, unsigned CandVal,
                    ConvergingScheduler::SchedCandidate &TryCand,
                    ConvergingScheduler::SchedCandidate &Cand,
                    ConvergingScheduler::CandReason Reason) {
  if (TryVal < CandVal) {
    TryCand.Reason = Reason;
    return true;
  }
  if (TryVal > CandVal) {
    if (Cand.Reason > Reason)
      Cand.Reason = Reason;
    return true;
  }
  return false;
}

static bool tryGreater(unsigned TryVal, unsigned CandVal,
                       ConvergingScheduler::SchedCandidate &TryCand,
                       ConvergingScheduler::SchedCandidate &Cand,
                       ConvergingScheduler::CandReason Reason) {
  if (TryVal > CandVal) {
    TryCand.Reason = Reason;
    return true;
  }
  if (TryVal < CandVal) {
    if (Cand.Reason > Reason)
      Cand.Reason = Reason;
    return true;
  }
  return false;
}

static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
}

/// Apply a set of heuristics to a new candidate. Heuristics are currently
/// hierarchical. This may be more efficient than a graduated cost model because
/// we don't need to evaluate all aspects of the model for each node in the
/// queue. But it's really done to make the heuristics easier to debug and
/// statistically analyze.
///
/// \param Cand provides the policy and current best candidate.
/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
/// \param Zone describes the scheduled zone that we are extending.
/// \param RPTracker describes reg pressure within the scheduled zone.
/// \param TempTracker is a scratch pressure tracker to reuse in queries.
void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
                                       SchedCandidate &TryCand,
                                       SchedBoundary &Zone,
                                       const RegPressureTracker &RPTracker,
                                       RegPressureTracker &TempTracker) {

  // Always initialize TryCand's RPDelta.
  TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta,
                                  DAG->getRegionCriticalPSets(),
                                  DAG->getRegPressure().MaxSetPressure);

  // Initialize the candidate if needed.
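  // The first node examined from a queue has no incumbent to compare against,
  // so it is accepted unconditionally with the weakest reason, NodeOrder; any
  // later node must beat it on one of the heuristics below.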
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return;
  }
  // Avoid exceeding the target's limit.
  if (tryLess(TryCand.RPDelta.Excess.UnitIncrease,
              Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess))
    return;
  if (Cand.Reason == SingleExcess)
    Cand.Reason = MultiPressure;

  // Avoid increasing the max critical pressure in the scheduled region.
  if (tryLess(TryCand.RPDelta.CriticalMax.UnitIncrease,
              Cand.RPDelta.CriticalMax.UnitIncrease,
              TryCand, Cand, SingleCritical))
    return;
  if (Cand.Reason == SingleCritical)
    Cand.Reason = MultiPressure;

  // Keep clustered nodes together to encourage downstream peephole
  // optimizations which may reduce resource requirements.
  //
  // This is a best effort to set things up for a post-RA pass. Optimizations
  // like generating loads of multiple registers should ideally be done within
  // the scheduler pass by combining the loads during DAG postprocessing.
  const SUnit *NextClusterSU =
    Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
                 TryCand, Cand, Cluster))
    return;
  // Currently, weak edges are for clustering, so we hard-code that reason.
  // However, deferring the current TryCand will not change Cand's reason.
  CandReason OrigReason = Cand.Reason;
  if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
              getWeakLeft(Cand.SU, Zone.isTop()),
              TryCand, Cand, Cluster)) {
    Cand.Reason = OrigReason;
    return;
  }
  // Avoid critical resource consumption and balance the schedule.
  TryCand.initResourceDelta(DAG, SchedModel);
  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
              TryCand, Cand, ResourceReduce))
    return;
  if (tryGreater(TryCand.ResDelta.DemandedResources,
                 Cand.ResDelta.DemandedResources,
                 TryCand, Cand, ResourceDemand))
    return;

  // Avoid serializing long latency dependence chains.
  if (Cand.Policy.ReduceLatency) {
    if (Zone.isTop()) {
      if (Cand.SU->getDepth() * SchedModel->getLatencyFactor()
          > Zone.ExpectedCount) {
        if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
                    TryCand, Cand, TopDepthReduce))
          return;
      }
      if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
                     TryCand, Cand, TopPathReduce))
        return;
    }
    else {
      if (Cand.SU->getHeight() * SchedModel->getLatencyFactor()
          > Zone.ExpectedCount) {
        if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
                    TryCand, Cand, BotHeightReduce))
          return;
      }
      if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
                     TryCand, Cand, BotPathReduce))
        return;
    }
  }

  // Avoid increasing the max pressure of the entire region.
  if (tryLess(TryCand.RPDelta.CurrentMax.UnitIncrease,
              Cand.RPDelta.CurrentMax.UnitIncrease, TryCand, Cand, SingleMax))
    return;
  if (Cand.Reason == SingleMax)
    Cand.Reason = MultiPressure;

  // Prefer immediate defs/users of the last scheduled instruction. This is a
  // nice pressure avoidance strategy that also conserves the processor's
  // register renaming resources and keeps the machine code readable.
  if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU),
                 TryCand, Cand, NextDefUse))
    return;

  // Fall through to original instruction order.
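  // NodeNum reflects the instruction order prior to scheduling, so preferring
  // the smaller NodeNum when scheduling top-down (and the larger one
  // bottom-up) keeps the original order whenever no other heuristic fires.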
  if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
      || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
    TryCand.Reason = NodeOrder;
  }
}

/// pickNodeFromQueue helper that returns true if the LHS reg pressure effect is
/// more desirable than RHS from a scheduling standpoint.
static bool compareRPDelta(const RegPressureDelta &LHS,
                           const RegPressureDelta &RHS) {
  // Compare each component of pressure in decreasing order of importance
  // without checking if any are valid. Invalid PressureElements are assumed to
  // have UnitIncrease==0, so are neutral.

  // Avoid exceeding the target's limit.
  if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) {
    DEBUG(dbgs() << "RP excess top - bot: "
          << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n');
    return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease;
  }
  // Avoid increasing the max critical pressure in the scheduled region.
  if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) {
    DEBUG(dbgs() << "RP critical top - bot: "
          << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease)
          << '\n');
    return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease;
  }
  // Avoid increasing the max pressure of the entire region.
  if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) {
    DEBUG(dbgs() << "RP current top - bot: "
          << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease)
          << '\n');
    return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease;
  }
  return false;
}

#ifndef NDEBUG
const char *ConvergingScheduler::getReasonStr(
  ConvergingScheduler::CandReason Reason) {
  switch (Reason) {
  case NoCand:          return "NOCAND    ";
  case SingleExcess:    return "REG-EXCESS";
  case SingleCritical:  return "REG-CRIT  ";
  case Cluster:         return "CLUSTER   ";
  case SingleMax:       return "REG-MAX   ";
  case MultiPressure:   return "REG-MULTI ";
  case ResourceReduce:  return "RES-REDUCE";
  case ResourceDemand:  return "RES-DEMAND";
  case TopDepthReduce:  return "TOP-DEPTH ";
  case TopPathReduce:   return "TOP-PATH  ";
  case BotHeightReduce: return "BOT-HEIGHT";
  case BotPathReduce:   return "BOT-PATH  ";
  case NextDefUse:      return "DEF-USE   ";
  case NodeOrder:       return "ORDER     ";
  }
  llvm_unreachable("Unknown reason!");
}

void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand,
                                         const SchedBoundary &Zone) {
  const char *Label = getReasonStr(Cand.Reason);
  PressureElement P;
  unsigned ResIdx = 0;
  unsigned Latency = 0;
  switch (Cand.Reason) {
  default:
    break;
  case SingleExcess:
    P = Cand.RPDelta.Excess;
    break;
  case SingleCritical:
    P = Cand.RPDelta.CriticalMax;
    break;
  case SingleMax:
    P = Cand.RPDelta.CurrentMax;
    break;
  case ResourceReduce:
    ResIdx = Cand.Policy.ReduceResIdx;
    break;
  case ResourceDemand:
    ResIdx = Cand.Policy.DemandResIdx;
    break;
  case TopDepthReduce:
    Latency = Cand.SU->getDepth();
    break;
  case TopPathReduce:
    Latency = Cand.SU->getHeight();
    break;
  case BotHeightReduce:
    Latency = Cand.SU->getHeight();
    break;
  case BotPathReduce:
    Latency = Cand.SU->getDepth();
    break;
  }
  dbgs() << Label << " " << Zone.Available.getName() << " ";
  if (P.isValid())
    dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
           << " ";
  else
    dbgs() << " ";
  if (ResIdx)
    dbgs() << SchedModel->getProcResource(ResIdx)->Name << " ";
  else
    dbgs() << " ";
  if (Latency)
    dbgs() << Latency << " cycles ";
  else
    dbgs() << " ";
  Cand.SU->dump(DAG);
}
#endif

/// Pick the best candidate from the top queue.
///
/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
/// DAG building. To adjust for the current scheduling location we need to
/// maintain the number of vreg uses remaining to be top-scheduled.
void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
                                            const RegPressureTracker &RPTracker,
                                            SchedCandidate &Cand) {
  ReadyQueue &Q = Zone.Available;

  DEBUG(Q.dump());

  // getMaxPressureDelta temporarily modifies the tracker.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {

    SchedCandidate TryCand(Cand.Policy);
    TryCand.SU = *I;
    tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
    if (TryCand.Reason != NoCand) {
      // Initialize resource delta if needed in case future heuristics query it.
      if (TryCand.ResDelta == SchedResourceDelta())
        TryCand.initResourceDelta(DAG, SchedModel);
      Cand.setBest(TryCand);
      DEBUG(traceCandidate(Cand, Zone));
    }
  }
}

static void tracePick(const ConvergingScheduler::SchedCandidate &Cand,
                      bool IsTop) {
  DEBUG(dbgs() << "Pick " << (IsTop ? "top" : "bot")
        << " SU(" << Cand.SU->NodeNum << ") "
        << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
}

/// Pick the best candidate node from either the top or bottom queue.
SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  CandPolicy NoPolicy;
  SchedCandidate BotCand(NoPolicy);
  SchedCandidate TopCand(NoPolicy);
  checkResourceLimits(TopCand, BotCand);

  // Prefer bottom scheduling when heuristics are silent.
  pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
  assert(BotCand.Reason != NoCand && "failed to find the first candidate");

  // If either Q has a single candidate that provides the least increase in
  // Excess pressure, we can immediately schedule from that Q.
  //
  // RegionCriticalPSets summarizes the pressure within the scheduled region and
  // affects picking from either Q. If scheduling in one direction must
  // increase pressure for one of the excess PSets, then schedule in that
  // direction first to provide more freedom in the other direction.
  if (BotCand.Reason == SingleExcess || BotCand.Reason == SingleCritical) {
    IsTopNode = false;
    tracePick(BotCand, IsTopNode);
    return BotCand.SU;
  }
  // Check if the top Q has a better candidate.
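  // Aside from NoCand, CandReason values are ordered from strongest to weakest
  // justification, so the numeric comparisons on Reason below treat the
  // smaller value as the more compelling pick.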
  pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
  assert(TopCand.Reason != NoCand && "failed to find the first candidate");

  // If either Q has a single candidate that minimizes pressure above the
  // original region's pressure, pick it.
  if (TopCand.Reason <= SingleMax || BotCand.Reason <= SingleMax) {
    if (TopCand.Reason < BotCand.Reason) {
      IsTopNode = true;
      tracePick(TopCand, IsTopNode);
      return TopCand.SU;
    }
    IsTopNode = false;
    tracePick(BotCand, IsTopNode);
    return BotCand.SU;
  }
  // Check for a salient pressure difference and pick the best from either side.
  if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) {
    IsTopNode = true;
    tracePick(TopCand, IsTopNode);
    return TopCand.SU;
  }
  // Otherwise prefer the bottom candidate, in node order if all else failed.
  if (TopCand.Reason < BotCand.Reason) {
    IsTopNode = true;
    tracePick(TopCand, IsTopNode);
    return TopCand.SU;
  }
  IsTopNode = false;
  tracePick(BotCand, IsTopNode);
  return BotCand.SU;
}

/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return NULL;
  }
  SUnit *SU;
  do {
    if (ForceTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        SchedCandidate TopCand(NoPolicy);
        pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
        assert(TopCand.Reason != NoCand && "failed to find the first candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    }
    else if (ForceBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        SchedCandidate BotCand(NoPolicy);
        pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
        assert(BotCand.Reason != NoCand && "failed to find the first candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    }
    else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
        << " Scheduling Instruction in cycle "
        << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
        SU->dump(DAG));
  return SU;
}

/// Update the scheduler's state after scheduling a node. This is the same node
/// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
/// its state based on the current cycle before MachineSchedStrategy does.
void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
  if (IsTopNode) {
    SU->TopReadyCycle = Top.CurrCycle;
    Top.bumpNode(SU);
  }
  else {
    SU->BotReadyCycle = Bot.CurrCycle;
    Bot.bumpNode(SU);
  }
}

/// Create the standard converging machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
  assert((!ForceTopDown || !ForceBottomUp) &&
         "-misched-topdown incompatible with -misched-bottomup");
  ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler());
  // Register DAG post-processors.
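  // These mutations are applied after the dependence DAG is built and before
  // any node is picked. They add weak (cluster) edges between loads that
  // should stay adjacent and between macro-fusable instruction pairs (see
  // LoadClusterMutation and MacroFusion earlier in this file), which the
  // Cluster heuristic in tryCandidate then tries to honor.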
  if (EnableLoadCluster)
    DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
  if (EnableMacroFusion)
    DAG->addMutation(new MacroFusion(DAG->TII));
  return DAG;
}
static MachineSchedRegistry
ConvergingSchedRegistry("converge", "Standard converging scheduler.",
                        createConvergingSched);

//===----------------------------------------------------------------------===//
// ILP Scheduler. Currently for experimental analysis of heuristics.
//===----------------------------------------------------------------------===//

namespace {
/// \brief Order nodes by the ILP metric.
struct ILPOrder {
  ScheduleDAGILP *ILP;
  bool MaximizeILP;

  ILPOrder(ScheduleDAGILP *ilp, bool MaxILP): ILP(ilp), MaximizeILP(MaxILP) {}

  /// \brief Apply a less-than relation on node priority.
  bool operator()(const SUnit *A, const SUnit *B) const {
    // Return true if A comes after B in the Q.
    if (MaximizeILP)
      return ILP->getILP(A) < ILP->getILP(B);
    else
      return ILP->getILP(A) > ILP->getILP(B);
  }
};

/// \brief Schedule based on the ILP metric.
class ILPScheduler : public MachineSchedStrategy {
  ScheduleDAGILP ILP;
  ILPOrder Cmp;

  std::vector<SUnit*> ReadyQ;
public:
  ILPScheduler(bool MaximizeILP)
    : ILP(/*BottomUp=*/true), Cmp(&ILP, MaximizeILP) {}

  virtual void initialize(ScheduleDAGMI *DAG) {
    ReadyQ.clear();
    ILP.resize(DAG->SUnits.size());
  }

  virtual void registerRoots() {
    for (std::vector<SUnit*>::const_iterator
           I = ReadyQ.begin(), E = ReadyQ.end(); I != E; ++I) {
      ILP.computeILP(*I);
    }
  }

  /// Implement MachineSchedStrategy interface.
  /// -----------------------------------------

  virtual SUnit *pickNode(bool &IsTopNode) {
    if (ReadyQ.empty()) return NULL;
    std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
    SUnit *SU = ReadyQ.back();
    ReadyQ.pop_back();
    IsTopNode = false;
    DEBUG(dbgs() << "*** Scheduling " << *SU->getInstr()
          << " ILP: " << ILP.getILP(SU) << '\n');
    return SU;
  }

  virtual void schedNode(SUnit *, bool) {}

  virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }

  virtual void releaseBottomNode(SUnit *SU) {
    ReadyQ.push_back(SU);
    std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  }
};
} // namespace

static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMI(C, new ILPScheduler(true));
}
static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMI(C, new ILPScheduler(false));
}
static MachineSchedRegistry ILPMaxRegistry(
  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
static MachineSchedRegistry ILPMinRegistry(
  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);

//===----------------------------------------------------------------------===//
// Machine Instruction Shuffler for Correctness Testing
//===----------------------------------------------------------------------===//

#ifndef NDEBUG
namespace {
/// Apply a less-than relation on the node order, which corresponds to the
/// instruction order prior to scheduling. IsReverse implements greater-than.
template<bool IsReverse>
struct SUnitOrder {
  bool operator()(SUnit *A, SUnit *B) const {
    if (IsReverse)
      return A->NodeNum > B->NodeNum;
    else
      return A->NodeNum < B->NodeNum;
  }
};

/// Reorder instructions as much as possible.
class InstructionShuffler : public MachineSchedStrategy {
  bool IsAlternating;
  bool IsTopDown;

  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
  // gives nodes with a higher number higher priority, causing the latest
  // instructions to be scheduled first.
  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
    TopQ;
  // When scheduling bottom-up, use greater-than as the queue priority.
  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
    BottomQ;
public:
  InstructionShuffler(bool alternate, bool topdown)
    : IsAlternating(alternate), IsTopDown(topdown) {}

  virtual void initialize(ScheduleDAGMI *) {
    TopQ.clear();
    BottomQ.clear();
  }

  /// Implement MachineSchedStrategy interface.
  /// -----------------------------------------

  virtual SUnit *pickNode(bool &IsTopNode) {
    SUnit *SU;
    if (IsTopDown) {
      do {
        if (TopQ.empty()) return NULL;
        SU = TopQ.top();
        TopQ.pop();
      } while (SU->isScheduled);
      IsTopNode = true;
    }
    else {
      do {
        if (BottomQ.empty()) return NULL;
        SU = BottomQ.top();
        BottomQ.pop();
      } while (SU->isScheduled);
      IsTopNode = false;
    }
    if (IsAlternating)
      IsTopDown = !IsTopDown;
    return SU;
  }

  virtual void schedNode(SUnit *SU, bool IsTopNode) {}

  virtual void releaseTopNode(SUnit *SU) {
    TopQ.push(SU);
  }
  virtual void releaseBottomNode(SUnit *SU) {
    BottomQ.push(SU);
  }
};
} // namespace

static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
  bool Alternate = !ForceTopDown && !ForceBottomUp;
  bool TopDown = !ForceBottomUp;
  assert((TopDown || !ForceTopDown) &&
         "-misched-topdown incompatible with -misched-bottomup");
  return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown));
}
static MachineSchedRegistry ShufflerRegistry(
  "shuffle", "Shuffle machine instructions alternating directions",
  createInstructionShuffler);
#endif // !NDEBUG
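// The schedulers defined above are selected by name through
// MachineSchedRegistry: "converge" for the standard converging scheduler,
// "ilpmax"/"ilpmin" for the experimental ILP schedulers, and, in debug builds
// only, "shuffle" for the correctness-testing instruction shuffler.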