1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // MachineScheduler schedules machine instructions after phi elimination. It 11 // preserves LiveIntervals so it can be invoked before register allocation. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #define DEBUG_TYPE "misched" 16 17 #include "llvm/CodeGen/MachineScheduler.h" 18 #include "llvm/ADT/OwningPtr.h" 19 #include "llvm/ADT/PriorityQueue.h" 20 #include "llvm/Analysis/AliasAnalysis.h" 21 #include "llvm/CodeGen/LiveIntervalAnalysis.h" 22 #include "llvm/CodeGen/MachineDominators.h" 23 #include "llvm/CodeGen/MachineLoopInfo.h" 24 #include "llvm/CodeGen/Passes.h" 25 #include "llvm/CodeGen/RegisterClassInfo.h" 26 #include "llvm/CodeGen/ScheduleDFS.h" 27 #include "llvm/CodeGen/ScheduleHazardRecognizer.h" 28 #include "llvm/Support/CommandLine.h" 29 #include "llvm/Support/Debug.h" 30 #include "llvm/Support/ErrorHandling.h" 31 #include "llvm/Support/GraphWriter.h" 32 #include "llvm/Support/raw_ostream.h" 33 #include <queue> 34 35 using namespace llvm; 36 37 namespace llvm { 38 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden, 39 cl::desc("Force top-down list scheduling")); 40 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden, 41 cl::desc("Force bottom-up list scheduling")); 42 } 43 44 #ifndef NDEBUG 45 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden, 46 cl::desc("Pop up a window to show MISched dags after they are processed")); 47 48 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden, 49 cl::desc("Stop scheduling after N instructions"), cl::init(~0U)); 50 #else 51 static bool ViewMISchedDAGs = false; 52 #endif // NDEBUG 53 54 // Experimental heuristics 55 static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden, 56 cl::desc("Enable load clustering."), cl::init(true)); 57 58 // Experimental heuristics 59 static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden, 60 cl::desc("Enable scheduling for macro fusion."), cl::init(true)); 61 62 static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden, 63 cl::desc("Verify machine instrs before and after machine scheduling")); 64 65 // DAG subtrees must have at least this many nodes. 66 static const unsigned MinSubtreeSize = 8; 67 68 //===----------------------------------------------------------------------===// 69 // Machine Instruction Scheduling Pass and Registry 70 //===----------------------------------------------------------------------===// 71 72 MachineSchedContext::MachineSchedContext(): 73 MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) { 74 RegClassInfo = new RegisterClassInfo(); 75 } 76 77 MachineSchedContext::~MachineSchedContext() { 78 delete RegClassInfo; 79 } 80 81 namespace { 82 /// MachineScheduler runs after coalescing and before register allocation. 
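/// It requires MachineDominators, MachineLoopInfo, AliasAnalysis, SlotIndexes
/// and LiveIntervals, and it preserves the CFG, SlotIndexes and LiveIntervals
/// (see getAnalysisUsage below).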
83 class MachineScheduler : public MachineSchedContext, 84 public MachineFunctionPass { 85 public: 86 MachineScheduler(); 87 88 virtual void getAnalysisUsage(AnalysisUsage &AU) const; 89 90 virtual void releaseMemory() {} 91 92 virtual bool runOnMachineFunction(MachineFunction&); 93 94 virtual void print(raw_ostream &O, const Module* = 0) const; 95 96 static char ID; // Class identification, replacement for typeinfo 97 }; 98 } // namespace 99 100 char MachineScheduler::ID = 0; 101 102 char &llvm::MachineSchedulerID = MachineScheduler::ID; 103 104 INITIALIZE_PASS_BEGIN(MachineScheduler, "misched", 105 "Machine Instruction Scheduler", false, false) 106 INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 107 INITIALIZE_PASS_DEPENDENCY(SlotIndexes) 108 INITIALIZE_PASS_DEPENDENCY(LiveIntervals) 109 INITIALIZE_PASS_END(MachineScheduler, "misched", 110 "Machine Instruction Scheduler", false, false) 111 112 MachineScheduler::MachineScheduler() 113 : MachineFunctionPass(ID) { 114 initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); 115 } 116 117 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { 118 AU.setPreservesCFG(); 119 AU.addRequiredID(MachineDominatorsID); 120 AU.addRequired<MachineLoopInfo>(); 121 AU.addRequired<AliasAnalysis>(); 122 AU.addRequired<TargetPassConfig>(); 123 AU.addRequired<SlotIndexes>(); 124 AU.addPreserved<SlotIndexes>(); 125 AU.addRequired<LiveIntervals>(); 126 AU.addPreserved<LiveIntervals>(); 127 MachineFunctionPass::getAnalysisUsage(AU); 128 } 129 130 MachinePassRegistry MachineSchedRegistry::Registry; 131 132 /// A dummy default scheduler factory indicates whether the scheduler 133 /// is overridden on the command line. 134 static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) { 135 return 0; 136 } 137 138 /// MachineSchedOpt allows command line selection of the scheduler. 139 static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false, 140 RegisterPassParser<MachineSchedRegistry> > 141 MachineSchedOpt("misched", 142 cl::init(&useDefaultMachineSched), cl::Hidden, 143 cl::desc("Machine instruction scheduler to use")); 144 145 static MachineSchedRegistry 146 DefaultSchedRegistry("default", "Use the target's default scheduler choice.", 147 useDefaultMachineSched); 148 149 /// Forward declare the standard machine scheduler. This will be used as the 150 /// default scheduler if the target does not set a default. 151 static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C); 152 153 154 /// Decrement this iterator until reaching the top or a non-debug instr. 155 static MachineBasicBlock::iterator 156 priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) { 157 assert(I != Beg && "reached the top of the region, cannot decrement"); 158 while (--I != Beg) { 159 if (!I->isDebugValue()) 160 break; 161 } 162 return I; 163 } 164 165 /// If this iterator is a debug value, increment until reaching the End or a 166 /// non-debug instruction. 167 static MachineBasicBlock::iterator 168 nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) { 169 for(; I != End; ++I) { 170 if (!I->isDebugValue()) 171 break; 172 } 173 return I; 174 } 175 176 /// Top-level MachineScheduler pass driver. 177 /// 178 /// Visit blocks in function order. Divide each block into scheduling regions 179 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is 180 /// consistent with the DAG builder, which traverses the interior of the 181 /// scheduling regions bottom-up. 
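///
/// Note on scheduler selection: the instance run on each region is chosen via
/// -misched=<name> from MachineSchedRegistry (see above); with no override,
/// the target's default or createConvergingSched is used. A target or plugin
/// can register an alternative factory. A minimal sketch, with hypothetical
/// names, assuming ScheduleDAGMI is constructed from the context plus a
/// MachineSchedStrategy as the default converging factory does:
///
///   static ScheduleDAGInstrs *createCustomSched(MachineSchedContext *C) {
///     return new ScheduleDAGMI(C, new CustomStrategy());
///   }
///   static MachineSchedRegistry
///   CustomSchedRegistry("custom", "Run the custom scheduler.",
///                       createCustomSched);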
182 ///
183 /// This design avoids exposing scheduling boundaries to the DAG builder,
184 /// simplifying the DAG builder's support for "special" target instructions.
185 /// At the same time the design allows target schedulers to operate across
186 /// scheduling boundaries, for example to bundle the boundary instructions
187 /// without reordering them. This creates complexity, because the target
188 /// scheduler must update the RegionBegin and RegionEnd positions cached by
189 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
190 /// design would be to split blocks at scheduling boundaries, but LLVM has a
191 /// general bias against block splitting purely for implementation simplicity.
192 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
193 DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
194
195 // Initialize the context of the pass.
196 MF = &mf;
197 MLI = &getAnalysis<MachineLoopInfo>();
198 MDT = &getAnalysis<MachineDominatorTree>();
199 PassConfig = &getAnalysis<TargetPassConfig>();
200 AA = &getAnalysis<AliasAnalysis>();
201
202 LIS = &getAnalysis<LiveIntervals>();
203 const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
204
205 if (VerifyScheduling) {
206 DEBUG(LIS->print(dbgs()));
207 MF->verify(this, "Before machine scheduling.");
208 }
209 RegClassInfo->runOnMachineFunction(*MF);
210
211 // Select the scheduler, or set the default.
212 MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
213 if (Ctor == useDefaultMachineSched) {
214 // Get the default scheduler set by the target.
215 Ctor = MachineSchedRegistry::getDefault();
216 if (!Ctor) {
217 Ctor = createConvergingSched;
218 MachineSchedRegistry::setDefault(Ctor);
219 }
220 }
221 // Instantiate the selected scheduler.
222 OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));
223
224 // Visit all machine basic blocks.
225 //
226 // TODO: Visit blocks in global postorder or postorder within the bottom-up
227 // loop tree. Then we can optionally compute global RegPressure.
228 for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
229 MBB != MBBEnd; ++MBB) {
230
231 Scheduler->startBlock(MBB);
232
233 // Break the block into scheduling regions [I, RegionEnd), and schedule each
234 // region as soon as it is discovered. RegionEnd points to the scheduling
235 // boundary at the bottom of the region. The DAG does not include RegionEnd,
236 // but the region does (i.e. the next RegionEnd is above the previous
237 // RegionBegin). If the current block has no terminator then RegionEnd ==
238 // MBB->end() for the bottom region.
239 //
240 // The Scheduler may insert instructions during either schedule() or
241 // exitRegion(), even for empty regions. So the local iterators 'I' and
242 // 'RegionEnd' are invalid across these calls.
243 unsigned RemainingInstrs = MBB->size();
244 for(MachineBasicBlock::iterator RegionEnd = MBB->end();
245 RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {
246
247 // Avoid decrementing RegionEnd for blocks with no terminator.
248 if (RegionEnd != MBB->end()
249 || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
250 --RegionEnd;
251 // Count the boundary instruction.
252 --RemainingInstrs;
253 }
254
255 // The next region starts above the previous region. Look backward in the
256 // instruction stream until we find the nearest boundary.
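// For illustration (hypothetical instruction names): in a block
//   I0 I1 B I2 I3 TERM
// where B is the only mid-block isSchedulingBoundary() instruction and TERM is
// the terminator, the bottom-up walk visits the region containing I2 and I3
// first, then the region containing I0 and I1; the boundary instructions B and
// TERM belong to no region and are never reordered.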
257 MachineBasicBlock::iterator I = RegionEnd; 258 for(;I != MBB->begin(); --I, --RemainingInstrs) { 259 if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF)) 260 break; 261 } 262 // Notify the scheduler of the region, even if we may skip scheduling 263 // it. Perhaps it still needs to be bundled. 264 Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs); 265 266 // Skip empty scheduling regions (0 or 1 schedulable instructions). 267 if (I == RegionEnd || I == llvm::prior(RegionEnd)) { 268 // Close the current region. Bundle the terminator if needed. 269 // This invalidates 'RegionEnd' and 'I'. 270 Scheduler->exitRegion(); 271 continue; 272 } 273 DEBUG(dbgs() << "********** MI Scheduling **********\n"); 274 DEBUG(dbgs() << MF->getName() 275 << ":BB#" << MBB->getNumber() << " " << MBB->getName() 276 << "\n From: " << *I << " To: "; 277 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; 278 else dbgs() << "End"; 279 dbgs() << " Remaining: " << RemainingInstrs << "\n"); 280 281 // Schedule a region: possibly reorder instructions. 282 // This invalidates 'RegionEnd' and 'I'. 283 Scheduler->schedule(); 284 285 // Close the current region. 286 Scheduler->exitRegion(); 287 288 // Scheduling has invalidated the current iterator 'I'. Ask the 289 // scheduler for the top of it's scheduled region. 290 RegionEnd = Scheduler->begin(); 291 } 292 assert(RemainingInstrs == 0 && "Instruction count mismatch!"); 293 Scheduler->finishBlock(); 294 } 295 Scheduler->finalizeSchedule(); 296 DEBUG(LIS->print(dbgs())); 297 if (VerifyScheduling) 298 MF->verify(this, "After machine scheduling."); 299 return true; 300 } 301 302 void MachineScheduler::print(raw_ostream &O, const Module* m) const { 303 // unimplemented 304 } 305 306 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 307 void ReadyQueue::dump() { 308 dbgs() << " " << Name << ": "; 309 for (unsigned i = 0, e = Queue.size(); i < e; ++i) 310 dbgs() << Queue[i]->NodeNum << " "; 311 dbgs() << "\n"; 312 } 313 #endif 314 315 //===----------------------------------------------------------------------===// 316 // ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals 317 // preservation. 318 //===----------------------------------------------------------------------===// 319 320 ScheduleDAGMI::~ScheduleDAGMI() { 321 delete DFSResult; 322 DeleteContainerPointers(Mutations); 323 delete SchedImpl; 324 } 325 326 bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) { 327 if (SuccSU != &ExitSU) { 328 // Do not use WillCreateCycle, it assumes SD scheduling. 329 // If Pred is reachable from Succ, then the edge creates a cycle. 330 if (Topo.IsReachable(PredDep.getSUnit(), SuccSU)) 331 return false; 332 Topo.AddPred(SuccSU, PredDep.getSUnit()); 333 } 334 SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial()); 335 // Return true regardless of whether a new edge needed to be inserted. 336 return true; 337 } 338 339 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When 340 /// NumPredsLeft reaches zero, release the successor node. 341 /// 342 /// FIXME: Adjust SuccSU height based on MinLatency. 343 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) { 344 SUnit *SuccSU = SuccEdge->getSUnit(); 345 346 if (SuccEdge->isWeak()) { 347 --SuccSU->WeakPredsLeft; 348 if (SuccEdge->isCluster()) 349 NextClusterSucc = SuccSU; 350 return; 351 } 352 #ifndef NDEBUG 353 if (SuccSU->NumPredsLeft == 0) { 354 dbgs() << "*** Scheduling failed! 
***\n"; 355 SuccSU->dump(this); 356 dbgs() << " has been released too many times!\n"; 357 llvm_unreachable(0); 358 } 359 #endif 360 --SuccSU->NumPredsLeft; 361 if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) 362 SchedImpl->releaseTopNode(SuccSU); 363 } 364 365 /// releaseSuccessors - Call releaseSucc on each of SU's successors. 366 void ScheduleDAGMI::releaseSuccessors(SUnit *SU) { 367 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 368 I != E; ++I) { 369 releaseSucc(SU, &*I); 370 } 371 } 372 373 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When 374 /// NumSuccsLeft reaches zero, release the predecessor node. 375 /// 376 /// FIXME: Adjust PredSU height based on MinLatency. 377 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) { 378 SUnit *PredSU = PredEdge->getSUnit(); 379 380 if (PredEdge->isWeak()) { 381 --PredSU->WeakSuccsLeft; 382 if (PredEdge->isCluster()) 383 NextClusterPred = PredSU; 384 return; 385 } 386 #ifndef NDEBUG 387 if (PredSU->NumSuccsLeft == 0) { 388 dbgs() << "*** Scheduling failed! ***\n"; 389 PredSU->dump(this); 390 dbgs() << " has been released too many times!\n"; 391 llvm_unreachable(0); 392 } 393 #endif 394 --PredSU->NumSuccsLeft; 395 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) 396 SchedImpl->releaseBottomNode(PredSU); 397 } 398 399 /// releasePredecessors - Call releasePred on each of SU's predecessors. 400 void ScheduleDAGMI::releasePredecessors(SUnit *SU) { 401 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 402 I != E; ++I) { 403 releasePred(SU, &*I); 404 } 405 } 406 407 /// This is normally called from the main scheduler loop but may also be invoked 408 /// by the scheduling strategy to perform additional code motion. 409 void ScheduleDAGMI::moveInstruction(MachineInstr *MI, 410 MachineBasicBlock::iterator InsertPos) { 411 // Advance RegionBegin if the first instruction moves down. 412 if (&*RegionBegin == MI) 413 ++RegionBegin; 414 415 // Update the instruction stream. 416 BB->splice(InsertPos, BB, MI); 417 418 // Update LiveIntervals 419 LIS->handleMove(MI, /*UpdateFlags=*/true); 420 421 // Recede RegionBegin if an instruction moves above the first. 422 if (RegionBegin == InsertPos) 423 RegionBegin = MI; 424 } 425 426 bool ScheduleDAGMI::checkSchedLimit() { 427 #ifndef NDEBUG 428 if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) { 429 CurrentTop = CurrentBottom; 430 return false; 431 } 432 ++NumInstrsScheduled; 433 #endif 434 return true; 435 } 436 437 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after 438 /// crossing a scheduling boundary. [begin, end) includes all instructions in 439 /// the region, including the boundary itself and single-instruction regions 440 /// that don't get scheduled. 441 void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb, 442 MachineBasicBlock::iterator begin, 443 MachineBasicBlock::iterator end, 444 unsigned endcount) 445 { 446 ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount); 447 448 // For convenience remember the end of the liveness region. 449 LiveRegionEnd = 450 (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd); 451 } 452 453 // Setup the register pressure trackers for the top scheduled top and bottom 454 // scheduled regions. 455 void ScheduleDAGMI::initRegPressure() { 456 TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin); 457 BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd); 458 459 // Close the RPTracker to finalize live ins. 
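// By this point RPTracker, which was initialized at LiveRegionEnd, has receded
// across the whole region during DAG construction, so closing it fixes the
// region's live-in/live-out sets and max pressure, which seed the top and
// bottom trackers below.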
460 RPTracker.closeRegion(); 461 462 DEBUG(RPTracker.getPressure().dump(TRI)); 463 464 // Initialize the live ins and live outs. 465 TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs); 466 BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs); 467 468 // Close one end of the tracker so we can call 469 // getMaxUpward/DownwardPressureDelta before advancing across any 470 // instructions. This converts currently live regs into live ins/outs. 471 TopRPTracker.closeTop(); 472 BotRPTracker.closeBottom(); 473 474 // Account for liveness generated by the region boundary. 475 if (LiveRegionEnd != RegionEnd) 476 BotRPTracker.recede(); 477 478 assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom"); 479 480 // Cache the list of excess pressure sets in this region. This will also track 481 // the max pressure in the scheduled code for these sets. 482 RegionCriticalPSets.clear(); 483 const std::vector<unsigned> &RegionPressure = 484 RPTracker.getPressure().MaxSetPressure; 485 for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) { 486 unsigned Limit = TRI->getRegPressureSetLimit(i); 487 DEBUG(dbgs() << TRI->getRegPressureSetName(i) 488 << "Limit " << Limit 489 << " Actual " << RegionPressure[i] << "\n"); 490 if (RegionPressure[i] > Limit) 491 RegionCriticalPSets.push_back(PressureElement(i, 0)); 492 } 493 DEBUG(dbgs() << "Excess PSets: "; 494 for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i) 495 dbgs() << TRI->getRegPressureSetName( 496 RegionCriticalPSets[i].PSetID) << " "; 497 dbgs() << "\n"); 498 } 499 500 // FIXME: When the pressure tracker deals in pressure differences then we won't 501 // iterate over all RegionCriticalPSets[i]. 502 void ScheduleDAGMI:: 503 updateScheduledPressure(const std::vector<unsigned> &NewMaxPressure) { 504 for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) { 505 unsigned ID = RegionCriticalPSets[i].PSetID; 506 int &MaxUnits = RegionCriticalPSets[i].UnitIncrease; 507 if ((int)NewMaxPressure[ID] > MaxUnits) 508 MaxUnits = NewMaxPressure[ID]; 509 } 510 DEBUG( 511 for (unsigned i = 0, e = NewMaxPressure.size(); i < e; ++i) { 512 unsigned Limit = TRI->getRegPressureSetLimit(i); 513 if (NewMaxPressure[i] > Limit ) { 514 dbgs() << " " << TRI->getRegPressureSetName(i) << ": " 515 << NewMaxPressure[i] << " > " << Limit << "\n"; 516 } 517 }); 518 } 519 520 /// schedule - Called back from MachineScheduler::runOnMachineFunction 521 /// after setting up the current scheduling region. [RegionBegin, RegionEnd) 522 /// only includes instructions that have DAG nodes, not scheduling boundaries. 523 /// 524 /// This is a skeletal driver, with all the functionality pushed into helpers, 525 /// so that it can be easilly extended by experimental schedulers. Generally, 526 /// implementing MachineSchedStrategy should be sufficient to implement a new 527 /// scheduling algorithm. However, if a scheduler further subclasses 528 /// ScheduleDAGMI then it will want to override this virtual method in order to 529 /// update any specialized state. 530 void ScheduleDAGMI::schedule() { 531 buildDAGWithRegPressure(); 532 533 Topo.InitDAGTopologicalSorting(); 534 535 postprocessDAG(); 536 537 SmallVector<SUnit*, 8> TopRoots, BotRoots; 538 findRootsAndBiasEdges(TopRoots, BotRoots); 539 540 // Initialize the strategy before modifying the DAG. 541 // This may initialize a DFSResult to be used for queue priority. 
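// From the strategy's point of view, this driver issues the following
// callbacks: initialize(), then releaseTopNode()/releaseBottomNode() for the
// DAG roots followed by registerRoots() (via initQueues below), then repeated
// pickNode()/schedNode() until the region is scheduled, with further
// releaseTopNode()/releaseBottomNode() calls as dependent nodes become ready.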
542 SchedImpl->initialize(this); 543 544 DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) 545 SUnits[su].dumpAll(this)); 546 if (ViewMISchedDAGs) viewGraph(); 547 548 // Initialize ready queues now that the DAG and priority data are finalized. 549 initQueues(TopRoots, BotRoots); 550 551 bool IsTopNode = false; 552 while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { 553 assert(!SU->isScheduled && "Node already scheduled"); 554 if (!checkSchedLimit()) 555 break; 556 557 scheduleMI(SU, IsTopNode); 558 559 updateQueues(SU, IsTopNode); 560 } 561 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); 562 563 placeDebugValues(); 564 565 DEBUG({ 566 unsigned BBNum = begin()->getParent()->getNumber(); 567 dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n"; 568 dumpSchedule(); 569 dbgs() << '\n'; 570 }); 571 } 572 573 /// Build the DAG and setup three register pressure trackers. 574 void ScheduleDAGMI::buildDAGWithRegPressure() { 575 // Initialize the register pressure tracker used by buildSchedGraph. 576 RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd); 577 578 // Account for liveness generate by the region boundary. 579 if (LiveRegionEnd != RegionEnd) 580 RPTracker.recede(); 581 582 // Build the DAG, and compute current register pressure. 583 buildSchedGraph(AA, &RPTracker); 584 585 // Initialize top/bottom trackers after computing region pressure. 586 initRegPressure(); 587 } 588 589 /// Apply each ScheduleDAGMutation step in order. 590 void ScheduleDAGMI::postprocessDAG() { 591 for (unsigned i = 0, e = Mutations.size(); i < e; ++i) { 592 Mutations[i]->apply(this); 593 } 594 } 595 596 void ScheduleDAGMI::computeDFSResult() { 597 if (!DFSResult) 598 DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize); 599 DFSResult->clear(); 600 ScheduledTrees.clear(); 601 DFSResult->resize(SUnits.size()); 602 DFSResult->compute(SUnits); 603 ScheduledTrees.resize(DFSResult->getNumSubtrees()); 604 } 605 606 void ScheduleDAGMI::findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots, 607 SmallVectorImpl<SUnit*> &BotRoots) { 608 for (std::vector<SUnit>::iterator 609 I = SUnits.begin(), E = SUnits.end(); I != E; ++I) { 610 SUnit *SU = &(*I); 611 assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits"); 612 613 // Order predecessors so DFSResult follows the critical path. 614 SU->biasCriticalPath(); 615 616 // A SUnit is ready to top schedule if it has no predecessors. 617 if (!I->NumPredsLeft) 618 TopRoots.push_back(SU); 619 // A SUnit is ready to bottom schedule if it has no successors. 620 if (!I->NumSuccsLeft) 621 BotRoots.push_back(SU); 622 } 623 ExitSU.biasCriticalPath(); 624 } 625 626 /// Identify DAG roots and setup scheduler queues. 627 void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots, 628 ArrayRef<SUnit*> BotRoots) { 629 NextClusterSucc = NULL; 630 NextClusterPred = NULL; 631 632 // Release all DAG roots for scheduling, not including EntrySU/ExitSU. 633 // 634 // Nodes with unreleased weak edges can still be roots. 635 // Release top roots in forward order. 636 for (SmallVectorImpl<SUnit*>::const_iterator 637 I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) { 638 SchedImpl->releaseTopNode(*I); 639 } 640 // Release bottom roots in reverse order so the higher priority nodes appear 641 // first. This is more natural and slightly more efficient. 
642 for (SmallVectorImpl<SUnit*>::const_reverse_iterator 643 I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) { 644 SchedImpl->releaseBottomNode(*I); 645 } 646 647 releaseSuccessors(&EntrySU); 648 releasePredecessors(&ExitSU); 649 650 SchedImpl->registerRoots(); 651 652 // Advance past initial DebugValues. 653 assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker"); 654 CurrentTop = nextIfDebug(RegionBegin, RegionEnd); 655 TopRPTracker.setPos(CurrentTop); 656 657 CurrentBottom = RegionEnd; 658 } 659 660 /// Move an instruction and update register pressure. 661 void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) { 662 // Move the instruction to its new location in the instruction stream. 663 MachineInstr *MI = SU->getInstr(); 664 665 if (IsTopNode) { 666 assert(SU->isTopReady() && "node still has unscheduled dependencies"); 667 if (&*CurrentTop == MI) 668 CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom); 669 else { 670 moveInstruction(MI, CurrentTop); 671 TopRPTracker.setPos(MI); 672 } 673 674 // Update top scheduled pressure. 675 TopRPTracker.advance(); 676 assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); 677 updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure); 678 } 679 else { 680 assert(SU->isBottomReady() && "node still has unscheduled dependencies"); 681 MachineBasicBlock::iterator priorII = 682 priorNonDebug(CurrentBottom, CurrentTop); 683 if (&*priorII == MI) 684 CurrentBottom = priorII; 685 else { 686 if (&*CurrentTop == MI) { 687 CurrentTop = nextIfDebug(++CurrentTop, priorII); 688 TopRPTracker.setPos(CurrentTop); 689 } 690 moveInstruction(MI, CurrentBottom); 691 CurrentBottom = MI; 692 } 693 // Update bottom scheduled pressure. 694 BotRPTracker.recede(); 695 assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); 696 updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure); 697 } 698 } 699 700 /// Update scheduler queues after scheduling an instruction. 701 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) { 702 // Release dependent instructions for scheduling. 703 if (IsTopNode) 704 releaseSuccessors(SU); 705 else 706 releasePredecessors(SU); 707 708 SU->isScheduled = true; 709 710 if (DFSResult) { 711 unsigned SubtreeID = DFSResult->getSubtreeID(SU); 712 if (!ScheduledTrees.test(SubtreeID)) { 713 ScheduledTrees.set(SubtreeID); 714 DFSResult->scheduleTree(SubtreeID); 715 SchedImpl->scheduleTree(SubtreeID); 716 } 717 } 718 719 // Notify the scheduling strategy after updating the DAG. 720 SchedImpl->schedNode(SU, IsTopNode); 721 } 722 723 /// Reinsert any remaining debug_values, just like the PostRA scheduler. 724 void ScheduleDAGMI::placeDebugValues() { 725 // If first instruction was a DBG_VALUE then put it back. 
726 if (FirstDbgValue) { 727 BB->splice(RegionBegin, BB, FirstDbgValue); 728 RegionBegin = FirstDbgValue; 729 } 730 731 for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator 732 DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { 733 std::pair<MachineInstr *, MachineInstr *> P = *prior(DI); 734 MachineInstr *DbgValue = P.first; 735 MachineBasicBlock::iterator OrigPrevMI = P.second; 736 if (&*RegionBegin == DbgValue) 737 ++RegionBegin; 738 BB->splice(++OrigPrevMI, BB, DbgValue); 739 if (OrigPrevMI == llvm::prior(RegionEnd)) 740 RegionEnd = DbgValue; 741 } 742 DbgValues.clear(); 743 FirstDbgValue = NULL; 744 } 745 746 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 747 void ScheduleDAGMI::dumpSchedule() const { 748 for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) { 749 if (SUnit *SU = getSUnit(&(*MI))) 750 SU->dump(this); 751 else 752 dbgs() << "Missing SUnit\n"; 753 } 754 } 755 #endif 756 757 //===----------------------------------------------------------------------===// 758 // LoadClusterMutation - DAG post-processing to cluster loads. 759 //===----------------------------------------------------------------------===// 760 761 namespace { 762 /// \brief Post-process the DAG to create cluster edges between neighboring 763 /// loads. 764 class LoadClusterMutation : public ScheduleDAGMutation { 765 struct LoadInfo { 766 SUnit *SU; 767 unsigned BaseReg; 768 unsigned Offset; 769 LoadInfo(SUnit *su, unsigned reg, unsigned ofs) 770 : SU(su), BaseReg(reg), Offset(ofs) {} 771 }; 772 static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS, 773 const LoadClusterMutation::LoadInfo &RHS); 774 775 const TargetInstrInfo *TII; 776 const TargetRegisterInfo *TRI; 777 public: 778 LoadClusterMutation(const TargetInstrInfo *tii, 779 const TargetRegisterInfo *tri) 780 : TII(tii), TRI(tri) {} 781 782 virtual void apply(ScheduleDAGMI *DAG); 783 protected: 784 void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG); 785 }; 786 } // anonymous 787 788 bool LoadClusterMutation::LoadInfoLess( 789 const LoadClusterMutation::LoadInfo &LHS, 790 const LoadClusterMutation::LoadInfo &RHS) { 791 if (LHS.BaseReg != RHS.BaseReg) 792 return LHS.BaseReg < RHS.BaseReg; 793 return LHS.Offset < RHS.Offset; 794 } 795 796 void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads, 797 ScheduleDAGMI *DAG) { 798 SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords; 799 for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) { 800 SUnit *SU = Loads[Idx]; 801 unsigned BaseReg; 802 unsigned Offset; 803 if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI)) 804 LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset)); 805 } 806 if (LoadRecords.size() < 2) 807 return; 808 std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess); 809 unsigned ClusterLength = 1; 810 for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) { 811 if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) { 812 ClusterLength = 1; 813 continue; 814 } 815 816 SUnit *SUa = LoadRecords[Idx].SU; 817 SUnit *SUb = LoadRecords[Idx+1].SU; 818 if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength) 819 && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) { 820 821 DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU(" 822 << SUb->NodeNum << ")\n"); 823 // Copy successor edges from SUa to SUb. Interleaving computation 824 // dependent on SUa can prevent load combining due to register reuse. 
825 // Predecessor edges do not need to be copied from SUb to SUa since nearby 826 // loads should have effectively the same inputs. 827 for (SUnit::const_succ_iterator 828 SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) { 829 if (SI->getSUnit() == SUb) 830 continue; 831 DEBUG(dbgs() << " Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n"); 832 DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial)); 833 } 834 ++ClusterLength; 835 } 836 else 837 ClusterLength = 1; 838 } 839 } 840 841 /// \brief Callback from DAG postProcessing to create cluster edges for loads. 842 void LoadClusterMutation::apply(ScheduleDAGMI *DAG) { 843 // Map DAG NodeNum to store chain ID. 844 DenseMap<unsigned, unsigned> StoreChainIDs; 845 // Map each store chain to a set of dependent loads. 846 SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents; 847 for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) { 848 SUnit *SU = &DAG->SUnits[Idx]; 849 if (!SU->getInstr()->mayLoad()) 850 continue; 851 unsigned ChainPredID = DAG->SUnits.size(); 852 for (SUnit::const_pred_iterator 853 PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) { 854 if (PI->isCtrl()) { 855 ChainPredID = PI->getSUnit()->NodeNum; 856 break; 857 } 858 } 859 // Check if this chain-like pred has been seen 860 // before. ChainPredID==MaxNodeID for loads at the top of the schedule. 861 unsigned NumChains = StoreChainDependents.size(); 862 std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result = 863 StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains)); 864 if (Result.second) 865 StoreChainDependents.resize(NumChains + 1); 866 StoreChainDependents[Result.first->second].push_back(SU); 867 } 868 // Iterate over the store chains. 869 for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx) 870 clusterNeighboringLoads(StoreChainDependents[Idx], DAG); 871 } 872 873 //===----------------------------------------------------------------------===// 874 // MacroFusion - DAG post-processing to encourage fusion of macro ops. 875 //===----------------------------------------------------------------------===// 876 877 namespace { 878 /// \brief Post-process the DAG to create cluster edges between instructions 879 /// that may be fused by the processor into a single operation. 880 class MacroFusion : public ScheduleDAGMutation { 881 const TargetInstrInfo *TII; 882 public: 883 MacroFusion(const TargetInstrInfo *tii): TII(tii) {} 884 885 virtual void apply(ScheduleDAGMI *DAG); 886 }; 887 } // anonymous 888 889 /// \brief Callback from DAG postProcessing to create cluster edges to encourage 890 /// fused operations. 891 void MacroFusion::apply(ScheduleDAGMI *DAG) { 892 // For now, assume targets can only fuse with the branch. 893 MachineInstr *Branch = DAG->ExitSU.getInstr(); 894 if (!Branch) 895 return; 896 897 for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) { 898 SUnit *SU = &DAG->SUnits[--Idx]; 899 if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch)) 900 continue; 901 902 // Create a single weak edge from SU to ExitSU. The only effect is to cause 903 // bottom-up scheduling to heavily prioritize the clustered SU. There is no 904 // need to copy predecessor edges from ExitSU to SU, since top-down 905 // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling 906 // of SU, we could create an artificial edge from the deepest root, but it 907 // hasn't been needed yet. 
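// For example, a target's shouldScheduleAdjacent() hook may report that a
// compare should stay next to the conditional branch consuming its flags
// (cmp + conditional-jump style macro-fusion); the cluster edge then biases
// bottom-up picking so the pair issues back to back.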
908 bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster)); 909 (void)Success; 910 assert(Success && "No DAG nodes should be reachable from ExitSU"); 911 912 DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n"); 913 break; 914 } 915 } 916 917 //===----------------------------------------------------------------------===// 918 // ConvergingScheduler - Implementation of the standard MachineSchedStrategy. 919 //===----------------------------------------------------------------------===// 920 921 namespace { 922 /// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance 923 /// the schedule. 924 class ConvergingScheduler : public MachineSchedStrategy { 925 public: 926 /// Represent the type of SchedCandidate found within a single queue. 927 /// pickNodeBidirectional depends on these listed by decreasing priority. 928 enum CandReason { 929 NoCand, PhysRegCopy, SingleExcess, SingleCritical, Cluster, 930 ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce, 931 TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse, 932 NodeOrder}; 933 934 #ifndef NDEBUG 935 static const char *getReasonStr(ConvergingScheduler::CandReason Reason); 936 #endif 937 938 /// Policy for scheduling the next instruction in the candidate's zone. 939 struct CandPolicy { 940 bool ReduceLatency; 941 unsigned ReduceResIdx; 942 unsigned DemandResIdx; 943 944 CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {} 945 }; 946 947 /// Status of an instruction's critical resource consumption. 948 struct SchedResourceDelta { 949 // Count critical resources in the scheduled region required by SU. 950 unsigned CritResources; 951 952 // Count critical resources from another region consumed by SU. 953 unsigned DemandedResources; 954 955 SchedResourceDelta(): CritResources(0), DemandedResources(0) {} 956 957 bool operator==(const SchedResourceDelta &RHS) const { 958 return CritResources == RHS.CritResources 959 && DemandedResources == RHS.DemandedResources; 960 } 961 bool operator!=(const SchedResourceDelta &RHS) const { 962 return !operator==(RHS); 963 } 964 }; 965 966 /// Store the state used by ConvergingScheduler heuristics, required for the 967 /// lifetime of one invocation of pickNode(). 968 struct SchedCandidate { 969 CandPolicy Policy; 970 971 // The best SUnit candidate. 972 SUnit *SU; 973 974 // The reason for this candidate. 975 CandReason Reason; 976 977 // Register pressure values for the best candidate. 978 RegPressureDelta RPDelta; 979 980 // Critical resource consumption of the best candidate. 981 SchedResourceDelta ResDelta; 982 983 SchedCandidate(const CandPolicy &policy) 984 : Policy(policy), SU(NULL), Reason(NoCand) {} 985 986 bool isValid() const { return SU; } 987 988 // Copy the status of another candidate without changing policy. 989 void setBest(SchedCandidate &Best) { 990 assert(Best.Reason != NoCand && "uninitialized Sched candidate"); 991 SU = Best.SU; 992 Reason = Best.Reason; 993 RPDelta = Best.RPDelta; 994 ResDelta = Best.ResDelta; 995 } 996 997 void initResourceDelta(const ScheduleDAGMI *DAG, 998 const TargetSchedModel *SchedModel); 999 }; 1000 1001 /// Summarize the unscheduled region. 1002 struct SchedRemainder { 1003 // Critical path through the DAG in expected latency. 1004 unsigned CriticalPath; 1005 1006 // Unscheduled resources 1007 SmallVector<unsigned, 16> RemainingCounts; 1008 // Critical resource for the unscheduled zone. 1009 unsigned CritResIdx; 1010 // Number of micro-ops left to schedule. 
1011 unsigned RemainingMicroOps; 1012 1013 void reset() { 1014 CriticalPath = 0; 1015 RemainingCounts.clear(); 1016 CritResIdx = 0; 1017 RemainingMicroOps = 0; 1018 } 1019 1020 SchedRemainder() { reset(); } 1021 1022 void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel); 1023 1024 unsigned getMaxRemainingCount(const TargetSchedModel *SchedModel) const { 1025 if (!SchedModel->hasInstrSchedModel()) 1026 return 0; 1027 1028 return std::max( 1029 RemainingMicroOps * SchedModel->getMicroOpFactor(), 1030 RemainingCounts[CritResIdx]); 1031 } 1032 }; 1033 1034 /// Each Scheduling boundary is associated with ready queues. It tracks the 1035 /// current cycle in the direction of movement, and maintains the state 1036 /// of "hazards" and other interlocks at the current cycle. 1037 struct SchedBoundary { 1038 ScheduleDAGMI *DAG; 1039 const TargetSchedModel *SchedModel; 1040 SchedRemainder *Rem; 1041 1042 ReadyQueue Available; 1043 ReadyQueue Pending; 1044 bool CheckPending; 1045 1046 // For heuristics, keep a list of the nodes that immediately depend on the 1047 // most recently scheduled node. 1048 SmallPtrSet<const SUnit*, 8> NextSUs; 1049 1050 ScheduleHazardRecognizer *HazardRec; 1051 1052 unsigned CurrCycle; 1053 unsigned IssueCount; 1054 1055 /// MinReadyCycle - Cycle of the soonest available instruction. 1056 unsigned MinReadyCycle; 1057 1058 // The expected latency of the critical path in this scheduled zone. 1059 unsigned ExpectedLatency; 1060 1061 // Resources used in the scheduled zone beyond this boundary. 1062 SmallVector<unsigned, 16> ResourceCounts; 1063 1064 // Cache the critical resources ID in this scheduled zone. 1065 unsigned CritResIdx; 1066 1067 // Is the scheduled region resource limited vs. latency limited. 1068 bool IsResourceLimited; 1069 1070 unsigned ExpectedCount; 1071 1072 #ifndef NDEBUG 1073 // Remember the greatest min operand latency. 1074 unsigned MaxMinLatency; 1075 #endif 1076 1077 void reset() { 1078 // A new HazardRec is created for each DAG and owned by SchedBoundary. 1079 delete HazardRec; 1080 1081 Available.clear(); 1082 Pending.clear(); 1083 CheckPending = false; 1084 NextSUs.clear(); 1085 HazardRec = 0; 1086 CurrCycle = 0; 1087 IssueCount = 0; 1088 MinReadyCycle = UINT_MAX; 1089 ExpectedLatency = 0; 1090 ResourceCounts.resize(1); 1091 assert(!ResourceCounts[0] && "nonzero count for bad resource"); 1092 CritResIdx = 0; 1093 IsResourceLimited = false; 1094 ExpectedCount = 0; 1095 #ifndef NDEBUG 1096 MaxMinLatency = 0; 1097 #endif 1098 // Reserve a zero-count for invalid CritResIdx. 1099 ResourceCounts.resize(1); 1100 } 1101 1102 /// Pending queues extend the ready queues with the same ID and the 1103 /// PendingFlag set. 
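/// (Concretely, the top boundary uses queue IDs TopQID and
/// TopQID << LogMaxQID, and the bottom boundary uses BotQID and
/// BotQID << LogMaxQID, as set up by the constructor below.)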
1104 SchedBoundary(unsigned ID, const Twine &Name): 1105 DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"), 1106 Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"), 1107 HazardRec(0) { 1108 reset(); 1109 } 1110 1111 ~SchedBoundary() { delete HazardRec; } 1112 1113 void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, 1114 SchedRemainder *rem); 1115 1116 bool isTop() const { 1117 return Available.getID() == ConvergingScheduler::TopQID; 1118 } 1119 1120 unsigned getUnscheduledLatency(SUnit *SU) const { 1121 if (isTop()) 1122 return SU->getHeight(); 1123 return SU->getDepth() + SU->Latency; 1124 } 1125 1126 unsigned getCriticalCount() const { 1127 return ResourceCounts[CritResIdx]; 1128 } 1129 1130 bool checkHazard(SUnit *SU); 1131 1132 void setLatencyPolicy(CandPolicy &Policy); 1133 1134 void releaseNode(SUnit *SU, unsigned ReadyCycle); 1135 1136 void bumpCycle(); 1137 1138 void countResource(unsigned PIdx, unsigned Cycles); 1139 1140 void bumpNode(SUnit *SU); 1141 1142 void releasePending(); 1143 1144 void removeReady(SUnit *SU); 1145 1146 SUnit *pickOnlyChoice(); 1147 }; 1148 1149 private: 1150 ScheduleDAGMI *DAG; 1151 const TargetSchedModel *SchedModel; 1152 const TargetRegisterInfo *TRI; 1153 1154 // State of the top and bottom scheduled instruction boundaries. 1155 SchedRemainder Rem; 1156 SchedBoundary Top; 1157 SchedBoundary Bot; 1158 1159 public: 1160 /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both) 1161 enum { 1162 TopQID = 1, 1163 BotQID = 2, 1164 LogMaxQID = 2 1165 }; 1166 1167 ConvergingScheduler(): 1168 DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {} 1169 1170 virtual void initialize(ScheduleDAGMI *dag); 1171 1172 virtual SUnit *pickNode(bool &IsTopNode); 1173 1174 virtual void schedNode(SUnit *SU, bool IsTopNode); 1175 1176 virtual void releaseTopNode(SUnit *SU); 1177 1178 virtual void releaseBottomNode(SUnit *SU); 1179 1180 virtual void registerRoots(); 1181 1182 protected: 1183 void balanceZones( 1184 ConvergingScheduler::SchedBoundary &CriticalZone, 1185 ConvergingScheduler::SchedCandidate &CriticalCand, 1186 ConvergingScheduler::SchedBoundary &OppositeZone, 1187 ConvergingScheduler::SchedCandidate &OppositeCand); 1188 1189 void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand, 1190 ConvergingScheduler::SchedCandidate &BotCand); 1191 1192 void tryCandidate(SchedCandidate &Cand, 1193 SchedCandidate &TryCand, 1194 SchedBoundary &Zone, 1195 const RegPressureTracker &RPTracker, 1196 RegPressureTracker &TempTracker); 1197 1198 SUnit *pickNodeBidirectional(bool &IsTopNode); 1199 1200 void pickNodeFromQueue(SchedBoundary &Zone, 1201 const RegPressureTracker &RPTracker, 1202 SchedCandidate &Candidate); 1203 1204 void reschedulePhysRegCopies(SUnit *SU, bool isTop); 1205 1206 #ifndef NDEBUG 1207 void traceCandidate(const SchedCandidate &Cand); 1208 #endif 1209 }; 1210 } // namespace 1211 1212 void ConvergingScheduler::SchedRemainder:: 1213 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { 1214 reset(); 1215 if (!SchedModel->hasInstrSchedModel()) 1216 return; 1217 RemainingCounts.resize(SchedModel->getNumProcResourceKinds()); 1218 for (std::vector<SUnit>::iterator 1219 I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) { 1220 const MCSchedClassDesc *SC = DAG->getSchedClass(&*I); 1221 RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC); 1222 for (TargetSchedModel::ProcResIter 1223 PI = SchedModel->getWriteProcResBegin(SC), 1224 PE = SchedModel->getWriteProcResEnd(SC); PI != 
PE; ++PI) { 1225 unsigned PIdx = PI->ProcResourceIdx; 1226 unsigned Factor = SchedModel->getResourceFactor(PIdx); 1227 RemainingCounts[PIdx] += (Factor * PI->Cycles); 1228 } 1229 } 1230 for (unsigned PIdx = 0, PEnd = SchedModel->getNumProcResourceKinds(); 1231 PIdx != PEnd; ++PIdx) { 1232 if ((int)(RemainingCounts[PIdx] - RemainingCounts[CritResIdx]) 1233 >= (int)SchedModel->getLatencyFactor()) { 1234 CritResIdx = PIdx; 1235 } 1236 } 1237 } 1238 1239 void ConvergingScheduler::SchedBoundary:: 1240 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) { 1241 reset(); 1242 DAG = dag; 1243 SchedModel = smodel; 1244 Rem = rem; 1245 if (SchedModel->hasInstrSchedModel()) 1246 ResourceCounts.resize(SchedModel->getNumProcResourceKinds()); 1247 } 1248 1249 void ConvergingScheduler::initialize(ScheduleDAGMI *dag) { 1250 DAG = dag; 1251 SchedModel = DAG->getSchedModel(); 1252 TRI = DAG->TRI; 1253 1254 Rem.init(DAG, SchedModel); 1255 Top.init(DAG, SchedModel, &Rem); 1256 Bot.init(DAG, SchedModel, &Rem); 1257 1258 // Initialize resource counts. 1259 1260 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or 1261 // are disabled, then these HazardRecs will be disabled. 1262 const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); 1263 const TargetMachine &TM = DAG->MF.getTarget(); 1264 Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); 1265 Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); 1266 1267 assert((!ForceTopDown || !ForceBottomUp) && 1268 "-misched-topdown incompatible with -misched-bottomup"); 1269 } 1270 1271 void ConvergingScheduler::releaseTopNode(SUnit *SU) { 1272 if (SU->isScheduled) 1273 return; 1274 1275 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1276 I != E; ++I) { 1277 unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; 1278 unsigned MinLatency = I->getMinLatency(); 1279 #ifndef NDEBUG 1280 Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency); 1281 #endif 1282 if (SU->TopReadyCycle < PredReadyCycle + MinLatency) 1283 SU->TopReadyCycle = PredReadyCycle + MinLatency; 1284 } 1285 Top.releaseNode(SU, SU->TopReadyCycle); 1286 } 1287 1288 void ConvergingScheduler::releaseBottomNode(SUnit *SU) { 1289 if (SU->isScheduled) 1290 return; 1291 1292 assert(SU->getInstr() && "Scheduled SUnit must have instr"); 1293 1294 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 1295 I != E; ++I) { 1296 if (I->isWeak()) 1297 continue; 1298 unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; 1299 unsigned MinLatency = I->getMinLatency(); 1300 #ifndef NDEBUG 1301 Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency); 1302 #endif 1303 if (SU->BotReadyCycle < SuccReadyCycle + MinLatency) 1304 SU->BotReadyCycle = SuccReadyCycle + MinLatency; 1305 } 1306 Bot.releaseNode(SU, SU->BotReadyCycle); 1307 } 1308 1309 void ConvergingScheduler::registerRoots() { 1310 Rem.CriticalPath = DAG->ExitSU.getDepth(); 1311 // Some roots may not feed into ExitSU. Check all of them in case. 1312 for (std::vector<SUnit*>::const_iterator 1313 I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) { 1314 if ((*I)->getDepth() > Rem.CriticalPath) 1315 Rem.CriticalPath = (*I)->getDepth(); 1316 } 1317 DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n'); 1318 } 1319 1320 /// Does this SU have a hazard within the current instruction group. 1321 /// 1322 /// The scheduler supports two modes of hazard recognition. 
The first is the
1323 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1324 /// supports highly complicated in-order reservation tables
1325 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
1326 ///
1327 /// The second is a streamlined mechanism that checks for hazards based on
1328 /// simple counters that the scheduler itself maintains. It explicitly checks
1329 /// for instruction dispatch limitations, including the number of micro-ops that
1330 /// can dispatch per cycle.
1331 ///
1332 /// TODO: Also check whether the SU must start a new group.
1333 bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
1334 if (HazardRec->isEnabled())
1335 return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
1336
1337 unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1338 if ((IssueCount > 0) && (IssueCount + uops > SchedModel->getIssueWidth())) {
1339 DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
1340 << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
1341 return true;
1342 }
1343 return false;
1344 }
1345
1346 /// Compute the remaining latency to determine whether ILP should be increased.
1347 void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) {
1348 // FIXME: compile time. In all, we visit four queues here; we should only
1349 // need to visit the one that was last popped if we cache the result.
1350 unsigned RemLatency = 0;
1351 for (ReadyQueue::iterator I = Available.begin(), E = Available.end();
1352 I != E; ++I) {
1353 unsigned L = getUnscheduledLatency(*I);
1354 DEBUG(dbgs() << " " << Available.getName()
1355 << " RemLatency SU(" << (*I)->NodeNum << ") " << L << '\n');
1356 if (L > RemLatency)
1357 RemLatency = L;
1358 }
1359 for (ReadyQueue::iterator I = Pending.begin(), E = Pending.end();
1360 I != E; ++I) {
1361 unsigned L = getUnscheduledLatency(*I);
1362 if (L > RemLatency)
1363 RemLatency = L;
1364 }
1365 unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow();
1366 DEBUG(dbgs() << " " << Available.getName()
1367 << " ExpectedLatency " << ExpectedLatency
1368 << " CP Limit " << CriticalPathLimit << '\n');
1369 if (RemLatency + ExpectedLatency >= CriticalPathLimit
1370 && RemLatency > Rem->getMaxRemainingCount(SchedModel)) {
1371 Policy.ReduceLatency = true;
1372 DEBUG(dbgs() << " Increase ILP: " << Available.getName() << '\n');
1373 }
1374 }
1375
1376 void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
1377 unsigned ReadyCycle) {
1378
1379 if (ReadyCycle < MinReadyCycle)
1380 MinReadyCycle = ReadyCycle;
1381
1382 // Check for interlocks first. For the purpose of other heuristics, an
1383 // instruction that cannot issue appears as if it's not in the ReadyQueue.
1384 if (ReadyCycle > CurrCycle || checkHazard(SU))
1385 Pending.push(SU);
1386 else
1387 Available.push(SU);
1388
1389 // Record this node as an immediate dependent of the scheduled node.
1390 NextSUs.insert(SU);
1391 }
1392
1393 /// Move the boundary of scheduled code by one cycle.
1394 void ConvergingScheduler::SchedBoundary::bumpCycle() {
1395 unsigned Width = SchedModel->getIssueWidth();
1396 IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
1397
1398 unsigned NextCycle = CurrCycle + 1;
1399 assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
1400 if (MinReadyCycle > NextCycle) {
1401 IssueCount = 0;
1402 NextCycle = MinReadyCycle;
1403 }
1404
1405 if (!HazardRec->isEnabled()) {
1406 // Bypass HazardRec virtual calls.
1407 CurrCycle = NextCycle; 1408 } 1409 else { 1410 // Bypass getHazardType calls in case of long latency. 1411 for (; CurrCycle != NextCycle; ++CurrCycle) { 1412 if (isTop()) 1413 HazardRec->AdvanceCycle(); 1414 else 1415 HazardRec->RecedeCycle(); 1416 } 1417 } 1418 CheckPending = true; 1419 IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle); 1420 1421 DEBUG(dbgs() << " " << Available.getName() 1422 << " Cycle: " << CurrCycle << '\n'); 1423 } 1424 1425 /// Add the given processor resource to this scheduled zone. 1426 void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx, 1427 unsigned Cycles) { 1428 unsigned Factor = SchedModel->getResourceFactor(PIdx); 1429 DEBUG(dbgs() << " " << SchedModel->getProcResource(PIdx)->Name 1430 << " +(" << Cycles << "x" << Factor 1431 << ") / " << SchedModel->getLatencyFactor() << '\n'); 1432 1433 unsigned Count = Factor * Cycles; 1434 ResourceCounts[PIdx] += Count; 1435 assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted"); 1436 Rem->RemainingCounts[PIdx] -= Count; 1437 1438 // Check if this resource exceeds the current critical resource by a full 1439 // cycle. If so, it becomes the critical resource. 1440 if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx]) 1441 >= (int)SchedModel->getLatencyFactor()) { 1442 CritResIdx = PIdx; 1443 DEBUG(dbgs() << " *** Critical resource " 1444 << SchedModel->getProcResource(PIdx)->Name << " x" 1445 << ResourceCounts[PIdx] << '\n'); 1446 } 1447 } 1448 1449 /// Move the boundary of scheduled code by one SUnit. 1450 void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) { 1451 // Update the reservation table. 1452 if (HazardRec->isEnabled()) { 1453 if (!isTop() && SU->isCall) { 1454 // Calls are scheduled with their preceding instructions. For bottom-up 1455 // scheduling, clear the pipeline state before emitting. 1456 HazardRec->Reset(); 1457 } 1458 HazardRec->EmitInstruction(SU); 1459 } 1460 // Update resource counts and critical resource. 1461 if (SchedModel->hasInstrSchedModel()) { 1462 const MCSchedClassDesc *SC = DAG->getSchedClass(SU); 1463 Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC); 1464 for (TargetSchedModel::ProcResIter 1465 PI = SchedModel->getWriteProcResBegin(SC), 1466 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { 1467 countResource(PI->ProcResourceIdx, PI->Cycles); 1468 } 1469 } 1470 if (isTop()) { 1471 if (SU->getDepth() > ExpectedLatency) 1472 ExpectedLatency = SU->getDepth(); 1473 } 1474 else { 1475 if (SU->getHeight() > ExpectedLatency) 1476 ExpectedLatency = SU->getHeight(); 1477 } 1478 1479 IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle); 1480 1481 // Check the instruction group dispatch limit. 1482 // TODO: Check if this SU must end a dispatch group. 1483 IssueCount += SchedModel->getNumMicroOps(SU->getInstr()); 1484 1485 // checkHazard prevents scheduling multiple instructions per cycle that exceed 1486 // issue width. However, we commonly reach the maximum. In this case 1487 // opportunistically bump the cycle to avoid uselessly checking everything in 1488 // the readyQ. Furthermore, a single instruction may produce more than one 1489 // cycle's worth of micro-ops. 1490 if (IssueCount >= SchedModel->getIssueWidth()) { 1491 DEBUG(dbgs() << " *** Max instrs at cycle " << CurrCycle << '\n'); 1492 bumpCycle(); 1493 } 1494 } 1495 1496 /// Release pending ready nodes in to the available queue. This makes them 1497 /// visible to heuristics. 
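/// Nodes land in Pending either because their ready cycle is still in the
/// future or because they had a hazard when released (see releaseNode); once
/// CurrCycle catches up and the hazard clears, they move to Available.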
1498 void ConvergingScheduler::SchedBoundary::releasePending() { 1499 // If the available queue is empty, it is safe to reset MinReadyCycle. 1500 if (Available.empty()) 1501 MinReadyCycle = UINT_MAX; 1502 1503 // Check to see if any of the pending instructions are ready to issue. If 1504 // so, add them to the available queue. 1505 for (unsigned i = 0, e = Pending.size(); i != e; ++i) { 1506 SUnit *SU = *(Pending.begin()+i); 1507 unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; 1508 1509 if (ReadyCycle < MinReadyCycle) 1510 MinReadyCycle = ReadyCycle; 1511 1512 if (ReadyCycle > CurrCycle) 1513 continue; 1514 1515 if (checkHazard(SU)) 1516 continue; 1517 1518 Available.push(SU); 1519 Pending.remove(Pending.begin()+i); 1520 --i; --e; 1521 } 1522 DEBUG(if (!Pending.empty()) Pending.dump()); 1523 CheckPending = false; 1524 } 1525 1526 /// Remove SU from the ready set for this boundary. 1527 void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) { 1528 if (Available.isInQueue(SU)) 1529 Available.remove(Available.find(SU)); 1530 else { 1531 assert(Pending.isInQueue(SU) && "bad ready count"); 1532 Pending.remove(Pending.find(SU)); 1533 } 1534 } 1535 1536 /// If this queue only has one ready candidate, return it. As a side effect, 1537 /// defer any nodes that now hit a hazard, and advance the cycle until at least 1538 /// one node is ready. If multiple instructions are ready, return NULL. 1539 SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() { 1540 if (CheckPending) 1541 releasePending(); 1542 1543 if (IssueCount > 0) { 1544 // Defer any ready instrs that now have a hazard. 1545 for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) { 1546 if (checkHazard(*I)) { 1547 Pending.push(*I); 1548 I = Available.remove(I); 1549 continue; 1550 } 1551 ++I; 1552 } 1553 } 1554 for (unsigned i = 0; Available.empty(); ++i) { 1555 assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) && 1556 "permanent hazard"); (void)i; 1557 bumpCycle(); 1558 releasePending(); 1559 } 1560 if (Available.size() == 1) 1561 return *Available.begin(); 1562 return NULL; 1563 } 1564 1565 /// Record the candidate policy for opposite zones with different critical 1566 /// resources. 1567 /// 1568 /// If the CriticalZone is latency limited, don't force a policy for the 1569 /// candidates here. Instead, setLatencyPolicy sets ReduceLatency if needed. 1570 void ConvergingScheduler::balanceZones( 1571 ConvergingScheduler::SchedBoundary &CriticalZone, 1572 ConvergingScheduler::SchedCandidate &CriticalCand, 1573 ConvergingScheduler::SchedBoundary &OppositeZone, 1574 ConvergingScheduler::SchedCandidate &OppositeCand) { 1575 1576 if (!CriticalZone.IsResourceLimited) 1577 return; 1578 assert(SchedModel->hasInstrSchedModel() && "required schedmodel"); 1579 1580 SchedRemainder *Rem = CriticalZone.Rem; 1581 1582 // If the critical zone is overconsuming a resource relative to the 1583 // remainder, try to reduce it. 1584 unsigned RemainingCritCount = 1585 Rem->RemainingCounts[CriticalZone.CritResIdx]; 1586 if ((int)(Rem->getMaxRemainingCount(SchedModel) - RemainingCritCount) 1587 > (int)SchedModel->getLatencyFactor()) { 1588 CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx; 1589 DEBUG(dbgs() << " Balance " << CriticalZone.Available.getName() 1590 << " reduce " 1591 << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name 1592 << '\n'); 1593 } 1594 // If the other zone is underconsuming a resource relative to the full zone, 1595 // try to increase it. 
  unsigned OppositeCount =
    OppositeZone.ResourceCounts[CriticalZone.CritResIdx];
  if ((int)(OppositeZone.ExpectedCount - OppositeCount)
      > (int)SchedModel->getLatencyFactor()) {
    OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx;
    DEBUG(dbgs() << " Balance " << OppositeZone.Available.getName()
          << " demand "
          << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name
          << '\n');
  }
}

/// Determine if the scheduled zones exceed resource limits or the critical
/// path, and set each candidate's latency and resource reduction policies
/// accordingly.
void ConvergingScheduler::checkResourceLimits(
  ConvergingScheduler::SchedCandidate &TopCand,
  ConvergingScheduler::SchedCandidate &BotCand) {

  // Set ReduceLatency to true if needed.
  Bot.setLatencyPolicy(BotCand.Policy);
  Top.setLatencyPolicy(TopCand.Policy);

  // Handle resource-limited regions.
  if (Top.IsResourceLimited && Bot.IsResourceLimited
      && Top.CritResIdx == Bot.CritResIdx) {
    // If the scheduled critical resource in both zones is no longer the
    // critical remaining resource, attempt to reduce resource height both ways.
    if (Top.CritResIdx != Rem.CritResIdx) {
      TopCand.Policy.ReduceResIdx = Top.CritResIdx;
      BotCand.Policy.ReduceResIdx = Bot.CritResIdx;
      DEBUG(dbgs() << " Reduce scheduled "
            << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n');
    }
    return;
  }
  // Handle latency-limited regions.
  if (!Top.IsResourceLimited && !Bot.IsResourceLimited) {
    // If the total scheduled expected latency exceeds the region's critical
    // path then reduce latency both ways.
    //
    // Just because a zone is not resource limited does not mean it is latency
    // limited. Unbuffered resources, such as the maximum number of micro-ops,
    // may cause CurrCycle to exceed the expected latency.
    if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath)
        && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) {
      TopCand.Policy.ReduceLatency = true;
      BotCand.Policy.ReduceLatency = true;
      DEBUG(dbgs() << " Reduce scheduled latency " << Top.ExpectedLatency
            << " + " << Bot.ExpectedLatency << '\n');
    }
    return;
  }
  // The critical resource is different in each zone, so request balancing.

  // Compute the cost of each zone.
  Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle);
  Top.ExpectedCount = std::max(
    Top.getCriticalCount(),
    Top.ExpectedCount * SchedModel->getLatencyFactor());
  Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle);
  Bot.ExpectedCount = std::max(
    Bot.getCriticalCount(),
    Bot.ExpectedCount * SchedModel->getLatencyFactor());

  balanceZones(Top, TopCand, Bot, BotCand);
  balanceZones(Bot, BotCand, Top, TopCand);
}

void ConvergingScheduler::SchedCandidate::
initResourceDelta(const ScheduleDAGMI *DAG,
                  const TargetSchedModel *SchedModel) {
  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
    return;

  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
  for (TargetSchedModel::ProcResIter
         PI = SchedModel->getWriteProcResBegin(SC),
         PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
    if (PI->ProcResourceIdx == Policy.ReduceResIdx)
      ResDelta.CritResources += PI->Cycles;
    if (PI->ProcResourceIdx == Policy.DemandResIdx)
      ResDelta.DemandedResources += PI->Cycles;
  }
}

/// Return true if this heuristic determines order.
static bool tryLess(int TryVal, int CandVal,
                    ConvergingScheduler::SchedCandidate &TryCand,
                    ConvergingScheduler::SchedCandidate &Cand,
                    ConvergingScheduler::CandReason Reason) {
  if (TryVal < CandVal) {
    TryCand.Reason = Reason;
    return true;
  }
  if (TryVal > CandVal) {
    if (Cand.Reason > Reason)
      Cand.Reason = Reason;
    return true;
  }
  return false;
}

static bool tryGreater(int TryVal, int CandVal,
                       ConvergingScheduler::SchedCandidate &TryCand,
                       ConvergingScheduler::SchedCandidate &Cand,
                       ConvergingScheduler::CandReason Reason) {
  if (TryVal > CandVal) {
    TryCand.Reason = Reason;
    return true;
  }
  if (TryVal < CandVal) {
    if (Cand.Reason > Reason)
      Cand.Reason = Reason;
    return true;
  }
  return false;
}

static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
}

/// Minimize physical register live ranges. Regalloc wants them adjacent to
/// their physreg def/use.
///
/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
/// with the operation that produces or consumes the physreg. We'll do this when
/// regalloc has support for parallel copies.
static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
  const MachineInstr *MI = SU->getInstr();
  if (!MI->isCopy())
    return 0;

  unsigned ScheduledOper = isTop ? 1 : 0;
  unsigned UnscheduledOper = isTop ? 0 : 1;
  // If we have already scheduled the physreg producer/consumer, immediately
  // schedule the copy.
  if (TargetRegisterInfo::isPhysicalRegister(
        MI->getOperand(ScheduledOper).getReg()))
    return 1;
  // If the physreg is at the boundary, defer it. Otherwise schedule it
  // immediately to free the dependent. We can hoist the copy later.
  bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
  if (TargetRegisterInfo::isPhysicalRegister(
        MI->getOperand(UnscheduledOper).getReg()))
    return AtBoundary ? -1 : 1;
  return 0;
}
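
// Worked example (illustrative values, not tied to any target) for the
// three-way helpers above: tryLess(2, 5, TryCand, Cand, SingleExcess) sees
// TryVal < CandVal, records SingleExcess as TryCand.Reason, and returns true,
// so TryCand becomes the new best candidate. tryLess(5, 2, ...) keeps Cand as
// the winner but still returns true, strengthening Cand.Reason to SingleExcess
// if its current reason is weaker (numerically greater). Only equal values
// return false and let the next heuristic in tryCandidate break the tie.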

/// Apply a set of heuristics to a new candidate. Heuristics are currently
/// hierarchical. This may be more efficient than a graduated cost model
/// because we don't need to evaluate all aspects of the model for each node
/// in the queue. But it's really done to make the heuristics easier to debug
/// and statistically analyze.
///
/// \param Cand provides the policy and current best candidate.
/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
/// \param Zone describes the scheduled zone that we are extending.
/// \param RPTracker describes reg pressure within the scheduled zone.
/// \param TempTracker is a scratch pressure tracker to reuse in queries.
void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
                                       SchedCandidate &TryCand,
                                       SchedBoundary &Zone,
                                       const RegPressureTracker &RPTracker,
                                       RegPressureTracker &TempTracker) {

  // Always initialize TryCand's RPDelta.
  TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta,
                                  DAG->getRegionCriticalPSets(),
                                  DAG->getRegPressure().MaxSetPressure);

  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return;
  }

  if (tryGreater(biasPhysRegCopy(TryCand.SU, Zone.isTop()),
                 biasPhysRegCopy(Cand.SU, Zone.isTop()),
                 TryCand, Cand, PhysRegCopy))
    return;

  // Avoid exceeding the target's limit.
  if (tryLess(TryCand.RPDelta.Excess.UnitIncrease,
              Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess))
    return;
  if (Cand.Reason == SingleExcess)
    Cand.Reason = MultiPressure;

  // Avoid increasing the max critical pressure in the scheduled region.
  if (tryLess(TryCand.RPDelta.CriticalMax.UnitIncrease,
              Cand.RPDelta.CriticalMax.UnitIncrease,
              TryCand, Cand, SingleCritical))
    return;
  if (Cand.Reason == SingleCritical)
    Cand.Reason = MultiPressure;

  // Keep clustered nodes together to encourage downstream peephole
  // optimizations which may reduce resource requirements.
  //
  // This is a best effort to set things up for a post-RA pass. Optimizations
  // like generating loads of multiple registers should ideally be done within
  // the scheduler pass by combining the loads during DAG postprocessing.
  const SUnit *NextClusterSU =
    Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
                 TryCand, Cand, Cluster))
    return;
  // Currently, weak edges are for clustering, so we hard-code that reason.
  // However, deferring the current TryCand will not change Cand's reason.
  CandReason OrigReason = Cand.Reason;
  if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
              getWeakLeft(Cand.SU, Zone.isTop()),
              TryCand, Cand, Cluster)) {
    Cand.Reason = OrigReason;
    return;
  }
  // Avoid critical resource consumption and balance the schedule.
  TryCand.initResourceDelta(DAG, SchedModel);
  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
              TryCand, Cand, ResourceReduce))
    return;
  if (tryGreater(TryCand.ResDelta.DemandedResources,
                 Cand.ResDelta.DemandedResources,
                 TryCand, Cand, ResourceDemand))
    return;

  // Avoid serializing long latency dependence chains.
  if (Cand.Policy.ReduceLatency) {
    if (Zone.isTop()) {
      if (Cand.SU->getDepth() * SchedModel->getLatencyFactor()
          > Zone.ExpectedCount) {
        if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
                    TryCand, Cand, TopDepthReduce))
          return;
      }
      if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
                     TryCand, Cand, TopPathReduce))
        return;
    }
    else {
      if (Cand.SU->getHeight() * SchedModel->getLatencyFactor()
          > Zone.ExpectedCount) {
        if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
                    TryCand, Cand, BotHeightReduce))
          return;
      }
      if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
                     TryCand, Cand, BotPathReduce))
        return;
    }
  }

  // Avoid increasing the max pressure of the entire region.
  if (tryLess(TryCand.RPDelta.CurrentMax.UnitIncrease,
              Cand.RPDelta.CurrentMax.UnitIncrease, TryCand, Cand, SingleMax))
    return;
  if (Cand.Reason == SingleMax)
    Cand.Reason = MultiPressure;

  // Prefer immediate defs/users of the last scheduled instruction. This is a
  // nice pressure avoidance strategy that also conserves the processor's
  // register renaming resources and keeps the machine code readable.
  if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU),
                 TryCand, Cand, NextDefUse))
    return;

  // Fall through to original instruction order.
  if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
      || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
    TryCand.Reason = NodeOrder;
  }
}

/// pickNodeFromQueue helper that returns true if the LHS reg pressure effect is
/// more desirable than RHS from a scheduling standpoint.
static bool compareRPDelta(const RegPressureDelta &LHS,
                           const RegPressureDelta &RHS) {
  // Compare each component of pressure in decreasing order of importance
  // without checking if any are valid. Invalid PressureElements are assumed to
  // have UnitIncrease==0, so are neutral.

  // Avoid exceeding the target's pressure limit.
  if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) {
    DEBUG(dbgs() << " RP excess top - bot: "
          << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n');
    return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease;
  }
  // Avoid increasing the max critical pressure in the scheduled region.
  if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) {
    DEBUG(dbgs() << " RP critical top - bot: "
          << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease)
          << '\n');
    return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease;
  }
  // Avoid increasing the max pressure of the entire region.
  if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) {
    DEBUG(dbgs() << " RP current top - bot: "
          << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease)
          << '\n');
    return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease;
  }
  return false;
}

#ifndef NDEBUG
const char *ConvergingScheduler::getReasonStr(
  ConvergingScheduler::CandReason Reason) {
  switch (Reason) {
  case NoCand:         return "NOCAND ";
  case PhysRegCopy:    return "PREG-COPY";
  case SingleExcess:   return "REG-EXCESS";
  case SingleCritical: return "REG-CRIT ";
  case Cluster:        return "CLUSTER ";
  case SingleMax:      return "REG-MAX ";
  case MultiPressure:  return "REG-MULTI ";
  case ResourceReduce: return "RES-REDUCE";
  case ResourceDemand: return "RES-DEMAND";
  case TopDepthReduce: return "TOP-DEPTH ";
  case TopPathReduce:  return "TOP-PATH ";
  case BotHeightReduce:return "BOT-HEIGHT";
  case BotPathReduce:  return "BOT-PATH ";
  case NextDefUse:     return "DEF-USE ";
  case NodeOrder:      return "ORDER ";
  };
  llvm_unreachable("Unknown reason!");
}

void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand) {
  PressureElement P;
  unsigned ResIdx = 0;
  unsigned Latency = 0;
  switch (Cand.Reason) {
  default:
    break;
  case SingleExcess:
    P = Cand.RPDelta.Excess;
    break;
  case SingleCritical:
    P = Cand.RPDelta.CriticalMax;
    break;
  case SingleMax:
    P = Cand.RPDelta.CurrentMax;
    break;
  case ResourceReduce:
    ResIdx = Cand.Policy.ReduceResIdx;
    break;
  case ResourceDemand:
    ResIdx = Cand.Policy.DemandResIdx;
    break;
  case TopDepthReduce:
    Latency = Cand.SU->getDepth();
    break;
  case TopPathReduce:
    Latency = Cand.SU->getHeight();
    break;
  case BotHeightReduce:
    Latency = Cand.SU->getHeight();
    break;
  case BotPathReduce:
    Latency = Cand.SU->getDepth();
    break;
  }
  dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
  if (P.isValid())
    dbgs() << " " << TRI->getRegPressureSetName(P.PSetID)
           << ":" << P.UnitIncrease << " ";
  else
    dbgs() << " ";
  if (ResIdx)
    dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
  else
    dbgs() << " ";
  if (Latency)
    dbgs() << " " << Latency << " cycles ";
  else
    dbgs() << " ";
  dbgs() << '\n';
}
#endif

/// Pick the best candidate from the given queue.
///
/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
/// DAG building. To adjust for the current scheduling location we need to
/// maintain the number of vreg uses remaining to be top-scheduled.
void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
                                            const RegPressureTracker &RPTracker,
                                            SchedCandidate &Cand) {
  ReadyQueue &Q = Zone.Available;

  DEBUG(Q.dump());

  // getMaxPressureDelta temporarily modifies the tracker.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);

  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {

    SchedCandidate TryCand(Cand.Policy);
    TryCand.SU = *I;
    tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
    if (TryCand.Reason != NoCand) {
      // Initialize resource delta if needed in case future heuristics query it.
      if (TryCand.ResDelta == SchedResourceDelta())
        TryCand.initResourceDelta(DAG, SchedModel);
      Cand.setBest(TryCand);
      DEBUG(traceCandidate(Cand));
    }
  }
}

static void tracePick(const ConvergingScheduler::SchedCandidate &Cand,
                      bool IsTop) {
  DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
        << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
}

/// Pick the best candidate node from either the top or bottom queue.
SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    DEBUG(dbgs() << "Pick Bot NOCAND\n");
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    DEBUG(dbgs() << "Pick Top NOCAND\n");
    return SU;
  }
  CandPolicy NoPolicy;
  SchedCandidate BotCand(NoPolicy);
  SchedCandidate TopCand(NoPolicy);
  checkResourceLimits(TopCand, BotCand);

  // Prefer bottom scheduling when heuristics are silent.
  pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
  assert(BotCand.Reason != NoCand && "failed to find the first candidate");

  // If either Q has a single candidate that provides the least increase in
  // Excess pressure, we can immediately schedule from that Q.
  //
  // RegionCriticalPSets summarizes the pressure within the scheduled region and
  // affects picking from either Q. If scheduling in one direction must
  // increase pressure for one of the excess PSets, then schedule in that
  // direction first to provide more freedom in the other direction.
  if (BotCand.Reason == SingleExcess || BotCand.Reason == SingleCritical) {
    IsTopNode = false;
    tracePick(BotCand, IsTopNode);
    return BotCand.SU;
  }
  // Check if the top Q has a better candidate.
  pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
  assert(TopCand.Reason != NoCand && "failed to find the first candidate");

  // If either Q has a single candidate that minimizes pressure above the
  // original region's pressure, pick it.
  if (TopCand.Reason <= SingleMax || BotCand.Reason <= SingleMax) {
    if (TopCand.Reason < BotCand.Reason) {
      IsTopNode = true;
      tracePick(TopCand, IsTopNode);
      return TopCand.SU;
    }
    IsTopNode = false;
    tracePick(BotCand, IsTopNode);
    return BotCand.SU;
  }
  // Check for a salient pressure difference and pick the best from either side.
  if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) {
    IsTopNode = true;
    tracePick(TopCand, IsTopNode);
    return TopCand.SU;
  }
  // Otherwise prefer the bottom candidate, in node order if all else failed.
  if (TopCand.Reason < BotCand.Reason) {
    IsTopNode = true;
    tracePick(TopCand, IsTopNode);
    return TopCand.SU;
  }
  IsTopNode = false;
  tracePick(BotCand, IsTopNode);
  return BotCand.SU;
}
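
// Illustrative note on the reason comparisons above (a reading of the code,
// not a separate contract): CandReason is ordered so that a numerically
// smaller value is a stronger reason. A candidate picked for SingleExcess
// (avoiding a register-limit excess) therefore beats one picked only for
// NodeOrder, and "TopCand.Reason < BotCand.Reason" means the top candidate
// was chosen for a stronger reason than the bottom one.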

/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return NULL;
  }
  SUnit *SU;
  do {
    if (ForceTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        SchedCandidate TopCand(NoPolicy);
        pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
        assert(TopCand.Reason != NoCand && "failed to find the first candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    }
    else if (ForceBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        SchedCandidate BotCand(NoPolicy);
        pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
        assert(BotCand.Reason != NoCand && "failed to find the first candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    }
    else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
  return SU;
}

void ConvergingScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {

  MachineBasicBlock::iterator InsertPos = SU->getInstr();
  if (!isTop)
    ++InsertPos;
  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;

  // Find already scheduled copies with a single physreg dependence and move
  // them just above (top-down) or just below (bottom-up) the scheduled
  // instruction.
  for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end();
       I != E; ++I) {
    if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg()))
      continue;
    SUnit *DepSU = I->getSUnit();
    if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
      continue;
    MachineInstr *Copy = DepSU->getInstr();
    if (!Copy->isCopy())
      continue;
    DEBUG(dbgs() << " Rescheduling physreg copy ";
          I->getSUnit()->dump(DAG));
    DAG->moveInstruction(Copy, InsertPos);
  }
}

/// Update the scheduler's state after scheduling a node. This is the same node
/// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
/// its state based on the current cycle before MachineSchedStrategy does.
///
/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
/// them here. See comments in biasPhysRegCopy.
void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
  if (IsTopNode) {
    SU->TopReadyCycle = Top.CurrCycle;
    Top.bumpNode(SU);
    if (SU->hasPhysRegUses)
      reschedulePhysRegCopies(SU, true);
  }
  else {
    SU->BotReadyCycle = Bot.CurrCycle;
    Bot.bumpNode(SU);
    if (SU->hasPhysRegDefs)
      reschedulePhysRegCopies(SU, false);
  }
}

/// Create the standard converging machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
  assert((!ForceTopDown || !ForceBottomUp) &&
         "-misched-topdown incompatible with -misched-bottomup");
  ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler());
  // Register DAG post-processors.
  if (EnableLoadCluster)
    DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
  if (EnableMacroFusion)
    DAG->addMutation(new MacroFusion(DAG->TII));
  return DAG;
}
static MachineSchedRegistry
ConvergingSchedRegistry("converge", "Standard converging scheduler.",
                        createConvergingSched);

//===----------------------------------------------------------------------===//
// ILP Scheduler. Currently for experimental analysis of heuristics.
//===----------------------------------------------------------------------===//

namespace {
/// \brief Order nodes by the ILP metric.
struct ILPOrder {
  const SchedDFSResult *DFSResult;
  const BitVector *ScheduledTrees;
  bool MaximizeILP;

  ILPOrder(bool MaxILP): DFSResult(0), ScheduledTrees(0), MaximizeILP(MaxILP) {}

  /// \brief Apply a less-than relation on node priority.
  ///
  /// (Return true if A comes after B in the Q.)
  bool operator()(const SUnit *A, const SUnit *B) const {
    unsigned SchedTreeA = DFSResult->getSubtreeID(A);
    unsigned SchedTreeB = DFSResult->getSubtreeID(B);
    if (SchedTreeA != SchedTreeB) {
      // Unscheduled trees have lower priority.
      if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
        return ScheduledTrees->test(SchedTreeB);

      // Trees with shallower connections have lower priority.
      if (DFSResult->getSubtreeLevel(SchedTreeA)
          != DFSResult->getSubtreeLevel(SchedTreeB)) {
        return DFSResult->getSubtreeLevel(SchedTreeA)
          < DFSResult->getSubtreeLevel(SchedTreeB);
      }
    }
    if (MaximizeILP)
      return DFSResult->getILP(A) < DFSResult->getILP(B);
    else
      return DFSResult->getILP(A) > DFSResult->getILP(B);
  }
};

/// \brief Schedule based on the ILP metric.
class ILPScheduler : public MachineSchedStrategy {
  /// In case all subtrees are eventually connected to a common root through
  /// data dependence (e.g. reduction), place an upper limit on their size.
  ///
  /// FIXME: A subtree limit is generally good, but in the situation commented
  /// above, where multiple similar subtrees feed a common root, we should
  /// only split at a point where the resulting subtrees will be balanced.
  /// (a motivating test case must be found).
  static const unsigned SubtreeLimit = 16;

  ScheduleDAGMI *DAG;
  ILPOrder Cmp;

  std::vector<SUnit*> ReadyQ;
public:
  ILPScheduler(bool MaximizeILP): DAG(0), Cmp(MaximizeILP) {}

  virtual void initialize(ScheduleDAGMI *dag) {
    DAG = dag;
    DAG->computeDFSResult();
    Cmp.DFSResult = DAG->getDFSResult();
    Cmp.ScheduledTrees = &DAG->getScheduledTrees();
    ReadyQ.clear();
  }

  virtual void registerRoots() {
    // Restore the heap in ReadyQ with the updated DFS results.
    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  }
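
  // A note on the queue discipline, as read from the callbacks in this class:
  // ReadyQ is kept as a heap ordered by Cmp (std::push_heap when a node is
  // released, std::pop_heap when one is picked). Whenever the comparator's
  // inputs change (DFS data updated above, or a subtree's ScheduledTrees bit
  // flipping in scheduleTree below), std::make_heap re-establishes the heap
  // property before the next pick.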

  /// Implement MachineSchedStrategy interface.
  /// -----------------------------------------

  /// Callback to select the highest priority node from the ready Q.
  virtual SUnit *pickNode(bool &IsTopNode) {
    if (ReadyQ.empty()) return NULL;
    std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
    SUnit *SU = ReadyQ.back();
    ReadyQ.pop_back();
    IsTopNode = false;
    DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") "
          << " ILP: " << DAG->getDFSResult()->getILP(SU)
          << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
          << DAG->getDFSResult()->getSubtreeLevel(
            DAG->getDFSResult()->getSubtreeID(SU)) << '\n'
          << "Scheduling " << *SU->getInstr());
    return SU;
  }

  /// \brief Scheduler callback to notify that a new subtree is scheduled.
  virtual void scheduleTree(unsigned SubtreeID) {
    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  }

  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
  /// DFSResults, and resort the priority Q.
  virtual void schedNode(SUnit *SU, bool IsTopNode) {
    assert(!IsTopNode && "SchedDFSResult needs bottom-up");
  }

  virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }

  virtual void releaseBottomNode(SUnit *SU) {
    ReadyQ.push_back(SU);
    std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  }
};
} // namespace

static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMI(C, new ILPScheduler(true));
}
static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMI(C, new ILPScheduler(false));
}
static MachineSchedRegistry ILPMaxRegistry(
  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
static MachineSchedRegistry ILPMinRegistry(
  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);

//===----------------------------------------------------------------------===//
// Machine Instruction Shuffler for Correctness Testing
//===----------------------------------------------------------------------===//

#ifndef NDEBUG
namespace {
/// Apply a less-than relation on the node order, which corresponds to the
/// instruction order prior to scheduling. IsReverse implements greater-than.
template<bool IsReverse>
struct SUnitOrder {
  bool operator()(SUnit *A, SUnit *B) const {
    if (IsReverse)
      return A->NodeNum > B->NodeNum;
    else
      return A->NodeNum < B->NodeNum;
  }
};

/// Reorder instructions as much as possible.
class InstructionShuffler : public MachineSchedStrategy {
  bool IsAlternating;
  bool IsTopDown;

  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
  // gives nodes with a higher number higher priority, causing the latest
  // instructions to be scheduled first.
  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
    TopQ;
  // When scheduling bottom-up, use greater-than as the queue priority.
  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
    BottomQ;
public:
  InstructionShuffler(bool alternate, bool topdown)
    : IsAlternating(alternate), IsTopDown(topdown) {}

  virtual void initialize(ScheduleDAGMI *) {
    TopQ.clear();
    BottomQ.clear();
  }

  /// Implement MachineSchedStrategy interface.
  /// -----------------------------------------

  virtual SUnit *pickNode(bool &IsTopNode) {
    SUnit *SU;
    if (IsTopDown) {
      do {
        if (TopQ.empty()) return NULL;
        SU = TopQ.top();
        TopQ.pop();
      } while (SU->isScheduled);
      IsTopNode = true;
    }
    else {
      do {
        if (BottomQ.empty()) return NULL;
        SU = BottomQ.top();
        BottomQ.pop();
      } while (SU->isScheduled);
      IsTopNode = false;
    }
    if (IsAlternating)
      IsTopDown = !IsTopDown;
    return SU;
  }

  virtual void schedNode(SUnit *SU, bool IsTopNode) {}

  virtual void releaseTopNode(SUnit *SU) {
    TopQ.push(SU);
  }
  virtual void releaseBottomNode(SUnit *SU) {
    BottomQ.push(SU);
  }
};
} // namespace

static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
  bool Alternate = !ForceTopDown && !ForceBottomUp;
  bool TopDown = !ForceBottomUp;
  assert((TopDown || !ForceTopDown) &&
         "-misched-topdown incompatible with -misched-bottomup");
  return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown));
}
static MachineSchedRegistry ShufflerRegistry(
  "shuffle", "Shuffle machine instructions alternating directions",
  createInstructionShuffler);
#endif // !NDEBUG

//===----------------------------------------------------------------------===//
// GraphWriter support for ScheduleDAGMI.
//===----------------------------------------------------------------------===//

#ifndef NDEBUG
namespace llvm {

template<> struct GraphTraits<
  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};

template<>
struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {

  DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}

  static std::string getGraphName(const ScheduleDAG *G) {
    return G->MF.getName();
  }

  static bool renderGraphFromBottomUp() {
    return true;
  }

  static bool isNodeHidden(const SUnit *Node) {
    return (Node->NumPreds > 10 || Node->NumSuccs > 10);
  }

  static bool hasNodeAddressLabel(const SUnit *Node,
                                  const ScheduleDAG *Graph) {
    return false;
  }

  /// If you want to override the dot attributes printed for a particular
  /// edge, override this method.
  static std::string getEdgeAttributes(const SUnit *Node,
                                       SUnitIterator EI,
                                       const ScheduleDAG *Graph) {
    if (EI.isArtificialDep())
      return "color=cyan,style=dashed";
    if (EI.isCtrlDep())
      return "color=blue,style=dashed";
    return "";
  }

  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
    std::string Str;
    raw_string_ostream SS(Str);
    SS << "SU(" << SU->NodeNum << ')';
    return SS.str();
  }
  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
    return G->getGraphNodeLabel(SU);
  }

  static std::string getNodeAttributes(const SUnit *N,
                                       const ScheduleDAG *Graph) {
    std::string Str("shape=Mrecord");
    const SchedDFSResult *DFS =
      static_cast<const ScheduleDAGMI*>(Graph)->getDFSResult();
    if (DFS) {
      Str += ",style=filled,fillcolor=\"#";
      Str += DOT::getColorString(DFS->getSubtreeID(N));
      Str += '"';
    }
    return Str;
  }
};
} // namespace llvm
#endif // NDEBUG

/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
/// rendered using 'dot'.
///
void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
#ifndef NDEBUG
  ViewGraph(this, Name, false, Title);
#else
  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
         << "systems with Graphviz or gv!\n";
#endif // NDEBUG
}

/// Out-of-line implementation with no arguments is handy for gdb.
void ScheduleDAGMI::viewGraph() {
  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
}
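
// Usage sketch (debug builds only, and assuming Graphviz or gv is available):
// the no-argument overload above is convenient from a debugger, e.g.
//   (gdb) call DAG->viewGraph()
// on a live ScheduleDAGMI pointer named DAG (a hypothetical variable name), or
// the pass can be asked to render each region with the -view-misched-dags
// option declared earlier in this file.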