//===- MachineScheduler.cpp - Machine Instruction Scheduler --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// MachineScheduler schedules machine instructions after phi elimination. It
// preserves LiveIntervals so it can be invoked before register allocation.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "machine-scheduler"

STATISTIC(NumClustered, "Number of load/store pairs clustered");

namespace llvm {

cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
                           cl::desc("Force top-down list scheduling"));
cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
                            cl::desc("Force bottom-up list scheduling"));
cl::opt<bool>
    DumpCriticalPathLength("misched-dcpl", cl::Hidden,
                           cl::desc("Print critical path length to stdout"));

cl::opt<bool> VerifyScheduling(
    "verify-misched", cl::Hidden,
    cl::desc("Verify machine instrs before and after machine scheduling"));
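// Usage note (illustrative, not an exhaustive list): these are ordinary
// cl::opts, so any CodeGen driver that parses LLVM command-line options can
// set them, e.g. "llc -misched-topdown -verify-misched foo.ll" forces a pure
// top-down schedule and verifies machine IR before and after the pass.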
#ifndef NDEBUG
cl::opt<bool> ViewMISchedDAGs(
    "view-misched-dags", cl::Hidden,
    cl::desc("Pop up a window to show MISched dags after they are processed"));
cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
                        cl::desc("Print schedule DAGs"));
cl::opt<bool> MISchedDumpReservedCycles(
    "misched-dump-reserved-cycles", cl::Hidden, cl::init(false),
    cl::desc("Dump resource usage at schedule boundary."));
#else
const bool ViewMISchedDAGs = false;
const bool PrintDAGs = false;
#ifdef LLVM_ENABLE_DUMP
const bool MISchedDumpReservedCycles = false;
#endif // LLVM_ENABLE_DUMP
#endif // NDEBUG

} // end namespace llvm

#ifndef NDEBUG
/// In some situations a few uninteresting nodes depend on nearly all other
/// nodes in the graph; provide a cutoff to hide them.
static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
  cl::desc("Hide nodes with more predecessors/successors than the cutoff"));

static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));

static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
  cl::desc("Only schedule this function"));
static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
  cl::desc("Only schedule this MBB#"));
#endif // NDEBUG

/// Avoid quadratic complexity in unusually large basic blocks by limiting the
/// size of the ready lists.
static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
  cl::desc("Limit ready list to N instructions"), cl::init(256));

static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
  cl::desc("Enable register pressure scheduling."), cl::init(true));

static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
  cl::desc("Enable cyclic critical path analysis."), cl::init(true));

static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
                                        cl::desc("Enable memop clustering."),
                                        cl::init(true));
static cl::opt<bool>
    ForceFastCluster("force-fast-cluster", cl::Hidden,
                     cl::desc("Switch to fast cluster algorithm with the loss "
                              "of some fusion opportunities"),
                     cl::init(false));
static cl::opt<unsigned>
    FastClusterThreshold("fast-cluster-threshold", cl::Hidden,
                         cl::desc("The threshold for fast cluster"),
                         cl::init(1000));

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
static cl::opt<bool> MISchedDumpScheduleTrace(
    "misched-dump-schedule-trace", cl::Hidden, cl::init(false),
    cl::desc("Dump resource usage at schedule boundary."));
static cl::opt<unsigned>
    HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden,
                   cl::desc("Set width of the columns with "
                            "the resources and schedule units"),
                   cl::init(19));
static cl::opt<unsigned>
    ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden,
             cl::desc("Set width of the columns showing resource booking."),
             cl::init(5));
#endif

static cl::opt<unsigned>
    MIResourceCutOff("misched-resource-cutoff", cl::Hidden,
                     cl::desc("Number of intervals to track"), cl::init(10));

// DAG subtrees must have at least this many nodes.
static const unsigned MinSubtreeSize = 8;

// Pin the vtables to this file.
void MachineSchedStrategy::anchor() {}

void ScheduleDAGMutation::anchor() {}

//===----------------------------------------------------------------------===//
// Machine Instruction Scheduling Pass and Registry
//===----------------------------------------------------------------------===//

MachineSchedContext::MachineSchedContext() {
  RegClassInfo = new RegisterClassInfo();
}

MachineSchedContext::~MachineSchedContext() {
  delete RegClassInfo;
}

namespace {

/// Base class for a machine scheduler class that can run at any point.
class MachineSchedulerBase : public MachineSchedContext,
                             public MachineFunctionPass {
public:
  MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}

  void print(raw_ostream &O, const Module* = nullptr) const override;

protected:
  void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
};

/// MachineScheduler runs after coalescing and before register allocation.
class MachineScheduler : public MachineSchedulerBase {
public:
  MachineScheduler();

  void getAnalysisUsage(AnalysisUsage &AU) const override;

  bool runOnMachineFunction(MachineFunction&) override;

  static char ID; // Class identification, replacement for typeinfo

protected:
  ScheduleDAGInstrs *createMachineScheduler();
};

/// PostMachineScheduler runs shortly before code emission.
class PostMachineScheduler : public MachineSchedulerBase {
public:
  PostMachineScheduler();

  void getAnalysisUsage(AnalysisUsage &AU) const override;

  bool runOnMachineFunction(MachineFunction&) override;

  static char ID; // Class identification, replacement for typeinfo

protected:
  ScheduleDAGInstrs *createPostMachineScheduler();
};

} // end anonymous namespace

char MachineScheduler::ID = 0;

char &llvm::MachineSchedulerID = MachineScheduler::ID;

INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
                      "Machine Instruction Scheduler", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
                    "Machine Instruction Scheduler", false, false)

MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
}

void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequired<MachineDominatorTree>();
  AU.addRequired<MachineLoopInfo>();
  AU.addRequired<AAResultsWrapperPass>();
  AU.addRequired<TargetPassConfig>();
  AU.addRequired<SlotIndexes>();
  AU.addPreserved<SlotIndexes>();
  AU.addRequired<LiveIntervals>();
  AU.addPreserved<LiveIntervals>();
  MachineFunctionPass::getAnalysisUsage(AU);
}
Instruction Scheduler", false, false) 277 278 PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) { 279 initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry()); 280 } 281 282 void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { 283 AU.setPreservesCFG(); 284 AU.addRequired<MachineDominatorTree>(); 285 AU.addRequired<MachineLoopInfo>(); 286 AU.addRequired<AAResultsWrapperPass>(); 287 AU.addRequired<TargetPassConfig>(); 288 MachineFunctionPass::getAnalysisUsage(AU); 289 } 290 291 MachinePassRegistry<MachineSchedRegistry::ScheduleDAGCtor> 292 MachineSchedRegistry::Registry; 293 294 /// A dummy default scheduler factory indicates whether the scheduler 295 /// is overridden on the command line. 296 static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) { 297 return nullptr; 298 } 299 300 /// MachineSchedOpt allows command line selection of the scheduler. 301 static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false, 302 RegisterPassParser<MachineSchedRegistry>> 303 MachineSchedOpt("misched", 304 cl::init(&useDefaultMachineSched), cl::Hidden, 305 cl::desc("Machine instruction scheduler to use")); 306 307 static MachineSchedRegistry 308 DefaultSchedRegistry("default", "Use the target's default scheduler choice.", 309 useDefaultMachineSched); 310 311 static cl::opt<bool> EnableMachineSched( 312 "enable-misched", 313 cl::desc("Enable the machine instruction scheduling pass."), cl::init(true), 314 cl::Hidden); 315 316 static cl::opt<bool> EnablePostRAMachineSched( 317 "enable-post-misched", 318 cl::desc("Enable the post-ra machine instruction scheduling pass."), 319 cl::init(true), cl::Hidden); 320 321 /// Decrement this iterator until reaching the top or a non-debug instr. 322 static MachineBasicBlock::const_iterator 323 priorNonDebug(MachineBasicBlock::const_iterator I, 324 MachineBasicBlock::const_iterator Beg) { 325 assert(I != Beg && "reached the top of the region, cannot decrement"); 326 while (--I != Beg) { 327 if (!I->isDebugOrPseudoInstr()) 328 break; 329 } 330 return I; 331 } 332 333 /// Non-const version. 334 static MachineBasicBlock::iterator 335 priorNonDebug(MachineBasicBlock::iterator I, 336 MachineBasicBlock::const_iterator Beg) { 337 return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg) 338 .getNonConstIterator(); 339 } 340 341 /// If this iterator is a debug value, increment until reaching the End or a 342 /// non-debug instruction. 343 static MachineBasicBlock::const_iterator 344 nextIfDebug(MachineBasicBlock::const_iterator I, 345 MachineBasicBlock::const_iterator End) { 346 for(; I != End; ++I) { 347 if (!I->isDebugOrPseudoInstr()) 348 break; 349 } 350 return I; 351 } 352 353 /// Non-const version. 354 static MachineBasicBlock::iterator 355 nextIfDebug(MachineBasicBlock::iterator I, 356 MachineBasicBlock::const_iterator End) { 357 return nextIfDebug(MachineBasicBlock::const_iterator(I), End) 358 .getNonConstIterator(); 359 } 360 361 /// Instantiate a ScheduleDAGInstrs that will be owned by the caller. 362 ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() { 363 // Select the scheduler, or set the default. 364 MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt; 365 if (Ctor != useDefaultMachineSched) 366 return Ctor(this); 367 368 // Get the default scheduler set by the target for this function. 369 ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this); 370 if (Scheduler) 371 return Scheduler; 372 373 // Default to GenericScheduler. 
/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
/// the caller. We don't have a command line option to override the postRA
/// scheduler. The Target must configure it.
ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
  // Get the postRA scheduler set by the target for this function.
  ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
  if (Scheduler)
    return Scheduler;

  // Default to GenericScheduler.
  return createGenericSchedPostRA(this);
}

/// Top-level MachineScheduler pass driver.
///
/// Visit blocks in function order. Divide each block into scheduling regions
/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
/// consistent with the DAG builder, which traverses the interior of the
/// scheduling regions bottom-up.
///
/// This design avoids exposing scheduling boundaries to the DAG builder,
/// simplifying the DAG builder's support for "special" target instructions.
/// At the same time the design allows target schedulers to operate across
/// scheduling boundaries, for example to bundle the boundary instructions
/// without reordering them. This creates complexity, because the target
/// scheduler must update the RegionBegin and RegionEnd positions cached by
/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
/// design would be to split blocks at scheduling boundaries, but LLVM has a
/// general bias against block splitting purely for implementation simplicity.
bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
  if (skipFunction(mf.getFunction()))
    return false;

  if (EnableMachineSched.getNumOccurrences()) {
    if (!EnableMachineSched)
      return false;
  } else if (!mf.getSubtarget().enableMachineScheduler())
    return false;

  LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));

  // Initialize the context of the pass.
  MF = &mf;
  MLI = &getAnalysis<MachineLoopInfo>();
  MDT = &getAnalysis<MachineDominatorTree>();
  PassConfig = &getAnalysis<TargetPassConfig>();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  LIS = &getAnalysis<LiveIntervals>();

  if (VerifyScheduling) {
    LLVM_DEBUG(LIS->dump());
    MF->verify(this, "Before machine scheduling.");
  }
  RegClassInfo->runOnMachineFunction(*MF);

  // Instantiate the selected scheduler for this target, function, and
  // optimization level.
  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
  scheduleRegions(*Scheduler, false);

  LLVM_DEBUG(LIS->dump());
  if (VerifyScheduling)
    MF->verify(this, "After machine scheduling.");
  return true;
}
bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
  if (skipFunction(mf.getFunction()))
    return false;

  if (EnablePostRAMachineSched.getNumOccurrences()) {
    if (!EnablePostRAMachineSched)
      return false;
  } else if (!mf.getSubtarget().enablePostRAMachineScheduler()) {
    LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
    return false;
  }
  LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));

  // Initialize the context of the pass.
  MF = &mf;
  MLI = &getAnalysis<MachineLoopInfo>();
  PassConfig = &getAnalysis<TargetPassConfig>();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  if (VerifyScheduling)
    MF->verify(this, "Before post machine scheduling.");

  // Instantiate the selected scheduler for this target, function, and
  // optimization level.
  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
  scheduleRegions(*Scheduler, true);

  if (VerifyScheduling)
    MF->verify(this, "After post machine scheduling.");
  return true;
}

/// Return true if the given instruction should not be included in a scheduling
/// region.
///
/// MachineScheduler does not currently support scheduling across calls. To
/// handle calls, the DAG builder needs to be modified to create register
/// anti/output dependencies on the registers clobbered by the call's regmask
/// operand. In PreRA scheduling, the stack pointer adjustment already prevents
/// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
/// the boundary, but there would be no benefit to postRA scheduling across
/// calls this late anyway.
static bool isSchedBoundary(MachineBasicBlock::iterator MI,
                            MachineBasicBlock *MBB,
                            MachineFunction *MF,
                            const TargetInstrInfo *TII) {
  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
}

/// A region of an MBB for scheduling.
namespace {
struct SchedRegion {
  /// RegionBegin is the first instruction in the scheduling region, and
  /// RegionEnd is either MBB->end() or the scheduling boundary after the
  /// last instruction in the scheduling region. These iterators cannot refer
  /// to instructions outside of the identified scheduling region because
  /// those may be reordered before scheduling this region.
  MachineBasicBlock::iterator RegionBegin;
  MachineBasicBlock::iterator RegionEnd;
  unsigned NumRegionInstrs;

  SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
              unsigned N) :
    RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
};
} // end anonymous namespace

using MBBRegionsVector = SmallVector<SchedRegion, 16>;
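// Illustrative example (assumed block contents, not taken from a real target):
// in a block of the form
//
//   %a = ...
//   %b = ...
//   CALL @f
//   %c = ...
//   %d = ...
//   RET
//
// both the CALL and the terminating RET are scheduling boundaries, so
// getSchedRegions() below produces two regions, one containing %a and %b and
// one containing %c and %d. Boundary instructions themselves are never
// reordered.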
static void
getSchedRegions(MachineBasicBlock *MBB,
                MBBRegionsVector &Regions,
                bool RegionsTopDown) {
  MachineFunction *MF = MBB->getParent();
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();

  MachineBasicBlock::iterator I = nullptr;
  for (MachineBasicBlock::iterator RegionEnd = MBB->end();
       RegionEnd != MBB->begin(); RegionEnd = I) {

    // Avoid decrementing RegionEnd for blocks with no terminator.
    if (RegionEnd != MBB->end() ||
        isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
      --RegionEnd;
    }

    // The next region starts above the previous region. Look backward in the
    // instruction stream until we find the nearest boundary.
    unsigned NumRegionInstrs = 0;
    I = RegionEnd;
    for (; I != MBB->begin(); --I) {
      MachineInstr &MI = *std::prev(I);
      if (isSchedBoundary(&MI, &*MBB, MF, TII))
        break;
      if (!MI.isDebugOrPseudoInstr()) {
        // MBB::size() uses instr_iterator to count. Here we need a bundle to
        // count as a single instruction.
        ++NumRegionInstrs;
      }
    }

    // It's possible we found a scheduling region that only has debug
    // instructions. Don't bother scheduling these.
    if (NumRegionInstrs != 0)
      Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
  }

  if (RegionsTopDown)
    std::reverse(Regions.begin(), Regions.end());
}

/// Main driver for both MachineScheduler and PostMachineScheduler.
void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
                                           bool FixKillFlags) {
  // Visit all machine basic blocks.
  //
  // TODO: Visit blocks in global postorder or postorder within the bottom-up
  // loop tree. Then we can optionally compute global RegPressure.
  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
       MBB != MBBEnd; ++MBB) {

    Scheduler.startBlock(&*MBB);

#ifndef NDEBUG
    if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
      continue;
    if (SchedOnlyBlock.getNumOccurrences()
        && (int)SchedOnlyBlock != MBB->getNumber())
      continue;
#endif

    // Break the block into scheduling regions [I, RegionEnd). RegionEnd
    // points to the scheduling boundary at the bottom of the region. The DAG
    // does not include RegionEnd, but the region does (i.e. the next
    // RegionEnd is above the previous RegionBegin). If the current block has
    // no terminator then RegionEnd == MBB->end() for the bottom region.
    //
    // All the regions of MBB are first found and stored in MBBRegions, which
    // are then processed top-down within the MBB if
    // doMBBSchedRegionsTopDown() returned true.
    //
    // The Scheduler may insert instructions during either schedule() or
    // exitRegion(), even for empty regions. So the local iterators 'I' and
    // 'RegionEnd' are invalid across these calls. Instructions must not be
    // added to other regions than the current one without updating MBBRegions.

    MBBRegionsVector MBBRegions;
    getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
    for (const SchedRegion &R : MBBRegions) {
      MachineBasicBlock::iterator I = R.RegionBegin;
      MachineBasicBlock::iterator RegionEnd = R.RegionEnd;
      unsigned NumRegionInstrs = R.NumRegionInstrs;

      // Notify the scheduler of the region, even if we may skip scheduling
      // it. Perhaps it still needs to be bundled.
      Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);

      // Skip empty scheduling regions (0 or 1 schedulable instructions).
      if (I == RegionEnd || I == std::prev(RegionEnd)) {
        // Close the current region. Bundle the terminator if needed.
        // This invalidates 'RegionEnd' and 'I'.
        Scheduler.exitRegion();
        continue;
      }
      LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
      LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
                        << " " << MBB->getName() << "\n From: " << *I
                        << " To: ";
                 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
                 else dbgs() << "End\n";
                 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
      if (DumpCriticalPathLength) {
        errs() << MF->getName();
        errs() << ":%bb. " << MBB->getNumber();
        errs() << " " << MBB->getName() << " \n";
      }

      // Schedule a region: possibly reorder instructions.
      // This invalidates the original region iterators.
      Scheduler.schedule();

      // Close the current region.
      Scheduler.exitRegion();
    }
    Scheduler.finishBlock();
    // FIXME: Ideally, no further passes should rely on kill flags. However,
    // thumb2 size reduction is currently an exception, so the PostMIScheduler
    // needs to do this.
    if (FixKillFlags)
      Scheduler.fixupKills(*MBB);
  }
  Scheduler.finalizeSchedule();
}

void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
  // unimplemented
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void ReadyQueue::dump() const {
  dbgs() << "Queue " << Name << ": ";
  for (const SUnit *SU : Queue)
    dbgs() << SU->NodeNum << " ";
  dbgs() << "\n";
}
#endif

//===----------------------------------------------------------------------===//
// ScheduleDAGMI - Basic machine instruction scheduling. This is
// independent of PreRA/PostRA scheduling and involves no extra book-keeping for
// virtual registers.
//===----------------------------------------------------------------------===//

// Provide a vtable anchor.
ScheduleDAGMI::~ScheduleDAGMI() = default;

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
/// NumPredsLeft reaches zero, release the successor node.
///
/// FIXME: Adjust SuccSU height based on MinLatency.
void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

  if (SuccEdge->isWeak()) {
    --SuccSU->WeakPredsLeft;
    if (SuccEdge->isCluster())
      NextClusterSucc = SuccSU;
    return;
  }
#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    dumpNode(*SuccSU);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif
  // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
  // CurrCycle may have advanced since then.
  if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
    SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();

  --SuccSU->NumPredsLeft;
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    SchedImpl->releaseTopNode(SuccSU);
}

/// releaseSuccessors - Call releaseSucc on each of SU's successors.
void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
  for (SDep &Succ : SU->Succs)
    releaseSucc(SU, &Succ);
}
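// Worked example (illustrative): if SU(5) has three strong data predecessors
// and one weak cluster predecessor, releaseSucc() runs once for each scheduled
// predecessor edge. Only when the third strong predecessor is scheduled does
// NumPredsLeft reach zero and SU(5) get handed to releaseTopNode(); the weak
// edge merely decrements WeakPredsLeft and may set NextClusterSucc.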
/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
/// NumSuccsLeft reaches zero, release the predecessor node.
///
/// FIXME: Adjust PredSU height based on MinLatency.
void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

  if (PredEdge->isWeak()) {
    --PredSU->WeakSuccsLeft;
    if (PredEdge->isCluster())
      NextClusterPred = PredSU;
    return;
  }
#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    dumpNode(*PredSU);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif
  // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
  // CurrCycle may have advanced since then.
  if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
    PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();

  --PredSU->NumSuccsLeft;
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
    SchedImpl->releaseBottomNode(PredSU);
}

/// releasePredecessors - Call releasePred on each of SU's predecessors.
void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
  for (SDep &Pred : SU->Preds)
    releasePred(SU, &Pred);
}

void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
  ScheduleDAGInstrs::startBlock(bb);
  SchedImpl->enterMBB(bb);
}

void ScheduleDAGMI::finishBlock() {
  SchedImpl->leaveMBB();
  ScheduleDAGInstrs::finishBlock();
}

/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
/// crossing a scheduling boundary. [begin, end) includes all instructions in
/// the region, including the boundary itself and single-instruction regions
/// that don't get scheduled.
void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
                                MachineBasicBlock::iterator begin,
                                MachineBasicBlock::iterator end,
                                unsigned regioninstrs)
{
  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);

  SchedImpl->initPolicy(begin, end, regioninstrs);
}

/// This is normally called from the main scheduler loop but may also be invoked
/// by the scheduling strategy to perform additional code motion.
void ScheduleDAGMI::moveInstruction(
    MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
  // Advance RegionBegin if the first instruction moves down.
  if (&*RegionBegin == MI)
    ++RegionBegin;

  // Update the instruction stream.
  BB->splice(InsertPos, BB, MI);

  // Update LiveIntervals.
  if (LIS)
    LIS->handleMove(*MI, /*UpdateFlags=*/true);

  // Recede RegionBegin if an instruction moves above the first.
  if (RegionBegin == InsertPos)
    RegionBegin = MI;
}

bool ScheduleDAGMI::checkSchedLimit() {
#if LLVM_ENABLE_ABI_BREAKING_CHECKS && !defined(NDEBUG)
  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
    CurrentTop = CurrentBottom;
    return false;
  }
  ++NumInstrsScheduled;
#endif
  return true;
}
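// Debugging note (illustrative usage, not required by the pass): in builds
// where the cutoff above is compiled in, -misched-cutoff=N stops reordering
// after N scheduled instructions, which makes the flag convenient for
// bisecting which scheduling decision introduces a suspected miscompile or
// performance regression.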
/// Per-region scheduling driver, called back from
/// MachineScheduler::runOnMachineFunction. This is a simplified driver that
/// does not consider liveness or register pressure. It is useful for PostRA
/// scheduling and potentially other custom schedulers.
void ScheduleDAGMI::schedule() {
  LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
  LLVM_DEBUG(SchedImpl->dumpPolicy());

  // Build the DAG.
  buildSchedGraph(AA);

  postProcessDAG();

  SmallVector<SUnit*, 8> TopRoots, BotRoots;
  findRootsAndBiasEdges(TopRoots, BotRoots);

  LLVM_DEBUG(dump());
  if (PrintDAGs) dump();
  if (ViewMISchedDAGs) viewGraph();

  // Initialize the strategy before modifying the DAG.
  // This may initialize a DFSResult to be used for queue priority.
  SchedImpl->initialize(this);

  // Initialize ready queues now that the DAG and priority data are finalized.
  initQueues(TopRoots, BotRoots);

  bool IsTopNode = false;
  while (true) {
    LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
    SUnit *SU = SchedImpl->pickNode(IsTopNode);
    if (!SU) break;

    assert(!SU->isScheduled && "Node already scheduled");
    if (!checkSchedLimit())
      break;

    MachineInstr *MI = SU->getInstr();
    if (IsTopNode) {
      assert(SU->isTopReady() && "node still has unscheduled dependencies");
      if (&*CurrentTop == MI)
        CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
      else
        moveInstruction(MI, CurrentTop);
    } else {
      assert(SU->isBottomReady() && "node still has unscheduled dependencies");
      MachineBasicBlock::iterator priorII =
          priorNonDebug(CurrentBottom, CurrentTop);
      if (&*priorII == MI)
        CurrentBottom = priorII;
      else {
        if (&*CurrentTop == MI)
          CurrentTop = nextIfDebug(++CurrentTop, priorII);
        moveInstruction(MI, CurrentBottom);
        CurrentBottom = MI;
      }
    }
    // Notify the scheduling strategy before updating the DAG.
    // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
    // runs, it can then use the accurate ReadyCycle time to determine whether
    // newly released nodes can move to the readyQ.
    SchedImpl->schedNode(SU, IsTopNode);

    updateQueues(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  placeDebugValues();

  LLVM_DEBUG({
    dbgs() << "*** Final schedule for "
           << printMBBReference(*begin()->getParent()) << " ***\n";
    dumpSchedule();
    dbgs() << '\n';
  });
}

/// Apply each ScheduleDAGMutation step in order.
void ScheduleDAGMI::postProcessDAG() {
  for (auto &m : Mutations)
    m->apply(this);
}

void ScheduleDAGMI::
findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
                      SmallVectorImpl<SUnit*> &BotRoots) {
  for (SUnit &SU : SUnits) {
    assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");

    // Order predecessors so DFSResult follows the critical path.
    SU.biasCriticalPath();

    // A SUnit is ready to top schedule if it has no predecessors.
    if (!SU.NumPredsLeft)
      TopRoots.push_back(&SU);
    // A SUnit is ready to bottom schedule if it has no successors.
    if (!SU.NumSuccsLeft)
      BotRoots.push_back(&SU);
  }
  ExitSU.biasCriticalPath();
}

/// Identify DAG roots and set up scheduler queues.
void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
                               ArrayRef<SUnit*> BotRoots) {
  NextClusterSucc = nullptr;
  NextClusterPred = nullptr;

  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
  //
  // Nodes with unreleased weak edges can still be roots.
  // Release top roots in forward order.
  for (SUnit *SU : TopRoots)
    SchedImpl->releaseTopNode(SU);

  // Release bottom roots in reverse order so the higher priority nodes appear
  // first. This is more natural and slightly more efficient.
  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
    SchedImpl->releaseBottomNode(*I);
  }

  releaseSuccessors(&EntrySU);
  releasePredecessors(&ExitSU);

  SchedImpl->registerRoots();

  // Advance past initial DebugValues.
  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
  CurrentBottom = RegionEnd;
}
/// Update scheduler queues after scheduling an instruction.
void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
  // Release dependent instructions for scheduling.
  if (IsTopNode)
    releaseSuccessors(SU);
  else
    releasePredecessors(SU);

  SU->isScheduled = true;
}

/// Reinsert any remaining debug_values, just like the PostRA scheduler.
void ScheduleDAGMI::placeDebugValues() {
  // If first instruction was a DBG_VALUE then put it back.
  if (FirstDbgValue) {
    BB->splice(RegionBegin, BB, FirstDbgValue);
    RegionBegin = FirstDbgValue;
  }

  for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
    std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
    MachineInstr *DbgValue = P.first;
    MachineBasicBlock::iterator OrigPrevMI = P.second;
    if (&*RegionBegin == DbgValue)
      ++RegionBegin;
    BB->splice(std::next(OrigPrevMI), BB, DbgValue);
    if (RegionEnd != BB->end() && OrigPrevMI == &*RegionEnd)
      RegionEnd = DbgValue;
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
static const char *scheduleTableLegend = " i: issue\n x: resource booked";

LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceTopDown() const {
  // Nothing to show if there are no instructions, or only one.
  if (BB->size() < 2)
    return;

  dbgs() << " * Schedule table (TopDown):\n";
  dbgs() << scheduleTableLegend << "\n";
  const unsigned FirstCycle = getSUnit(&*(std::begin(*this)))->TopReadyCycle;
  unsigned LastCycle = getSUnit(&*(std::prev(std::end(*this))))->TopReadyCycle;
  for (MachineInstr &MI : *this) {
    SUnit *SU = getSUnit(&MI);
    if (!SU)
      continue;
    const MCSchedClassDesc *SC = getSchedClass(SU);
    for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
                                       PE = SchedModel.getWriteProcResEnd(SC);
         PI != PE; ++PI) {
      if (SU->TopReadyCycle + PI->Cycles - 1 > LastCycle)
        LastCycle = SU->TopReadyCycle + PI->Cycles - 1;
    }
  }
  // Print the header with the cycles.
  dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
  for (unsigned C = FirstCycle; C <= LastCycle; ++C)
    dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
  dbgs() << "|\n";

  for (MachineInstr &MI : *this) {
    SUnit *SU = getSUnit(&MI);
    if (!SU) {
      dbgs() << "Missing SUnit\n";
      continue;
    }
    std::string NodeName("SU(");
    NodeName += std::to_string(SU->NodeNum) + ")";
    dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
    unsigned C = FirstCycle;
    for (; C <= LastCycle; ++C) {
      if (C == SU->TopReadyCycle)
        dbgs() << llvm::left_justify("| i", ColWidth);
      else
        dbgs() << llvm::left_justify("|", ColWidth);
    }
    dbgs() << "|\n";
    const MCSchedClassDesc *SC = getSchedClass(SU);
    for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
                                       PE = SchedModel.getWriteProcResEnd(SC);
         PI != PE; ++PI) {
      C = FirstCycle;
      const std::string ResName =
          SchedModel.getResourceName(PI->ProcResourceIdx);
      dbgs() << llvm::left_justify(ResName, HeaderColWidth);
      for (; C < SU->TopReadyCycle; ++C) {
        dbgs() << llvm::left_justify("|", ColWidth);
      }
      for (unsigned i = 0; i < PI->Cycles; ++i, ++C)
        dbgs() << llvm::left_justify("| x", ColWidth);
      while (C++ <= LastCycle)
        dbgs() << llvm::left_justify("|", ColWidth);
      // Place end char.
      dbgs() << "| \n";
    }
  }
}
LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceBottomUp() const {
  // Nothing to show if there are no instructions, or only one.
  if (BB->size() < 2)
    return;

  dbgs() << " * Schedule table (BottomUp):\n";
  dbgs() << scheduleTableLegend << "\n";

  const int FirstCycle = getSUnit(&*(std::begin(*this)))->BotReadyCycle;
  int LastCycle = getSUnit(&*(std::prev(std::end(*this))))->BotReadyCycle;
  for (MachineInstr &MI : *this) {
    SUnit *SU = getSUnit(&MI);
    if (!SU)
      continue;
    const MCSchedClassDesc *SC = getSchedClass(SU);
    for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
                                       PE = SchedModel.getWriteProcResEnd(SC);
         PI != PE; ++PI) {
      if ((int)SU->BotReadyCycle - PI->Cycles + 1 < LastCycle)
        LastCycle = (int)SU->BotReadyCycle - PI->Cycles + 1;
    }
  }
  // Print the header with the cycles.
  dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
  for (int C = FirstCycle; C >= LastCycle; --C)
    dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
  dbgs() << "|\n";

  for (MachineInstr &MI : *this) {
    SUnit *SU = getSUnit(&MI);
    if (!SU) {
      dbgs() << "Missing SUnit\n";
      continue;
    }
    std::string NodeName("SU(");
    NodeName += std::to_string(SU->NodeNum) + ")";
    dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
    int C = FirstCycle;
    for (; C >= LastCycle; --C) {
      if (C == (int)SU->BotReadyCycle)
        dbgs() << llvm::left_justify("| i", ColWidth);
      else
        dbgs() << llvm::left_justify("|", ColWidth);
    }
    dbgs() << "|\n";
    const MCSchedClassDesc *SC = getSchedClass(SU);
    for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
                                       PE = SchedModel.getWriteProcResEnd(SC);
         PI != PE; ++PI) {
      C = FirstCycle;
      const std::string ResName =
          SchedModel.getResourceName(PI->ProcResourceIdx);
      dbgs() << llvm::left_justify(ResName, HeaderColWidth);
      for (; C > (int)SU->BotReadyCycle; --C) {
        dbgs() << llvm::left_justify("|", ColWidth);
      }
      for (unsigned i = 0; i < PI->Cycles; ++i, --C)
        dbgs() << llvm::left_justify("| x", ColWidth);
      while (C-- >= LastCycle)
        dbgs() << llvm::left_justify("|", ColWidth);
      // Place end char.
      dbgs() << "| \n";
    }
  }
}
#endif

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
  if (MISchedDumpScheduleTrace) {
    if (ForceTopDown)
      dumpScheduleTraceTopDown();
    else if (ForceBottomUp)
      dumpScheduleTraceBottomUp();
    else {
      dbgs() << "* Schedule table (Bidirectional): not implemented\n";
    }
  }

  for (MachineInstr &MI : *this) {
    if (SUnit *SU = getSUnit(&MI))
      dumpNode(*SU);
    else
      dbgs() << "Missing SUnit\n";
  }
}
#endif
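// Usage note (illustrative): the per-cycle tables above are only printed when
// -misched-dump-schedule-trace is combined with -misched-topdown or
// -misched-bottomup; with the default bidirectional strategy, dumpSchedule()
// falls back to the plain per-node dump.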
//===----------------------------------------------------------------------===//
// ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
// preservation.
//===----------------------------------------------------------------------===//

ScheduleDAGMILive::~ScheduleDAGMILive() {
  delete DFSResult;
}

void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
  const MachineInstr &MI = *SU.getInstr();
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg())
      continue;
    if (!MO.readsReg())
      continue;
    if (TrackLaneMasks && !MO.isUse())
      continue;

    Register Reg = MO.getReg();
    if (!Reg.isVirtual())
      continue;

    // Ignore re-defs.
    if (TrackLaneMasks) {
      bool FoundDef = false;
      for (const MachineOperand &MO2 : MI.all_defs()) {
        if (MO2.getReg() == Reg && !MO2.isDead()) {
          FoundDef = true;
          break;
        }
      }
      if (FoundDef)
        continue;
    }

    // Record this local VReg use.
    VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
    for (; UI != VRegUses.end(); ++UI) {
      if (UI->SU == &SU)
        break;
    }
    if (UI == VRegUses.end())
      VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
  }
}

/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
/// crossing a scheduling boundary. [begin, end) includes all instructions in
/// the region, including the boundary itself and single-instruction regions
/// that don't get scheduled.
void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
                                    MachineBasicBlock::iterator begin,
                                    MachineBasicBlock::iterator end,
                                    unsigned regioninstrs)
{
  // ScheduleDAGMI initializes SchedImpl's per-region policy.
  ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);

  // For convenience remember the end of the liveness region.
  LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);

  SUPressureDiffs.clear();

  ShouldTrackPressure = SchedImpl->shouldTrackPressure();
  ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();

  assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
         "ShouldTrackLaneMasks requires ShouldTrackPressure");
}

// Set up the register pressure trackers for the top scheduled and bottom
// scheduled regions.
void ScheduleDAGMILive::initRegPressure() {
  VRegUses.clear();
  VRegUses.setUniverse(MRI.getNumVirtRegs());
  for (SUnit &SU : SUnits)
    collectVRegUses(SU);

  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
                    ShouldTrackLaneMasks, false);
  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
                    ShouldTrackLaneMasks, false);

  // Close the RPTracker to finalize live ins.
  RPTracker.closeRegion();

  LLVM_DEBUG(RPTracker.dump());

  // Initialize the live ins and live outs.
  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);

  // Close one end of the tracker so we can call
  // getMaxUpward/DownwardPressureDelta before advancing across any
  // instructions. This converts currently live regs into live ins/outs.
  TopRPTracker.closeTop();
  BotRPTracker.closeBottom();

  BotRPTracker.initLiveThru(RPTracker);
  if (!BotRPTracker.getLiveThru().empty()) {
    TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
    LLVM_DEBUG(dbgs() << "Live Thru: ";
               dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
  }

  // For each live out vreg reduce the pressure change associated with other
  // uses of the same vreg below the live-out reaching def.
  updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);

  // Account for liveness generated by the region boundary.
  if (LiveRegionEnd != RegionEnd) {
    SmallVector<RegisterMaskPair, 8> LiveUses;
    BotRPTracker.recede(&LiveUses);
    updatePressureDiffs(LiveUses);
  }

  LLVM_DEBUG(dbgs() << "Top Pressure:\n";
             dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
             dbgs() << "Bottom Pressure:\n";
             dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););

  assert((BotRPTracker.getPos() == RegionEnd ||
          (RegionEnd->isDebugInstr() &&
           BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
         "Can't find the region bottom");

  // Cache the list of excess pressure sets in this region. This will also track
  // the max pressure in the scheduled code for these sets.
  RegionCriticalPSets.clear();
  const std::vector<unsigned> &RegionPressure =
      RPTracker.getPressure().MaxSetPressure;
  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
    unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
    if (RegionPressure[i] > Limit) {
      LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
                        << " Actual " << RegionPressure[i] << "\n");
      RegionCriticalPSets.push_back(PressureChange(i));
    }
  }
  LLVM_DEBUG(dbgs() << "Excess PSets: ";
             for (const PressureChange &RCPS
                  : RegionCriticalPSets) dbgs()
             << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
             dbgs() << "\n");
}
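// Worked example (illustrative numbers): if a target's integer pressure set
// has a limit of 32 registers and RPTracker reports a maximum pressure of 35
// for this region, that set is recorded in RegionCriticalPSets above as an
// excess pressure set, and its maximum is then tracked as the region is
// scheduled (see updateScheduledPressure() below).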
void ScheduleDAGMILive::
updateScheduledPressure(const SUnit *SU,
                        const std::vector<unsigned> &NewMaxPressure) {
  const PressureDiff &PDiff = getPressureDiff(SU);
  unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
  for (const PressureChange &PC : PDiff) {
    if (!PC.isValid())
      break;
    unsigned ID = PC.getPSet();
    while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
      ++CritIdx;
    if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
      if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
          && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
        RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
    }
    unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
    if (NewMaxPressure[ID] >= Limit - 2) {
      LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
                        << NewMaxPressure[ID]
                        << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
                        << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
                        << " livethru)\n");
    }
  }
}

/// Update the PressureDiff array for liveness after scheduling this
/// instruction.
void ScheduleDAGMILive::updatePressureDiffs(
    ArrayRef<RegisterMaskPair> LiveUses) {
  for (const RegisterMaskPair &P : LiveUses) {
    Register Reg = P.RegUnit;
    /// FIXME: Currently assuming single-use physregs.
    if (!Reg.isVirtual())
      continue;

    if (ShouldTrackLaneMasks) {
      // If the register has just become live then other uses won't change
      // this fact anymore => decrement pressure.
      // If the register has just become dead then other uses make it come
      // back to life => increment pressure.
      bool Decrement = P.LaneMask.any();

      for (const VReg2SUnit &V2SU
           : make_range(VRegUses.find(Reg), VRegUses.end())) {
        SUnit &SU = *V2SU.SU;
        if (SU.isScheduled || &SU == &ExitSU)
          continue;

        PressureDiff &PDiff = getPressureDiff(&SU);
        PDiff.addPressureChange(Reg, Decrement, &MRI);
        LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
                          << printReg(Reg, TRI) << ':'
                          << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
                   dbgs() << " to "; PDiff.dump(*TRI););
      }
    } else {
      assert(P.LaneMask.any());
      LLVM_DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
      // This may be called before CurrentBottom has been initialized. However,
      // BotRPTracker must have a valid position. We want the value live into the
      // instruction or live out of the block, so ask for the previous
      // instruction's live-out.
      const LiveInterval &LI = LIS->getInterval(Reg);
      VNInfo *VNI;
      MachineBasicBlock::const_iterator I =
          nextIfDebug(BotRPTracker.getPos(), BB->end());
      if (I == BB->end())
        VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
      else {
        LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
        VNI = LRQ.valueIn();
      }
      // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
      assert(VNI && "No live value at use.");
      for (const VReg2SUnit &V2SU
           : make_range(VRegUses.find(Reg), VRegUses.end())) {
        SUnit *SU = V2SU.SU;
        // If this use comes before the reaching def, it cannot be a last use,
        // so decrease its pressure change.
        if (!SU->isScheduled && SU != &ExitSU) {
          LiveQueryResult LRQ =
              LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
          if (LRQ.valueIn() == VNI) {
            PressureDiff &PDiff = getPressureDiff(SU);
            PDiff.addPressureChange(Reg, true, &MRI);
            LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
                              << *SU->getInstr();
                       dbgs() << " to "; PDiff.dump(*TRI););
          }
        }
      }
    }
  }
}

void ScheduleDAGMILive::dump() const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  if (EntrySU.getInstr() != nullptr)
    dumpNodeAll(EntrySU);
  for (const SUnit &SU : SUnits) {
    dumpNodeAll(SU);
    if (ShouldTrackPressure) {
      dbgs() << " Pressure Diff : ";
      getPressureDiff(&SU).dump(*TRI);
    }
    dbgs() << " Single Issue : ";
    if (SchedModel.mustBeginGroup(SU.getInstr()) &&
        SchedModel.mustEndGroup(SU.getInstr()))
      dbgs() << "true;";
    else
      dbgs() << "false;";
    dbgs() << '\n';
  }
  if (ExitSU.getInstr() != nullptr)
    dumpNodeAll(ExitSU);
#endif
}

/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
///
/// This is a skeletal driver, with all the functionality pushed into helpers,
/// so that it can be easily extended by experimental schedulers. Generally,
/// implementing MachineSchedStrategy should be sufficient to implement a new
/// scheduling algorithm. However, if a scheduler further subclasses
/// ScheduleDAGMILive then it will want to override this virtual method in order
/// to update any specialized state.
void ScheduleDAGMILive::schedule() {
  LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
  LLVM_DEBUG(SchedImpl->dumpPolicy());
  buildDAGWithRegPressure();

  postProcessDAG();

  SmallVector<SUnit*, 8> TopRoots, BotRoots;
  findRootsAndBiasEdges(TopRoots, BotRoots);

  // Initialize the strategy before modifying the DAG.
  // This may initialize a DFSResult to be used for queue priority.
  SchedImpl->initialize(this);

  LLVM_DEBUG(dump());
  if (PrintDAGs) dump();
  if (ViewMISchedDAGs) viewGraph();

  // Initialize ready queues now that the DAG and priority data are finalized.
  initQueues(TopRoots, BotRoots);

  bool IsTopNode = false;
  while (true) {
    LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
    SUnit *SU = SchedImpl->pickNode(IsTopNode);
    if (!SU) break;

    assert(!SU->isScheduled && "Node already scheduled");
    if (!checkSchedLimit())
      break;

    scheduleMI(SU, IsTopNode);

    if (DFSResult) {
      unsigned SubtreeID = DFSResult->getSubtreeID(SU);
      if (!ScheduledTrees.test(SubtreeID)) {
        ScheduledTrees.set(SubtreeID);
        DFSResult->scheduleTree(SubtreeID);
        SchedImpl->scheduleTree(SubtreeID);
      }
    }

    // Notify the scheduling strategy after updating the DAG.
    SchedImpl->schedNode(SU, IsTopNode);

    updateQueues(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  placeDebugValues();

  LLVM_DEBUG({
    dbgs() << "*** Final schedule for "
           << printMBBReference(*begin()->getParent()) << " ***\n";
    dumpSchedule();
    dbgs() << '\n';
  });
}

/// Build the DAG and set up three register pressure trackers.
void ScheduleDAGMILive::buildDAGWithRegPressure() {
  if (!ShouldTrackPressure) {
    RPTracker.reset();
    RegionCriticalPSets.clear();
    buildSchedGraph(AA);
    return;
  }

  // Initialize the register pressure tracker used by buildSchedGraph.
  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
                 ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);

  // Account for liveness generated by the region boundary.
  if (LiveRegionEnd != RegionEnd)
    RPTracker.recede();

  // Build the DAG, and compute current register pressure.
  buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);

  // Initialize top/bottom trackers after computing region pressure.
  initRegPressure();
}

void ScheduleDAGMILive::computeDFSResult() {
  if (!DFSResult)
    DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
  DFSResult->clear();
  ScheduledTrees.clear();
  DFSResult->resize(SUnits.size());
  DFSResult->compute(SUnits);
  ScheduledTrees.resize(DFSResult->getNumSubtrees());
}
/// Compute the max cyclic critical path through the DAG. The scheduling DAG
/// only provides the critical path for single block loops. To handle loops that
/// span blocks, we could use the vreg path latencies provided by
/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
/// available for use in the scheduler.
///
/// The cyclic path estimation identifies a def-use pair that crosses the back
/// edge and considers the depth and height of the nodes. For example, consider
/// the following instruction sequence where each instruction has unit latency
/// and defines an eponymous virtual register:
///
/// a->b(a,c)->c(b)->d(c)->exit
///
/// The cyclic critical path is two cycles: b->c->b
/// The acyclic critical path is four cycles: a->b->c->d->exit
/// LiveOutHeight = height(c) = len(c->d->exit) = 2
/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
/// LiveInDepth = depth(b) = len(a->b) = 1
///
/// LiveOutDepth - LiveInDepth = 3 - 1 = 2
/// LiveInHeight - LiveOutHeight = 4 - 2 = 2
/// CyclicCriticalPath = min(2, 2) = 2
///
/// This could be relevant to PostRA scheduling, but is currently implemented
/// assuming LiveIntervals.
unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
  // This only applies to single-block loops.
  if (!BB->isSuccessor(BB))
    return 0;

  unsigned MaxCyclicLatency = 0;
  // Visit each live out vreg def to find def/use pairs that cross iterations.
  for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
    Register Reg = P.RegUnit;
    if (!Reg.isVirtual())
      continue;
    const LiveInterval &LI = LIS->getInterval(Reg);
    const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
    if (!DefVNI)
      continue;

    MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
    const SUnit *DefSU = getSUnit(DefMI);
    if (!DefSU)
      continue;

    unsigned LiveOutHeight = DefSU->getHeight();
    unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
    // Visit all local users of the vreg def.
    for (const VReg2SUnit &V2SU
         : make_range(VRegUses.find(Reg), VRegUses.end())) {
      SUnit *SU = V2SU.SU;
      if (SU == &ExitSU)
        continue;

      // Only consider uses of the phi.
      LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
      if (!LRQ.valueIn()->isPHIDef())
        continue;

      // Assume that a path spanning two iterations is a cycle, which could
      // overestimate in strange cases. This allows cyclic latency to be
      // estimated as the minimum slack of the vreg's depth or height.
      unsigned CyclicLatency = 0;
      if (LiveOutDepth > SU->getDepth())
        CyclicLatency = LiveOutDepth - SU->getDepth();

      unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
      if (LiveInHeight > LiveOutHeight) {
        if (LiveInHeight - LiveOutHeight < CyclicLatency)
          CyclicLatency = LiveInHeight - LiveOutHeight;
      } else
        CyclicLatency = 0;

      LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
                        << SU->NodeNum << ") = " << CyclicLatency << "c\n");
      if (CyclicLatency > MaxCyclicLatency)
        MaxCyclicLatency = CyclicLatency;
    }
  }
  LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
  return MaxCyclicLatency;
}
1565 void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots, 1566 ArrayRef<SUnit*> BotRoots) { 1567 ScheduleDAGMI::initQueues(TopRoots, BotRoots); 1568 if (ShouldTrackPressure) { 1569 assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker"); 1570 TopRPTracker.setPos(CurrentTop); 1571 } 1572 } 1573 1574 /// Move an instruction and update register pressure. 1575 void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) { 1576 // Move the instruction to its new location in the instruction stream. 1577 MachineInstr *MI = SU->getInstr(); 1578 1579 if (IsTopNode) { 1580 assert(SU->isTopReady() && "node still has unscheduled dependencies"); 1581 if (&*CurrentTop == MI) 1582 CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom); 1583 else { 1584 moveInstruction(MI, CurrentTop); 1585 TopRPTracker.setPos(MI); 1586 } 1587 1588 if (ShouldTrackPressure) { 1589 // Update top scheduled pressure. 1590 RegisterOperands RegOpers; 1591 RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false); 1592 if (ShouldTrackLaneMasks) { 1593 // Adjust liveness and add missing dead+read-undef flags. 1594 SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot(); 1595 RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI); 1596 } else { 1597 // Adjust for missing dead-def flags. 1598 RegOpers.detectDeadDefs(*MI, *LIS); 1599 } 1600 1601 TopRPTracker.advance(RegOpers); 1602 assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); 1603 LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure( 1604 TopRPTracker.getRegSetPressureAtPos(), TRI);); 1605 1606 updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure); 1607 } 1608 } else { 1609 assert(SU->isBottomReady() && "node still has unscheduled dependencies"); 1610 MachineBasicBlock::iterator priorII = 1611 priorNonDebug(CurrentBottom, CurrentTop); 1612 if (&*priorII == MI) 1613 CurrentBottom = priorII; 1614 else { 1615 if (&*CurrentTop == MI) { 1616 CurrentTop = nextIfDebug(++CurrentTop, priorII); 1617 TopRPTracker.setPos(CurrentTop); 1618 } 1619 moveInstruction(MI, CurrentBottom); 1620 CurrentBottom = MI; 1621 BotRPTracker.setPos(CurrentBottom); 1622 } 1623 if (ShouldTrackPressure) { 1624 RegisterOperands RegOpers; 1625 RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false); 1626 if (ShouldTrackLaneMasks) { 1627 // Adjust liveness and add missing dead+read-undef flags. 1628 SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot(); 1629 RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI); 1630 } else { 1631 // Adjust for missing dead-def flags. 1632 RegOpers.detectDeadDefs(*MI, *LIS); 1633 } 1634 1635 if (BotRPTracker.getPos() != CurrentBottom) 1636 BotRPTracker.recedeSkipDebugValues(); 1637 SmallVector<RegisterMaskPair, 8> LiveUses; 1638 BotRPTracker.recede(RegOpers, &LiveUses); 1639 assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); 1640 LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure( 1641 BotRPTracker.getRegSetPressureAtPos(), TRI);); 1642 1643 updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure); 1644 updatePressureDiffs(LiveUses); 1645 } 1646 } 1647 } 1648 1649 //===----------------------------------------------------------------------===// 1650 // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores. 1651 //===----------------------------------------------------------------------===// 1652 1653 namespace { 1654 1655 /// Post-process the DAG to create cluster edges between neighboring 1656 /// loads or between neighboring stores. 
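/// As an illustrative sketch only (the pseudo-instructions and offsets are
/// hypothetical, not taken from this pass): two loads off the same base
/// register, e.g.
///
///   %1 = LOAD %base, 0
///   %2 = LOAD %base, 8
///
/// may receive an SDep::Cluster edge when the target's shouldClusterMemOps()
/// hook approves the pair, nudging the scheduler to keep them adjacent so the
/// target can later pair or fuse them.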
1657 class BaseMemOpClusterMutation : public ScheduleDAGMutation { 1658 struct MemOpInfo { 1659 SUnit *SU; 1660 SmallVector<const MachineOperand *, 4> BaseOps; 1661 int64_t Offset; 1662 unsigned Width; 1663 1664 MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps, 1665 int64_t Offset, unsigned Width) 1666 : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset), 1667 Width(Width) {} 1668 1669 static bool Compare(const MachineOperand *const &A, 1670 const MachineOperand *const &B) { 1671 if (A->getType() != B->getType()) 1672 return A->getType() < B->getType(); 1673 if (A->isReg()) 1674 return A->getReg() < B->getReg(); 1675 if (A->isFI()) { 1676 const MachineFunction &MF = *A->getParent()->getParent()->getParent(); 1677 const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering(); 1678 bool StackGrowsDown = TFI.getStackGrowthDirection() == 1679 TargetFrameLowering::StackGrowsDown; 1680 return StackGrowsDown ? A->getIndex() > B->getIndex() 1681 : A->getIndex() < B->getIndex(); 1682 } 1683 1684 llvm_unreachable("MemOpClusterMutation only supports register or frame " 1685 "index bases."); 1686 } 1687 1688 bool operator<(const MemOpInfo &RHS) const { 1689 // FIXME: Don't compare everything twice. Maybe use C++20 three way 1690 // comparison instead when it's available. 1691 if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(), 1692 RHS.BaseOps.begin(), RHS.BaseOps.end(), 1693 Compare)) 1694 return true; 1695 if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(), 1696 BaseOps.begin(), BaseOps.end(), Compare)) 1697 return false; 1698 if (Offset != RHS.Offset) 1699 return Offset < RHS.Offset; 1700 return SU->NodeNum < RHS.SU->NodeNum; 1701 } 1702 }; 1703 1704 const TargetInstrInfo *TII; 1705 const TargetRegisterInfo *TRI; 1706 bool IsLoad; 1707 1708 public: 1709 BaseMemOpClusterMutation(const TargetInstrInfo *tii, 1710 const TargetRegisterInfo *tri, bool IsLoad) 1711 : TII(tii), TRI(tri), IsLoad(IsLoad) {} 1712 1713 void apply(ScheduleDAGInstrs *DAGInstrs) override; 1714 1715 protected: 1716 void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps, bool FastCluster, 1717 ScheduleDAGInstrs *DAG); 1718 void collectMemOpRecords(std::vector<SUnit> &SUnits, 1719 SmallVectorImpl<MemOpInfo> &MemOpRecords); 1720 bool groupMemOps(ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG, 1721 DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups); 1722 }; 1723 1724 class StoreClusterMutation : public BaseMemOpClusterMutation { 1725 public: 1726 StoreClusterMutation(const TargetInstrInfo *tii, 1727 const TargetRegisterInfo *tri) 1728 : BaseMemOpClusterMutation(tii, tri, false) {} 1729 }; 1730 1731 class LoadClusterMutation : public BaseMemOpClusterMutation { 1732 public: 1733 LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri) 1734 : BaseMemOpClusterMutation(tii, tri, true) {} 1735 }; 1736 1737 } // end anonymous namespace 1738 1739 namespace llvm { 1740 1741 std::unique_ptr<ScheduleDAGMutation> 1742 createLoadClusterDAGMutation(const TargetInstrInfo *TII, 1743 const TargetRegisterInfo *TRI) { 1744 return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(TII, TRI) 1745 : nullptr; 1746 } 1747 1748 std::unique_ptr<ScheduleDAGMutation> 1749 createStoreClusterDAGMutation(const TargetInstrInfo *TII, 1750 const TargetRegisterInfo *TRI) { 1751 return EnableMemOpCluster ? 
std::make_unique<StoreClusterMutation>(TII, TRI)
1752                             : nullptr;
1753 }
1754
1755 } // end namespace llvm
1756
1757 // Sort all the loads/stores first. Then, for each load/store, check the
1758 // following loads/stores one by one until we reach the first non-dependent one,
1759 // and call the target hook to see if they can be clustered.
1760 // If FastCluster is enabled, we assume that all the loads/stores have been
1761 // preprocessed and no longer have dependencies on each other.
1762 void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1763     ArrayRef<MemOpInfo> MemOpRecords, bool FastCluster,
1764     ScheduleDAGInstrs *DAG) {
1765   // Keep track of the current cluster length and bytes for each SUnit.
1766   DenseMap<unsigned, std::pair<unsigned, unsigned>> SUnit2ClusterInfo;
1767
1768   // At this point, `MemOpRecords` array must hold at least two mem ops. Try to
1769   // cluster mem ops collected within `MemOpRecords` array.
1770   for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1771     // The decision to cluster mem ops is taken based on target-dependent logic.
1772     auto MemOpa = MemOpRecords[Idx];
1773
1774     // Seek the next load/store to cluster with.
1775     unsigned NextIdx = Idx + 1;
1776     for (; NextIdx < End; ++NextIdx)
1777       // Skip if MemOpb has already been clustered or has a dependency with
1778       // MemOpa.
1779       if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) &&
1780           (FastCluster ||
1781            (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&
1782             !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))))
1783         break;
1784     if (NextIdx == End)
1785       continue;
1786
1787     auto MemOpb = MemOpRecords[NextIdx];
1788     unsigned ClusterLength = 2;
1789     unsigned CurrentClusterBytes = MemOpa.Width + MemOpb.Width;
1790     if (SUnit2ClusterInfo.count(MemOpa.SU->NodeNum)) {
1791       ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1;
1792       CurrentClusterBytes =
1793           SUnit2ClusterInfo[MemOpa.SU->NodeNum].second + MemOpb.Width;
1794     }
1795
1796     if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength,
1797                                   CurrentClusterBytes))
1798       continue;
1799
1800     SUnit *SUa = MemOpa.SU;
1801     SUnit *SUb = MemOpb.SU;
1802     if (SUa->NodeNum > SUb->NodeNum)
1803       std::swap(SUa, SUb);
1804
1805     // FIXME: Is this check really required?
1806     if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))
1807       continue;
1808
1809     LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1810                       << SUb->NodeNum << ")\n");
1811     ++NumClustered;
1812
1813     if (IsLoad) {
1814       // Copy successor edges from SUa to SUb. Interleaving computation
1815       // dependent on SUa can prevent load combining due to register reuse.
1816       // Predecessor edges do not need to be copied from SUb to SUa since
1817       // nearby loads should have effectively the same inputs.
1818       for (const SDep &Succ : SUa->Succs) {
1819         if (Succ.getSUnit() == SUb)
1820           continue;
1821         LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
1822                           << ")\n");
1823         DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
1824       }
1825     } else {
1826       // Copy predecessor edges from SUb to SUa so that the SUnits SUb depends
1827       // on cannot be scheduled in between SUb and SUa. Successor edges
1828       // do not need to be copied from SUa to SUb since no one will depend
1829       // on stores.
1830       // Note that we don't need to worry about memory dependencies here: we
1831       // never try to cluster mem ops that have a memory dependency on each other.
1832       for (const SDep &Pred : SUb->Preds) {
1833         if (Pred.getSUnit() == SUa)
1834           continue;
1835         LLVM_DEBUG(dbgs() << " Copy Pred SU(" << Pred.getSUnit()->NodeNum
1836                           << ")\n");
1837         DAG->addEdge(SUa, SDep(Pred.getSUnit(), SDep::Artificial));
1838       }
1839     }
1840
1841     SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength,
1842                                              CurrentClusterBytes};
1843
1844     LLVM_DEBUG(dbgs() << " Curr cluster length: " << ClusterLength
1845                       << ", Curr cluster bytes: " << CurrentClusterBytes
1846                       << "\n");
1847   }
1848 }
1849
1850 void BaseMemOpClusterMutation::collectMemOpRecords(
1851     std::vector<SUnit> &SUnits, SmallVectorImpl<MemOpInfo> &MemOpRecords) {
1852   for (auto &SU : SUnits) {
1853     if ((IsLoad && !SU.getInstr()->mayLoad()) ||
1854         (!IsLoad && !SU.getInstr()->mayStore()))
1855       continue;
1856
1857     const MachineInstr &MI = *SU.getInstr();
1858     SmallVector<const MachineOperand *, 4> BaseOps;
1859     int64_t Offset;
1860     bool OffsetIsScalable;
1861     unsigned Width;
1862     if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
1863                                            OffsetIsScalable, Width, TRI)) {
1864       MemOpRecords.push_back(MemOpInfo(&SU, BaseOps, Offset, Width));
1865
1866       LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
1867                         << Offset << ", OffsetIsScalable: " << OffsetIsScalable
1868                         << ", Width: " << Width << "\n");
1869     }
1870 #ifndef NDEBUG
1871     for (const auto *Op : BaseOps)
1872       assert(Op);
1873 #endif
1874   }
1875 }
1876
1877 bool BaseMemOpClusterMutation::groupMemOps(
1878     ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
1879     DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups) {
1880   bool FastCluster =
1881       ForceFastCluster ||
1882       MemOps.size() * DAG->SUnits.size() / 1000 > FastClusterThreshold;
1883
1884   for (const auto &MemOp : MemOps) {
1885     unsigned ChainPredID = DAG->SUnits.size();
1886     if (FastCluster) {
1887       for (const SDep &Pred : MemOp.SU->Preds) {
1888         // We only want to cluster mem ops that have the same ctrl (non-data)
1889         // pred, so that they do not have a ctrl dependency on each other. For
1890         // store instrs, however, we can still cluster them if the pred is a
1891         // load instr.
1892         if ((Pred.isCtrl() &&
1893              (IsLoad ||
1894               (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) &&
1895             !Pred.isArtificial()) {
1896           ChainPredID = Pred.getSUnit()->NodeNum;
1897           break;
1898         }
1899       }
1900     } else
1901       ChainPredID = 0;
1902
1903     Groups[ChainPredID].push_back(MemOp);
1904   }
1905   return FastCluster;
1906 }
1907
1908 /// Callback from DAG postProcessing to create cluster edges for loads/stores.
1909 void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
1910   // Collect all the clusterable loads/stores.
1911   SmallVector<MemOpInfo, 32> MemOpRecords;
1912   collectMemOpRecords(DAG->SUnits, MemOpRecords);
1913
1914   if (MemOpRecords.size() < 2)
1915     return;
1916
1917   // If the DAG is too complex, use a heuristic to put loads/stores without
1918   // dependencies into the same group, to avoid a compile-time blow up.
1919   // Note that some fusion pairs may be lost this way.
1920   DenseMap<unsigned, SmallVector<MemOpInfo, 32>> Groups;
1921   bool FastCluster = groupMemOps(MemOpRecords, DAG, Groups);
1922
1923   for (auto &Group : Groups) {
1924     // Sort the loads/stores so that we can stop clustering as early as
1925     // possible.
1926     llvm::sort(Group.second);
1927
1928     // Try to cluster all the neighboring loads/stores.
1928 clusterNeighboringMemOps(Group.second, FastCluster, DAG); 1929 } 1930 } 1931 1932 //===----------------------------------------------------------------------===// 1933 // CopyConstrain - DAG post-processing to encourage copy elimination. 1934 //===----------------------------------------------------------------------===// 1935 1936 namespace { 1937 1938 /// Post-process the DAG to create weak edges from all uses of a copy to 1939 /// the one use that defines the copy's source vreg, most likely an induction 1940 /// variable increment. 1941 class CopyConstrain : public ScheduleDAGMutation { 1942 // Transient state. 1943 SlotIndex RegionBeginIdx; 1944 1945 // RegionEndIdx is the slot index of the last non-debug instruction in the 1946 // scheduling region. So we may have RegionBeginIdx == RegionEndIdx. 1947 SlotIndex RegionEndIdx; 1948 1949 public: 1950 CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {} 1951 1952 void apply(ScheduleDAGInstrs *DAGInstrs) override; 1953 1954 protected: 1955 void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG); 1956 }; 1957 1958 } // end anonymous namespace 1959 1960 namespace llvm { 1961 1962 std::unique_ptr<ScheduleDAGMutation> 1963 createCopyConstrainDAGMutation(const TargetInstrInfo *TII, 1964 const TargetRegisterInfo *TRI) { 1965 return std::make_unique<CopyConstrain>(TII, TRI); 1966 } 1967 1968 } // end namespace llvm 1969 1970 /// constrainLocalCopy handles two possibilities: 1971 /// 1) Local src: 1972 /// I0: = dst 1973 /// I1: src = ... 1974 /// I2: = dst 1975 /// I3: dst = src (copy) 1976 /// (create pred->succ edges I0->I1, I2->I1) 1977 /// 1978 /// 2) Local copy: 1979 /// I0: dst = src (copy) 1980 /// I1: = dst 1981 /// I2: src = ... 1982 /// I3: = dst 1983 /// (create pred->succ edges I1->I2, I3->I2) 1984 /// 1985 /// Although the MachineScheduler is currently constrained to single blocks, 1986 /// this algorithm should handle extended blocks. An EBB is a set of 1987 /// contiguously numbered blocks such that the previous block in the EBB is 1988 /// always the single predecessor. 1989 void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) { 1990 LiveIntervals *LIS = DAG->getLIS(); 1991 MachineInstr *Copy = CopySU->getInstr(); 1992 1993 // Check for pure vreg copies. 1994 const MachineOperand &SrcOp = Copy->getOperand(1); 1995 Register SrcReg = SrcOp.getReg(); 1996 if (!SrcReg.isVirtual() || !SrcOp.readsReg()) 1997 return; 1998 1999 const MachineOperand &DstOp = Copy->getOperand(0); 2000 Register DstReg = DstOp.getReg(); 2001 if (!DstReg.isVirtual() || DstOp.isDead()) 2002 return; 2003 2004 // Check if either the dest or source is local. If it's live across a back 2005 // edge, it's not local. Note that if both vregs are live across the back 2006 // edge, we cannot successfully contrain the copy without cyclic scheduling. 2007 // If both the copy's source and dest are local live intervals, then we 2008 // should treat the dest as the global for the purpose of adding 2009 // constraints. This adds edges from source's other uses to the copy. 
2010 unsigned LocalReg = SrcReg; 2011 unsigned GlobalReg = DstReg; 2012 LiveInterval *LocalLI = &LIS->getInterval(LocalReg); 2013 if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) { 2014 LocalReg = DstReg; 2015 GlobalReg = SrcReg; 2016 LocalLI = &LIS->getInterval(LocalReg); 2017 if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) 2018 return; 2019 } 2020 LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg); 2021 2022 // Find the global segment after the start of the local LI. 2023 LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex()); 2024 // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a 2025 // local live range. We could create edges from other global uses to the local 2026 // start, but the coalescer should have already eliminated these cases, so 2027 // don't bother dealing with it. 2028 if (GlobalSegment == GlobalLI->end()) 2029 return; 2030 2031 // If GlobalSegment is killed at the LocalLI->start, the call to find() 2032 // returned the next global segment. But if GlobalSegment overlaps with 2033 // LocalLI->start, then advance to the next segment. If a hole in GlobalLI 2034 // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole. 2035 if (GlobalSegment->contains(LocalLI->beginIndex())) 2036 ++GlobalSegment; 2037 2038 if (GlobalSegment == GlobalLI->end()) 2039 return; 2040 2041 // Check if GlobalLI contains a hole in the vicinity of LocalLI. 2042 if (GlobalSegment != GlobalLI->begin()) { 2043 // Two address defs have no hole. 2044 if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end, 2045 GlobalSegment->start)) { 2046 return; 2047 } 2048 // If the prior global segment may be defined by the same two-address 2049 // instruction that also defines LocalLI, then can't make a hole here. 2050 if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start, 2051 LocalLI->beginIndex())) { 2052 return; 2053 } 2054 // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise 2055 // it would be a disconnected component in the live range. 2056 assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() && 2057 "Disconnected LRG within the scheduling region."); 2058 } 2059 MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start); 2060 if (!GlobalDef) 2061 return; 2062 2063 SUnit *GlobalSU = DAG->getSUnit(GlobalDef); 2064 if (!GlobalSU) 2065 return; 2066 2067 // GlobalDef is the bottom of the GlobalLI hole. Open the hole by 2068 // constraining the uses of the last local def to precede GlobalDef. 2069 SmallVector<SUnit*,8> LocalUses; 2070 const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex()); 2071 MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def); 2072 SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef); 2073 for (const SDep &Succ : LastLocalSU->Succs) { 2074 if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg) 2075 continue; 2076 if (Succ.getSUnit() == GlobalSU) 2077 continue; 2078 if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit())) 2079 return; 2080 LocalUses.push_back(Succ.getSUnit()); 2081 } 2082 // Open the top of the GlobalLI hole by constraining any earlier global uses 2083 // to precede the start of LocalLI. 
2084 SmallVector<SUnit*,8> GlobalUses; 2085 MachineInstr *FirstLocalDef = 2086 LIS->getInstructionFromIndex(LocalLI->beginIndex()); 2087 SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef); 2088 for (const SDep &Pred : GlobalSU->Preds) { 2089 if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg) 2090 continue; 2091 if (Pred.getSUnit() == FirstLocalSU) 2092 continue; 2093 if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit())) 2094 return; 2095 GlobalUses.push_back(Pred.getSUnit()); 2096 } 2097 LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n"); 2098 // Add the weak edges. 2099 for (SUnit *LU : LocalUses) { 2100 LLVM_DEBUG(dbgs() << " Local use SU(" << LU->NodeNum << ") -> SU(" 2101 << GlobalSU->NodeNum << ")\n"); 2102 DAG->addEdge(GlobalSU, SDep(LU, SDep::Weak)); 2103 } 2104 for (SUnit *GU : GlobalUses) { 2105 LLVM_DEBUG(dbgs() << " Global use SU(" << GU->NodeNum << ") -> SU(" 2106 << FirstLocalSU->NodeNum << ")\n"); 2107 DAG->addEdge(FirstLocalSU, SDep(GU, SDep::Weak)); 2108 } 2109 } 2110 2111 /// Callback from DAG postProcessing to create weak edges to encourage 2112 /// copy elimination. 2113 void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) { 2114 ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs); 2115 assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals"); 2116 2117 MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end()); 2118 if (FirstPos == DAG->end()) 2119 return; 2120 RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos); 2121 RegionEndIdx = DAG->getLIS()->getInstructionIndex( 2122 *priorNonDebug(DAG->end(), DAG->begin())); 2123 2124 for (SUnit &SU : DAG->SUnits) { 2125 if (!SU.getInstr()->isCopy()) 2126 continue; 2127 2128 constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG)); 2129 } 2130 } 2131 2132 //===----------------------------------------------------------------------===// 2133 // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler 2134 // and possibly other custom schedulers. 2135 //===----------------------------------------------------------------------===// 2136 2137 static const unsigned InvalidCycle = ~0U; 2138 2139 SchedBoundary::~SchedBoundary() { delete HazardRec; } 2140 2141 /// Given a Count of resource usage and a Latency value, return true if a 2142 /// SchedBoundary becomes resource limited. 2143 /// If we are checking after scheduling a node, we should return true when 2144 /// we just reach the resource limit. 2145 static bool checkResourceLimit(unsigned LFactor, unsigned Count, 2146 unsigned Latency, bool AfterSchedNode) { 2147 int ResCntFactor = (int)(Count - (Latency * LFactor)); 2148 if (AfterSchedNode) 2149 return ResCntFactor >= (int)LFactor; 2150 else 2151 return ResCntFactor > (int)LFactor; 2152 } 2153 2154 void SchedBoundary::reset() { 2155 // A new HazardRec is created for each DAG and owned by SchedBoundary. 2156 // Destroying and reconstructing it is very expensive though. So keep 2157 // invalid, placeholder HazardRecs. 
2158 if (HazardRec && HazardRec->isEnabled()) { 2159 delete HazardRec; 2160 HazardRec = nullptr; 2161 } 2162 Available.clear(); 2163 Pending.clear(); 2164 CheckPending = false; 2165 CurrCycle = 0; 2166 CurrMOps = 0; 2167 MinReadyCycle = std::numeric_limits<unsigned>::max(); 2168 ExpectedLatency = 0; 2169 DependentLatency = 0; 2170 RetiredMOps = 0; 2171 MaxExecutedResCount = 0; 2172 ZoneCritResIdx = 0; 2173 IsResourceLimited = false; 2174 ReservedCycles.clear(); 2175 ReservedResourceSegments.clear(); 2176 ReservedCyclesIndex.clear(); 2177 ResourceGroupSubUnitMasks.clear(); 2178 #if LLVM_ENABLE_ABI_BREAKING_CHECKS 2179 // Track the maximum number of stall cycles that could arise either from the 2180 // latency of a DAG edge or the number of cycles that a processor resource is 2181 // reserved (SchedBoundary::ReservedCycles). 2182 MaxObservedStall = 0; 2183 #endif 2184 // Reserve a zero-count for invalid CritResIdx. 2185 ExecutedResCounts.resize(1); 2186 assert(!ExecutedResCounts[0] && "nonzero count for bad resource"); 2187 } 2188 2189 void SchedRemainder:: 2190 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { 2191 reset(); 2192 if (!SchedModel->hasInstrSchedModel()) 2193 return; 2194 RemainingCounts.resize(SchedModel->getNumProcResourceKinds()); 2195 for (SUnit &SU : DAG->SUnits) { 2196 const MCSchedClassDesc *SC = DAG->getSchedClass(&SU); 2197 RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC) 2198 * SchedModel->getMicroOpFactor(); 2199 for (TargetSchedModel::ProcResIter 2200 PI = SchedModel->getWriteProcResBegin(SC), 2201 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { 2202 unsigned PIdx = PI->ProcResourceIdx; 2203 unsigned Factor = SchedModel->getResourceFactor(PIdx); 2204 assert(PI->Cycles >= PI->StartAtCycle); 2205 RemainingCounts[PIdx] += (Factor * (PI->Cycles - PI->StartAtCycle)); 2206 } 2207 } 2208 } 2209 2210 void SchedBoundary:: 2211 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) { 2212 reset(); 2213 DAG = dag; 2214 SchedModel = smodel; 2215 Rem = rem; 2216 if (SchedModel->hasInstrSchedModel()) { 2217 unsigned ResourceCount = SchedModel->getNumProcResourceKinds(); 2218 ReservedCyclesIndex.resize(ResourceCount); 2219 ExecutedResCounts.resize(ResourceCount); 2220 ResourceGroupSubUnitMasks.resize(ResourceCount, APInt(ResourceCount, 0)); 2221 unsigned NumUnits = 0; 2222 2223 for (unsigned i = 0; i < ResourceCount; ++i) { 2224 ReservedCyclesIndex[i] = NumUnits; 2225 NumUnits += SchedModel->getProcResource(i)->NumUnits; 2226 if (isUnbufferedGroup(i)) { 2227 auto SubUnits = SchedModel->getProcResource(i)->SubUnitsIdxBegin; 2228 for (unsigned U = 0, UE = SchedModel->getProcResource(i)->NumUnits; 2229 U != UE; ++U) 2230 ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]); 2231 } 2232 } 2233 2234 ReservedCycles.resize(NumUnits, InvalidCycle); 2235 } 2236 } 2237 2238 /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat 2239 /// these "soft stalls" differently than the hard stall cycles based on CPU 2240 /// resources and computed by checkHazard(). A fully in-order model 2241 /// (MicroOpBufferSize==0) will not make use of this since instructions are not 2242 /// available for scheduling until they are ready. However, a weaker in-order 2243 /// model may use this for heuristics. For example, if a processor has in-order 2244 /// behavior when reading certain resources, this may come into play. 
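/// A small worked example with assumed numbers (not tied to any particular
/// target): for a top-down boundary at CurrCycle == 7, an SU marked
/// isUnbuffered whose TopReadyCycle is 10 reports a soft stall of
/// 10 - 7 = 3 cycles, while an SU that is already ready
/// (ReadyCycle <= CurrCycle), or one that is not unbuffered, reports 0.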
2245 unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) { 2246 if (!SU->isUnbuffered) 2247 return 0; 2248 2249 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle); 2250 if (ReadyCycle > CurrCycle) 2251 return ReadyCycle - CurrCycle; 2252 return 0; 2253 } 2254 2255 /// Compute the next cycle at which the given processor resource unit 2256 /// can be scheduled. 2257 unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx, 2258 unsigned Cycles, 2259 unsigned StartAtCycle) { 2260 if (SchedModel && SchedModel->enableIntervals()) { 2261 if (isTop()) 2262 return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromTop( 2263 CurrCycle, StartAtCycle, Cycles); 2264 2265 return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromBottom( 2266 CurrCycle, StartAtCycle, Cycles); 2267 } 2268 2269 unsigned NextUnreserved = ReservedCycles[InstanceIdx]; 2270 // If this resource has never been used, always return cycle zero. 2271 if (NextUnreserved == InvalidCycle) 2272 return 0; 2273 // For bottom-up scheduling add the cycles needed for the current operation. 2274 if (!isTop()) 2275 NextUnreserved += Cycles; 2276 return NextUnreserved; 2277 } 2278 2279 /// Compute the next cycle at which the given processor resource can be 2280 /// scheduled. Returns the next cycle and the index of the processor resource 2281 /// instance in the reserved cycles vector. 2282 std::pair<unsigned, unsigned> 2283 SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx, 2284 unsigned Cycles, unsigned StartAtCycle) { 2285 2286 unsigned MinNextUnreserved = InvalidCycle; 2287 unsigned InstanceIdx = 0; 2288 unsigned StartIndex = ReservedCyclesIndex[PIdx]; 2289 unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits; 2290 assert(NumberOfInstances > 0 && 2291 "Cannot have zero instances of a ProcResource"); 2292 2293 if (isUnbufferedGroup(PIdx)) { 2294 // If any subunits are used by the instruction, report that the resource 2295 // group is available at 0, effectively removing the group record from 2296 // hazarding and basing the hazarding decisions on the subunit records. 2297 // Otherwise, choose the first available instance from among the subunits. 2298 // Specifications which assign cycles to both the subunits and the group or 2299 // which use an unbuffered group with buffered subunits will appear to 2300 // schedule strangely. In the first case, the additional cycles for the 2301 // group will be ignored. In the second, the group will be ignored 2302 // entirely. 
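      // As a hypothetical illustration (the resource names are invented): if
      // an unbuffered group G has subunits U0 and U1 and the scheduling class
      // writes U0 directly, the loop below reports G available at cycle 0 and
      // the hazard is judged against U0's own record; if neither subunit is
      // written directly, the earliest-available subunit instance is chosen.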
2303 for (const MCWriteProcResEntry &PE : 2304 make_range(SchedModel->getWriteProcResBegin(SC), 2305 SchedModel->getWriteProcResEnd(SC))) 2306 if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx]) 2307 return std::make_pair(0u, StartIndex); 2308 2309 auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin; 2310 for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) { 2311 unsigned NextUnreserved, NextInstanceIdx; 2312 std::tie(NextUnreserved, NextInstanceIdx) = 2313 getNextResourceCycle(SC, SubUnits[I], Cycles, StartAtCycle); 2314 if (MinNextUnreserved > NextUnreserved) { 2315 InstanceIdx = NextInstanceIdx; 2316 MinNextUnreserved = NextUnreserved; 2317 } 2318 } 2319 return std::make_pair(MinNextUnreserved, InstanceIdx); 2320 } 2321 2322 for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End; 2323 ++I) { 2324 unsigned NextUnreserved = 2325 getNextResourceCycleByInstance(I, Cycles, StartAtCycle); 2326 if (MinNextUnreserved > NextUnreserved) { 2327 InstanceIdx = I; 2328 MinNextUnreserved = NextUnreserved; 2329 } 2330 } 2331 return std::make_pair(MinNextUnreserved, InstanceIdx); 2332 } 2333 2334 /// Does this SU have a hazard within the current instruction group. 2335 /// 2336 /// The scheduler supports two modes of hazard recognition. The first is the 2337 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that 2338 /// supports highly complicated in-order reservation tables 2339 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic. 2340 /// 2341 /// The second is a streamlined mechanism that checks for hazards based on 2342 /// simple counters that the scheduler itself maintains. It explicitly checks 2343 /// for instruction dispatch limitations, including the number of micro-ops that 2344 /// can dispatch per cycle. 2345 /// 2346 /// TODO: Also check whether the SU must start a new group. 2347 bool SchedBoundary::checkHazard(SUnit *SU) { 2348 if (HazardRec->isEnabled() 2349 && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) { 2350 return true; 2351 } 2352 2353 unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); 2354 if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) { 2355 LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops=" 2356 << SchedModel->getNumMicroOps(SU->getInstr()) << '\n'); 2357 return true; 2358 } 2359 2360 if (CurrMOps > 0 && 2361 ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) || 2362 (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) { 2363 LLVM_DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must " 2364 << (isTop() ? 
"begin" : "end") << " group\n"); 2365 return true; 2366 } 2367 2368 if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) { 2369 const MCSchedClassDesc *SC = DAG->getSchedClass(SU); 2370 for (const MCWriteProcResEntry &PE : 2371 make_range(SchedModel->getWriteProcResBegin(SC), 2372 SchedModel->getWriteProcResEnd(SC))) { 2373 unsigned ResIdx = PE.ProcResourceIdx; 2374 unsigned Cycles = PE.Cycles; 2375 unsigned StartAtCycle = PE.StartAtCycle; 2376 unsigned NRCycle, InstanceIdx; 2377 std::tie(NRCycle, InstanceIdx) = 2378 getNextResourceCycle(SC, ResIdx, Cycles, StartAtCycle); 2379 if (NRCycle > CurrCycle) { 2380 #if LLVM_ENABLE_ABI_BREAKING_CHECKS 2381 MaxObservedStall = std::max(Cycles, MaxObservedStall); 2382 #endif 2383 LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") " 2384 << SchedModel->getResourceName(ResIdx) 2385 << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']' 2386 << "=" << NRCycle << "c\n"); 2387 return true; 2388 } 2389 } 2390 } 2391 return false; 2392 } 2393 2394 // Find the unscheduled node in ReadySUs with the highest latency. 2395 unsigned SchedBoundary:: 2396 findMaxLatency(ArrayRef<SUnit*> ReadySUs) { 2397 SUnit *LateSU = nullptr; 2398 unsigned RemLatency = 0; 2399 for (SUnit *SU : ReadySUs) { 2400 unsigned L = getUnscheduledLatency(SU); 2401 if (L > RemLatency) { 2402 RemLatency = L; 2403 LateSU = SU; 2404 } 2405 } 2406 if (LateSU) { 2407 LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU(" 2408 << LateSU->NodeNum << ") " << RemLatency << "c\n"); 2409 } 2410 return RemLatency; 2411 } 2412 2413 // Count resources in this zone and the remaining unscheduled 2414 // instruction. Return the max count, scaled. Set OtherCritIdx to the critical 2415 // resource index, or zero if the zone is issue limited. 2416 unsigned SchedBoundary:: 2417 getOtherResourceCount(unsigned &OtherCritIdx) { 2418 OtherCritIdx = 0; 2419 if (!SchedModel->hasInstrSchedModel()) 2420 return 0; 2421 2422 unsigned OtherCritCount = Rem->RemIssueCount 2423 + (RetiredMOps * SchedModel->getMicroOpFactor()); 2424 LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: " 2425 << OtherCritCount / SchedModel->getMicroOpFactor() << '\n'); 2426 for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds(); 2427 PIdx != PEnd; ++PIdx) { 2428 unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx]; 2429 if (OtherCount > OtherCritCount) { 2430 OtherCritCount = OtherCount; 2431 OtherCritIdx = PIdx; 2432 } 2433 } 2434 if (OtherCritIdx) { 2435 LLVM_DEBUG( 2436 dbgs() << " " << Available.getName() << " + Remain CritRes: " 2437 << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx) 2438 << " " << SchedModel->getResourceName(OtherCritIdx) << "\n"); 2439 } 2440 return OtherCritCount; 2441 } 2442 2443 void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue, 2444 unsigned Idx) { 2445 assert(SU->getInstr() && "Scheduled SUnit must have instr"); 2446 2447 #if LLVM_ENABLE_ABI_BREAKING_CHECKS 2448 // ReadyCycle was been bumped up to the CurrCycle when this node was 2449 // scheduled, but CurrCycle may have been eagerly advanced immediately after 2450 // scheduling, so may now be greater than ReadyCycle. 2451 if (ReadyCycle > CurrCycle) 2452 MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall); 2453 #endif 2454 2455 if (ReadyCycle < MinReadyCycle) 2456 MinReadyCycle = ReadyCycle; 2457 2458 // Check for interlocks first. 
For the purpose of other heuristics, an 2459 // instruction that cannot issue appears as if it's not in the ReadyQueue. 2460 bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; 2461 bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) || 2462 checkHazard(SU) || (Available.size() >= ReadyListLimit); 2463 2464 if (!HazardDetected) { 2465 Available.push(SU); 2466 2467 if (InPQueue) 2468 Pending.remove(Pending.begin() + Idx); 2469 return; 2470 } 2471 2472 if (!InPQueue) 2473 Pending.push(SU); 2474 } 2475 2476 /// Move the boundary of scheduled code by one cycle. 2477 void SchedBoundary::bumpCycle(unsigned NextCycle) { 2478 if (SchedModel->getMicroOpBufferSize() == 0) { 2479 assert(MinReadyCycle < std::numeric_limits<unsigned>::max() && 2480 "MinReadyCycle uninitialized"); 2481 if (MinReadyCycle > NextCycle) 2482 NextCycle = MinReadyCycle; 2483 } 2484 // Update the current micro-ops, which will issue in the next cycle. 2485 unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle); 2486 CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps; 2487 2488 // Decrement DependentLatency based on the next cycle. 2489 if ((NextCycle - CurrCycle) > DependentLatency) 2490 DependentLatency = 0; 2491 else 2492 DependentLatency -= (NextCycle - CurrCycle); 2493 2494 if (!HazardRec->isEnabled()) { 2495 // Bypass HazardRec virtual calls. 2496 CurrCycle = NextCycle; 2497 } else { 2498 // Bypass getHazardType calls in case of long latency. 2499 for (; CurrCycle != NextCycle; ++CurrCycle) { 2500 if (isTop()) 2501 HazardRec->AdvanceCycle(); 2502 else 2503 HazardRec->RecedeCycle(); 2504 } 2505 } 2506 CheckPending = true; 2507 IsResourceLimited = 2508 checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(), 2509 getScheduledLatency(), true); 2510 2511 LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() 2512 << '\n'); 2513 } 2514 2515 void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) { 2516 ExecutedResCounts[PIdx] += Count; 2517 if (ExecutedResCounts[PIdx] > MaxExecutedResCount) 2518 MaxExecutedResCount = ExecutedResCounts[PIdx]; 2519 } 2520 2521 /// Add the given processor resource to this scheduled zone. 2522 /// 2523 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles 2524 /// during which this resource is consumed. 2525 /// 2526 /// \return the next cycle at which the instruction may execute without 2527 /// oversubscribing resources. 2528 unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx, 2529 unsigned Cycles, unsigned NextCycle, 2530 unsigned StartAtCycle) { 2531 unsigned Factor = SchedModel->getResourceFactor(PIdx); 2532 unsigned Count = Factor * (Cycles - StartAtCycle); 2533 LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +" 2534 << Cycles << "x" << Factor << "u\n"); 2535 2536 // Update Executed resources counts. 2537 incExecutedResources(PIdx, Count); 2538 assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted"); 2539 Rem->RemainingCounts[PIdx] -= Count; 2540 2541 // Check if this resource exceeds the current critical resource. If so, it 2542 // becomes the critical resource. 
2543 if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) { 2544 ZoneCritResIdx = PIdx; 2545 LLVM_DEBUG(dbgs() << " *** Critical resource " 2546 << SchedModel->getResourceName(PIdx) << ": " 2547 << getResourceCount(PIdx) / SchedModel->getLatencyFactor() 2548 << "c\n"); 2549 } 2550 // For reserved resources, record the highest cycle using the resource. 2551 unsigned NextAvailable, InstanceIdx; 2552 std::tie(NextAvailable, InstanceIdx) = 2553 getNextResourceCycle(SC, PIdx, Cycles, StartAtCycle); 2554 if (NextAvailable > CurrCycle) { 2555 LLVM_DEBUG(dbgs() << " Resource conflict: " 2556 << SchedModel->getResourceName(PIdx) 2557 << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']' 2558 << " reserved until @" << NextAvailable << "\n"); 2559 } 2560 return NextAvailable; 2561 } 2562 2563 /// Move the boundary of scheduled code by one SUnit. 2564 void SchedBoundary::bumpNode(SUnit *SU) { 2565 // Update the reservation table. 2566 if (HazardRec->isEnabled()) { 2567 if (!isTop() && SU->isCall) { 2568 // Calls are scheduled with their preceding instructions. For bottom-up 2569 // scheduling, clear the pipeline state before emitting. 2570 HazardRec->Reset(); 2571 } 2572 HazardRec->EmitInstruction(SU); 2573 // Scheduling an instruction may have made pending instructions available. 2574 CheckPending = true; 2575 } 2576 // checkHazard should prevent scheduling multiple instructions per cycle that 2577 // exceed the issue width. 2578 const MCSchedClassDesc *SC = DAG->getSchedClass(SU); 2579 unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr()); 2580 assert( 2581 (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) && 2582 "Cannot schedule this instruction's MicroOps in the current cycle."); 2583 2584 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle); 2585 LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n"); 2586 2587 unsigned NextCycle = CurrCycle; 2588 switch (SchedModel->getMicroOpBufferSize()) { 2589 case 0: 2590 assert(ReadyCycle <= CurrCycle && "Broken PendingQueue"); 2591 break; 2592 case 1: 2593 if (ReadyCycle > NextCycle) { 2594 NextCycle = ReadyCycle; 2595 LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n"); 2596 } 2597 break; 2598 default: 2599 // We don't currently model the OOO reorder buffer, so consider all 2600 // scheduled MOps to be "retired". We do loosely model in-order resource 2601 // latency. If this instruction uses an in-order resource, account for any 2602 // likely stall cycles. 2603 if (SU->isUnbuffered && ReadyCycle > NextCycle) 2604 NextCycle = ReadyCycle; 2605 break; 2606 } 2607 RetiredMOps += IncMOps; 2608 2609 // Update resource counts and critical resource. 2610 if (SchedModel->hasInstrSchedModel()) { 2611 unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor(); 2612 assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted"); 2613 Rem->RemIssueCount -= DecRemIssue; 2614 if (ZoneCritResIdx) { 2615 // Scale scheduled micro-ops for comparing with the critical resource. 2616 unsigned ScaledMOps = 2617 RetiredMOps * SchedModel->getMicroOpFactor(); 2618 2619 // If scaled micro-ops are now more than the previous critical resource by 2620 // a full cycle, then micro-ops issue becomes critical. 
2621 if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx)) 2622 >= (int)SchedModel->getLatencyFactor()) { 2623 ZoneCritResIdx = 0; 2624 LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: " 2625 << ScaledMOps / SchedModel->getLatencyFactor() 2626 << "c\n"); 2627 } 2628 } 2629 for (TargetSchedModel::ProcResIter 2630 PI = SchedModel->getWriteProcResBegin(SC), 2631 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { 2632 unsigned RCycle = countResource(SC, PI->ProcResourceIdx, PI->Cycles, 2633 NextCycle, PI->StartAtCycle); 2634 if (RCycle > NextCycle) 2635 NextCycle = RCycle; 2636 } 2637 if (SU->hasReservedResource) { 2638 // For reserved resources, record the highest cycle using the resource. 2639 // For top-down scheduling, this is the cycle in which we schedule this 2640 // instruction plus the number of cycles the operations reserves the 2641 // resource. For bottom-up is it simply the instruction's cycle. 2642 for (TargetSchedModel::ProcResIter 2643 PI = SchedModel->getWriteProcResBegin(SC), 2644 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { 2645 unsigned PIdx = PI->ProcResourceIdx; 2646 if (SchedModel->getProcResource(PIdx)->BufferSize == 0) { 2647 2648 if (SchedModel && SchedModel->enableIntervals()) { 2649 unsigned ReservedUntil, InstanceIdx; 2650 std::tie(ReservedUntil, InstanceIdx) = 2651 getNextResourceCycle(SC, PIdx, PI->Cycles, PI->StartAtCycle); 2652 if (isTop()) { 2653 ReservedResourceSegments[InstanceIdx].add( 2654 ResourceSegments::getResourceIntervalTop( 2655 NextCycle, PI->StartAtCycle, PI->Cycles), 2656 MIResourceCutOff); 2657 } else { 2658 ReservedResourceSegments[InstanceIdx].add( 2659 ResourceSegments::getResourceIntervalBottom( 2660 NextCycle, PI->StartAtCycle, PI->Cycles), 2661 MIResourceCutOff); 2662 } 2663 } else { 2664 2665 unsigned ReservedUntil, InstanceIdx; 2666 std::tie(ReservedUntil, InstanceIdx) = 2667 getNextResourceCycle(SC, PIdx, 0, PI->StartAtCycle); 2668 if (isTop()) { 2669 ReservedCycles[InstanceIdx] = 2670 std::max(ReservedUntil, NextCycle + PI->Cycles); 2671 } else 2672 ReservedCycles[InstanceIdx] = NextCycle; 2673 } 2674 } 2675 } 2676 } 2677 } 2678 // Update ExpectedLatency and DependentLatency. 2679 unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency; 2680 unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency; 2681 if (SU->getDepth() > TopLatency) { 2682 TopLatency = SU->getDepth(); 2683 LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU(" 2684 << SU->NodeNum << ") " << TopLatency << "c\n"); 2685 } 2686 if (SU->getHeight() > BotLatency) { 2687 BotLatency = SU->getHeight(); 2688 LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU(" 2689 << SU->NodeNum << ") " << BotLatency << "c\n"); 2690 } 2691 // If we stall for any reason, bump the cycle. 2692 if (NextCycle > CurrCycle) 2693 bumpCycle(NextCycle); 2694 else 2695 // After updating ZoneCritResIdx and ExpectedLatency, check if we're 2696 // resource limited. If a stall occurred, bumpCycle does this. 2697 IsResourceLimited = 2698 checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(), 2699 getScheduledLatency(), true); 2700 2701 // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle 2702 // resets CurrMOps. Loop to handle instructions with more MOps than issue in 2703 // one cycle. Since we commonly reach the max MOps here, opportunistically 2704 // bump the cycle to avoid uselessly checking everything in the readyQ. 
2705 CurrMOps += IncMOps; 2706 2707 // Bump the cycle count for issue group constraints. 2708 // This must be done after NextCycle has been adjust for all other stalls. 2709 // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set 2710 // currCycle to X. 2711 if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) || 2712 (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) { 2713 LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin") 2714 << " group\n"); 2715 bumpCycle(++NextCycle); 2716 } 2717 2718 while (CurrMOps >= SchedModel->getIssueWidth()) { 2719 LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle " 2720 << CurrCycle << '\n'); 2721 bumpCycle(++NextCycle); 2722 } 2723 LLVM_DEBUG(dumpScheduledState()); 2724 } 2725 2726 /// Release pending ready nodes in to the available queue. This makes them 2727 /// visible to heuristics. 2728 void SchedBoundary::releasePending() { 2729 // If the available queue is empty, it is safe to reset MinReadyCycle. 2730 if (Available.empty()) 2731 MinReadyCycle = std::numeric_limits<unsigned>::max(); 2732 2733 // Check to see if any of the pending instructions are ready to issue. If 2734 // so, add them to the available queue. 2735 for (unsigned I = 0, E = Pending.size(); I < E; ++I) { 2736 SUnit *SU = *(Pending.begin() + I); 2737 unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; 2738 2739 if (ReadyCycle < MinReadyCycle) 2740 MinReadyCycle = ReadyCycle; 2741 2742 if (Available.size() >= ReadyListLimit) 2743 break; 2744 2745 releaseNode(SU, ReadyCycle, true, I); 2746 if (E != Pending.size()) { 2747 --I; 2748 --E; 2749 } 2750 } 2751 CheckPending = false; 2752 } 2753 2754 /// Remove SU from the ready set for this boundary. 2755 void SchedBoundary::removeReady(SUnit *SU) { 2756 if (Available.isInQueue(SU)) 2757 Available.remove(Available.find(SU)); 2758 else { 2759 assert(Pending.isInQueue(SU) && "bad ready count"); 2760 Pending.remove(Pending.find(SU)); 2761 } 2762 } 2763 2764 /// If this queue only has one ready candidate, return it. As a side effect, 2765 /// defer any nodes that now hit a hazard, and advance the cycle until at least 2766 /// one node is ready. If multiple instructions are ready, return NULL. 2767 SUnit *SchedBoundary::pickOnlyChoice() { 2768 if (CheckPending) 2769 releasePending(); 2770 2771 // Defer any ready instrs that now have a hazard. 2772 for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) { 2773 if (checkHazard(*I)) { 2774 Pending.push(*I); 2775 I = Available.remove(I); 2776 continue; 2777 } 2778 ++I; 2779 } 2780 for (unsigned i = 0; Available.empty(); ++i) { 2781 // FIXME: Re-enable assert once PR20057 is resolved. 2782 // assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) && 2783 // "permanent hazard"); 2784 (void)i; 2785 bumpCycle(CurrCycle + 1); 2786 releasePending(); 2787 } 2788 2789 LLVM_DEBUG(Pending.dump()); 2790 LLVM_DEBUG(Available.dump()); 2791 2792 if (Available.size() == 1) 2793 return *Available.begin(); 2794 return nullptr; 2795 } 2796 2797 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2798 2799 /// Dump the content of the \ref ReservedCycles vector for the 2800 /// resources that are used in the basic block. 
2801 /// 2802 LLVM_DUMP_METHOD void SchedBoundary::dumpReservedCycles() const { 2803 if (!SchedModel->hasInstrSchedModel()) 2804 return; 2805 2806 unsigned ResourceCount = SchedModel->getNumProcResourceKinds(); 2807 unsigned StartIdx = 0; 2808 2809 for (unsigned ResIdx = 0; ResIdx < ResourceCount; ++ResIdx) { 2810 const unsigned NumUnits = SchedModel->getProcResource(ResIdx)->NumUnits; 2811 std::string ResName = SchedModel->getResourceName(ResIdx); 2812 for (unsigned UnitIdx = 0; UnitIdx < NumUnits; ++UnitIdx) { 2813 dbgs() << ResName << "(" << UnitIdx << ") = "; 2814 if (SchedModel && SchedModel->enableIntervals()) { 2815 if (ReservedResourceSegments.count(StartIdx + UnitIdx)) 2816 dbgs() << ReservedResourceSegments.at(StartIdx + UnitIdx); 2817 else 2818 dbgs() << "{ }\n"; 2819 } else 2820 dbgs() << ReservedCycles[StartIdx + UnitIdx] << "\n"; 2821 } 2822 StartIdx += NumUnits; 2823 } 2824 } 2825 2826 // This is useful information to dump after bumpNode. 2827 // Note that the Queue contents are more useful before pickNodeFromQueue. 2828 LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const { 2829 unsigned ResFactor; 2830 unsigned ResCount; 2831 if (ZoneCritResIdx) { 2832 ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx); 2833 ResCount = getResourceCount(ZoneCritResIdx); 2834 } else { 2835 ResFactor = SchedModel->getMicroOpFactor(); 2836 ResCount = RetiredMOps * ResFactor; 2837 } 2838 unsigned LFactor = SchedModel->getLatencyFactor(); 2839 dbgs() << Available.getName() << " @" << CurrCycle << "c\n" 2840 << " Retired: " << RetiredMOps; 2841 dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c"; 2842 dbgs() << "\n Critical: " << ResCount / LFactor << "c, " 2843 << ResCount / ResFactor << " " 2844 << SchedModel->getResourceName(ZoneCritResIdx) 2845 << "\n ExpectedLatency: " << ExpectedLatency << "c\n" 2846 << (IsResourceLimited ? " - Resource" : " - Latency") 2847 << " limited.\n"; 2848 if (MISchedDumpReservedCycles) 2849 dumpReservedCycles(); 2850 } 2851 #endif 2852 2853 //===----------------------------------------------------------------------===// 2854 // GenericScheduler - Generic implementation of MachineSchedStrategy. 2855 //===----------------------------------------------------------------------===// 2856 2857 void GenericSchedulerBase::SchedCandidate:: 2858 initResourceDelta(const ScheduleDAGMI *DAG, 2859 const TargetSchedModel *SchedModel) { 2860 if (!Policy.ReduceResIdx && !Policy.DemandResIdx) 2861 return; 2862 2863 const MCSchedClassDesc *SC = DAG->getSchedClass(SU); 2864 for (TargetSchedModel::ProcResIter 2865 PI = SchedModel->getWriteProcResBegin(SC), 2866 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { 2867 if (PI->ProcResourceIdx == Policy.ReduceResIdx) 2868 ResDelta.CritResources += PI->Cycles; 2869 if (PI->ProcResourceIdx == Policy.DemandResIdx) 2870 ResDelta.DemandedResources += PI->Cycles; 2871 } 2872 } 2873 2874 /// Compute remaining latency. We need this both to determine whether the 2875 /// overall schedule has become latency-limited and whether the instructions 2876 /// outside this zone are resource or latency limited. 
///
2878 /// The "dependent" latency is updated incrementally during scheduling as the
2879 /// max height/depth of scheduled nodes minus the cycles since it was
2880 /// scheduled:
2881 ///   DLat = max (N.depth - (CurrCycle - N.ReadyCycle)) for N in Zone
2882 ///
2883 /// The "independent" latency is the max ready queue depth:
2884 ///   ILat = max N.depth for N in Available|Pending
2885 ///
2886 /// RemainingLatency is the greater of independent and dependent latency.
2887 ///
2888 /// These computations are expensive, especially in DAGs with many edges, so
2889 /// only do them if necessary.
2890 static unsigned computeRemLatency(SchedBoundary &CurrZone) {
2891   unsigned RemLatency = CurrZone.getDependentLatency();
2892   RemLatency = std::max(RemLatency,
2893                         CurrZone.findMaxLatency(CurrZone.Available.elements()));
2894   RemLatency = std::max(RemLatency,
2895                         CurrZone.findMaxLatency(CurrZone.Pending.elements()));
2896   return RemLatency;
2897 }
2898
2899 /// Returns true if the current cycle plus remaining latency is greater than
2900 /// the critical path in the scheduling region.
2901 bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
2902                                                SchedBoundary &CurrZone,
2903                                                bool ComputeRemLatency,
2904                                                unsigned &RemLatency) const {
2905   // The current cycle is already greater than the critical path, so we are
2906   // already latency limited and don't need to compute the remaining latency.
2907   if (CurrZone.getCurrCycle() > Rem.CriticalPath)
2908     return true;
2909
2910   // If we haven't scheduled anything yet, then we aren't latency limited.
2911   if (CurrZone.getCurrCycle() == 0)
2912     return false;
2913
2914   if (ComputeRemLatency)
2915     RemLatency = computeRemLatency(CurrZone);
2916
2917   return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
2918 }
2919
2920 /// Set the CandPolicy for a scheduling zone, given the current resources and
2921 /// latencies inside and outside the zone.
2922 void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
2923                                      SchedBoundary &CurrZone,
2924                                      SchedBoundary *OtherZone) {
2925   // Apply preemptive heuristics based on the total latency and resources
2926   // inside and outside this zone. Potential stalls should be considered before
2927   // following this policy.
2928
2929   // Compute the critical resource outside the zone.
2930   unsigned OtherCritIdx = 0;
2931   unsigned OtherCount =
2932       OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
2933
2934   bool OtherResLimited = false;
2935   unsigned RemLatency = 0;
2936   bool RemLatencyComputed = false;
2937   if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
2938     RemLatency = computeRemLatency(CurrZone);
2939     RemLatencyComputed = true;
2940     OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
2941                                          OtherCount, RemLatency, false);
2942   }
2943
2944   // Schedule aggressively for latency in PostRA mode. We don't check for
2945   // acyclic latency during PostRA, and highly out-of-order processors will
2946   // skip PostRA scheduling.
2947   if (!OtherResLimited &&
2948       (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
2949                                        RemLatency))) {
2950     Policy.ReduceLatency |= true;
2951     LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
2952                       << " RemainingLatency " << RemLatency << " + "
2953                       << CurrZone.getCurrCycle() << "c > CritPath "
2954                       << Rem.CriticalPath << "\n");
2955   }
2956   // If the same resource is limiting inside and outside the zone, do nothing.
2957 if (CurrZone.getZoneCritResIdx() == OtherCritIdx) 2958 return; 2959 2960 LLVM_DEBUG(if (CurrZone.isResourceLimited()) { 2961 dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: " 2962 << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n"; 2963 } if (OtherResLimited) dbgs() 2964 << " RemainingLimit: " 2965 << SchedModel->getResourceName(OtherCritIdx) << "\n"; 2966 if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs() 2967 << " Latency limited both directions.\n"); 2968 2969 if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx) 2970 Policy.ReduceResIdx = CurrZone.getZoneCritResIdx(); 2971 2972 if (OtherResLimited) 2973 Policy.DemandResIdx = OtherCritIdx; 2974 } 2975 2976 #ifndef NDEBUG 2977 const char *GenericSchedulerBase::getReasonStr( 2978 GenericSchedulerBase::CandReason Reason) { 2979 switch (Reason) { 2980 case NoCand: return "NOCAND "; 2981 case Only1: return "ONLY1 "; 2982 case PhysReg: return "PHYS-REG "; 2983 case RegExcess: return "REG-EXCESS"; 2984 case RegCritical: return "REG-CRIT "; 2985 case Stall: return "STALL "; 2986 case Cluster: return "CLUSTER "; 2987 case Weak: return "WEAK "; 2988 case RegMax: return "REG-MAX "; 2989 case ResourceReduce: return "RES-REDUCE"; 2990 case ResourceDemand: return "RES-DEMAND"; 2991 case TopDepthReduce: return "TOP-DEPTH "; 2992 case TopPathReduce: return "TOP-PATH "; 2993 case BotHeightReduce:return "BOT-HEIGHT"; 2994 case BotPathReduce: return "BOT-PATH "; 2995 case NextDefUse: return "DEF-USE "; 2996 case NodeOrder: return "ORDER "; 2997 }; 2998 llvm_unreachable("Unknown reason!"); 2999 } 3000 3001 void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) { 3002 PressureChange P; 3003 unsigned ResIdx = 0; 3004 unsigned Latency = 0; 3005 switch (Cand.Reason) { 3006 default: 3007 break; 3008 case RegExcess: 3009 P = Cand.RPDelta.Excess; 3010 break; 3011 case RegCritical: 3012 P = Cand.RPDelta.CriticalMax; 3013 break; 3014 case RegMax: 3015 P = Cand.RPDelta.CurrentMax; 3016 break; 3017 case ResourceReduce: 3018 ResIdx = Cand.Policy.ReduceResIdx; 3019 break; 3020 case ResourceDemand: 3021 ResIdx = Cand.Policy.DemandResIdx; 3022 break; 3023 case TopDepthReduce: 3024 Latency = Cand.SU->getDepth(); 3025 break; 3026 case TopPathReduce: 3027 Latency = Cand.SU->getHeight(); 3028 break; 3029 case BotHeightReduce: 3030 Latency = Cand.SU->getHeight(); 3031 break; 3032 case BotPathReduce: 3033 Latency = Cand.SU->getDepth(); 3034 break; 3035 } 3036 dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); 3037 if (P.isValid()) 3038 dbgs() << " " << TRI->getRegPressureSetName(P.getPSet()) 3039 << ":" << P.getUnitInc() << " "; 3040 else 3041 dbgs() << " "; 3042 if (ResIdx) 3043 dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " "; 3044 else 3045 dbgs() << " "; 3046 if (Latency) 3047 dbgs() << " " << Latency << " cycles "; 3048 else 3049 dbgs() << " "; 3050 dbgs() << '\n'; 3051 } 3052 #endif 3053 3054 namespace llvm { 3055 /// Return true if this heuristic determines order. 3056 /// TODO: Consider refactor return type of these functions as integer or enum, 3057 /// as we may need to differentiate whether TryCand is better than Cand. 
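/// Illustrative calls with made-up values: tryLess(1, 3, TryCand, Cand,
/// GenericSchedulerBase::Stall) sets TryCand.Reason = Stall and returns true
/// (TryCand wins); tryLess(3, 1, ...) also returns true but in Cand's favor,
/// replacing Cand.Reason with Stall if the existing reason compares greater
/// (i.e. is weaker); tryLess(2, 2, ...) returns false, so the caller falls
/// through to its next heuristic.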
3058 bool tryLess(int TryVal, int CandVal, 3059 GenericSchedulerBase::SchedCandidate &TryCand, 3060 GenericSchedulerBase::SchedCandidate &Cand, 3061 GenericSchedulerBase::CandReason Reason) { 3062 if (TryVal < CandVal) { 3063 TryCand.Reason = Reason; 3064 return true; 3065 } 3066 if (TryVal > CandVal) { 3067 if (Cand.Reason > Reason) 3068 Cand.Reason = Reason; 3069 return true; 3070 } 3071 return false; 3072 } 3073 3074 bool tryGreater(int TryVal, int CandVal, 3075 GenericSchedulerBase::SchedCandidate &TryCand, 3076 GenericSchedulerBase::SchedCandidate &Cand, 3077 GenericSchedulerBase::CandReason Reason) { 3078 if (TryVal > CandVal) { 3079 TryCand.Reason = Reason; 3080 return true; 3081 } 3082 if (TryVal < CandVal) { 3083 if (Cand.Reason > Reason) 3084 Cand.Reason = Reason; 3085 return true; 3086 } 3087 return false; 3088 } 3089 3090 bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, 3091 GenericSchedulerBase::SchedCandidate &Cand, 3092 SchedBoundary &Zone) { 3093 if (Zone.isTop()) { 3094 // Prefer the candidate with the lesser depth, but only if one of them has 3095 // depth greater than the total latency scheduled so far, otherwise either 3096 // of them could be scheduled now with no stall. 3097 if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) > 3098 Zone.getScheduledLatency()) { 3099 if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), 3100 TryCand, Cand, GenericSchedulerBase::TopDepthReduce)) 3101 return true; 3102 } 3103 if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(), 3104 TryCand, Cand, GenericSchedulerBase::TopPathReduce)) 3105 return true; 3106 } else { 3107 // Prefer the candidate with the lesser height, but only if one of them has 3108 // height greater than the total latency scheduled so far, otherwise either 3109 // of them could be scheduled now with no stall. 3110 if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) > 3111 Zone.getScheduledLatency()) { 3112 if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), 3113 TryCand, Cand, GenericSchedulerBase::BotHeightReduce)) 3114 return true; 3115 } 3116 if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(), 3117 TryCand, Cand, GenericSchedulerBase::BotPathReduce)) 3118 return true; 3119 } 3120 return false; 3121 } 3122 } // end namespace llvm 3123 3124 static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) { 3125 LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ") 3126 << GenericSchedulerBase::getReasonStr(Reason) << '\n'); 3127 } 3128 3129 static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) { 3130 tracePick(Cand.Reason, Cand.AtTop); 3131 } 3132 3133 void GenericScheduler::initialize(ScheduleDAGMI *dag) { 3134 assert(dag->hasVRegLiveness() && 3135 "(PreRA)GenericScheduler needs vreg liveness"); 3136 DAG = static_cast<ScheduleDAGMILive*>(dag); 3137 SchedModel = DAG->getSchedModel(); 3138 TRI = DAG->TRI; 3139 3140 if (RegionPolicy.ComputeDFSResult) 3141 DAG->computeDFSResult(); 3142 3143 Rem.init(DAG, SchedModel); 3144 Top.init(DAG, SchedModel, &Rem); 3145 Bot.init(DAG, SchedModel, &Rem); 3146 3147 // Initialize resource counts. 3148 3149 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or 3150 // are disabled, then these HazardRecs will be disabled. 
3151 const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); 3152 if (!Top.HazardRec) { 3153 Top.HazardRec = 3154 DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer( 3155 Itin, DAG); 3156 } 3157 if (!Bot.HazardRec) { 3158 Bot.HazardRec = 3159 DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer( 3160 Itin, DAG); 3161 } 3162 TopCand.SU = nullptr; 3163 BotCand.SU = nullptr; 3164 } 3165 3166 /// Initialize the per-region scheduling policy. 3167 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, 3168 MachineBasicBlock::iterator End, 3169 unsigned NumRegionInstrs) { 3170 const MachineFunction &MF = *Begin->getMF(); 3171 const TargetLowering *TLI = MF.getSubtarget().getTargetLowering(); 3172 3173 // Avoid setting up the register pressure tracker for small regions to save 3174 // compile time. As a rough heuristic, only track pressure when the number of 3175 // schedulable instructions exceeds half the integer register file. 3176 RegionPolicy.ShouldTrackPressure = true; 3177 for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) { 3178 MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT; 3179 if (TLI->isTypeLegal(LegalIntVT)) { 3180 unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs( 3181 TLI->getRegClassFor(LegalIntVT)); 3182 RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2); 3183 } 3184 } 3185 3186 // For generic targets, we default to bottom-up, because it's simpler and more 3187 // compile-time optimizations have been implemented in that direction. 3188 RegionPolicy.OnlyBottomUp = true; 3189 3190 // Allow the subtarget to override default policy. 3191 MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs); 3192 3193 // After subtarget overrides, apply command line options. 3194 if (!EnableRegPressure) { 3195 RegionPolicy.ShouldTrackPressure = false; 3196 RegionPolicy.ShouldTrackLaneMasks = false; 3197 } 3198 3199 // -misched-topdown/bottomup can force or unforce the scheduling direction. 3200 // e.g. -misched-bottomup=false allows scheduling in both directions. 3201 assert((!ForceTopDown || !ForceBottomUp) && 3202 "-misched-topdown incompatible with -misched-bottomup"); 3203 if (ForceBottomUp.getNumOccurrences() > 0) { 3204 RegionPolicy.OnlyBottomUp = ForceBottomUp; 3205 if (RegionPolicy.OnlyBottomUp) 3206 RegionPolicy.OnlyTopDown = false; 3207 } 3208 if (ForceTopDown.getNumOccurrences() > 0) { 3209 RegionPolicy.OnlyTopDown = ForceTopDown; 3210 if (RegionPolicy.OnlyTopDown) 3211 RegionPolicy.OnlyBottomUp = false; 3212 } 3213 } 3214 3215 void GenericScheduler::dumpPolicy() const { 3216 // Cannot completely remove virtual function even in release mode. 3217 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 3218 dbgs() << "GenericScheduler RegionPolicy: " 3219 << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure 3220 << " OnlyTopDown=" << RegionPolicy.OnlyTopDown 3221 << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp 3222 << "\n"; 3223 #endif 3224 } 3225 3226 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic 3227 /// critical path by more cycles than it takes to drain the instruction buffer. 3228 /// We estimate an upper bound on in-flight instructions as: 3229 /// 3230 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height ) 3231 /// InFlightIterations = AcyclicPath / CyclesPerIteration 3232 /// InFlightResources = InFlightIterations * LoopResources 3233 /// 3234 /// TODO: Check execution resources in addition to IssueCount.
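/// Worked example with made-up numbers (assuming resources do not limit the
/// iteration): if the acyclic path is 40 cycles, the cyclic critical path is
/// 8 cycles, and each iteration issues 12 micro-ops, then about 40/8 = 5
/// iterations (roughly 60 micro-ops) must be in flight to hide the acyclic
/// latency; if the micro-op buffer holds fewer than that, the region is
/// flagged as acyclic-latency limited.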
3235 void GenericScheduler::checkAcyclicLatency() { 3236 if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath) 3237 return; 3238 3239 // Scaled number of cycles per loop iteration. 3240 unsigned IterCount = 3241 std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(), 3242 Rem.RemIssueCount); 3243 // Scaled acyclic critical path. 3244 unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor(); 3245 // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop 3246 unsigned InFlightCount = 3247 (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount; 3248 unsigned BufferLimit = 3249 SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor(); 3250 3251 Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit; 3252 3253 LLVM_DEBUG( 3254 dbgs() << "IssueCycles=" 3255 << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c " 3256 << "IterCycles=" << IterCount / SchedModel->getLatencyFactor() 3257 << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount 3258 << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor() 3259 << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n"; 3260 if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n"); 3261 } 3262 3263 void GenericScheduler::registerRoots() { 3264 Rem.CriticalPath = DAG->ExitSU.getDepth(); 3265 3266 // Some roots may not feed into ExitSU. Check all of them in case. 3267 for (const SUnit *SU : Bot.Available) { 3268 if (SU->getDepth() > Rem.CriticalPath) 3269 Rem.CriticalPath = SU->getDepth(); 3270 } 3271 LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n'); 3272 if (DumpCriticalPathLength) { 3273 errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n"; 3274 } 3275 3276 if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) { 3277 Rem.CyclicCritPath = DAG->computeCyclicCriticalPath(); 3278 checkAcyclicLatency(); 3279 } 3280 } 3281 3282 namespace llvm { 3283 bool tryPressure(const PressureChange &TryP, 3284 const PressureChange &CandP, 3285 GenericSchedulerBase::SchedCandidate &TryCand, 3286 GenericSchedulerBase::SchedCandidate &Cand, 3287 GenericSchedulerBase::CandReason Reason, 3288 const TargetRegisterInfo *TRI, 3289 const MachineFunction &MF) { 3290 // If one candidate decreases and the other increases, go with it. 3291 // Invalid candidates have UnitInc==0. 3292 if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand, 3293 Reason)) { 3294 return true; 3295 } 3296 // Do not compare the magnitude of pressure changes between top and bottom 3297 // boundary. 3298 if (Cand.AtTop != TryCand.AtTop) 3299 return false; 3300 3301 // If both candidates affect the same set in the same boundary, go with the 3302 // smallest increase. 3303 unsigned TryPSet = TryP.getPSetOrMax(); 3304 unsigned CandPSet = CandP.getPSetOrMax(); 3305 if (TryPSet == CandPSet) { 3306 return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand, 3307 Reason); 3308 } 3309 3310 int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) : 3311 std::numeric_limits<int>::max(); 3312 3313 int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) : 3314 std::numeric_limits<int>::max(); 3315 3316 // If the candidates are decreasing pressure, reverse priority. 3317 if (TryP.getUnitInc() < 0) 3318 std::swap(TryRank, CandRank); 3319 return tryGreater(TryRank, CandRank, TryCand, Cand, Reason); 3320 } 3321 3322 unsigned getWeakLeft(const SUnit *SU, bool isTop) { 3323 return (isTop) ? 
SU->WeakPredsLeft : SU->WeakSuccsLeft; 3324 } 3325 3326 /// Minimize physical register live ranges. Regalloc wants them adjacent to 3327 /// their physreg def/use. 3328 /// 3329 /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf 3330 /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled 3331 /// with the operation that produces or consumes the physreg. We'll do this when 3332 /// regalloc has support for parallel copies. 3333 int biasPhysReg(const SUnit *SU, bool isTop) { 3334 const MachineInstr *MI = SU->getInstr(); 3335 3336 if (MI->isCopy()) { 3337 unsigned ScheduledOper = isTop ? 1 : 0; 3338 unsigned UnscheduledOper = isTop ? 0 : 1; 3339 // If we have already scheduled the physreg produce/consumer, immediately 3340 // schedule the copy. 3341 if (MI->getOperand(ScheduledOper).getReg().isPhysical()) 3342 return 1; 3343 // If the physreg is at the boundary, defer it. Otherwise schedule it 3344 // immediately to free the dependent. We can hoist the copy later. 3345 bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft; 3346 if (MI->getOperand(UnscheduledOper).getReg().isPhysical()) 3347 return AtBoundary ? -1 : 1; 3348 } 3349 3350 if (MI->isMoveImmediate()) { 3351 // If we have a move immediate and all successors have been assigned, bias 3352 // towards scheduling this later. Make sure all register defs are to 3353 // physical registers. 3354 bool DoBias = true; 3355 for (const MachineOperand &Op : MI->defs()) { 3356 if (Op.isReg() && !Op.getReg().isPhysical()) { 3357 DoBias = false; 3358 break; 3359 } 3360 } 3361 3362 if (DoBias) 3363 return isTop ? -1 : 1; 3364 } 3365 3366 return 0; 3367 } 3368 } // end namespace llvm 3369 3370 void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU, 3371 bool AtTop, 3372 const RegPressureTracker &RPTracker, 3373 RegPressureTracker &TempTracker) { 3374 Cand.SU = SU; 3375 Cand.AtTop = AtTop; 3376 if (DAG->isTrackingPressure()) { 3377 if (AtTop) { 3378 TempTracker.getMaxDownwardPressureDelta( 3379 Cand.SU->getInstr(), 3380 Cand.RPDelta, 3381 DAG->getRegionCriticalPSets(), 3382 DAG->getRegPressure().MaxSetPressure); 3383 } else { 3384 if (VerifyScheduling) { 3385 TempTracker.getMaxUpwardPressureDelta( 3386 Cand.SU->getInstr(), 3387 &DAG->getPressureDiff(Cand.SU), 3388 Cand.RPDelta, 3389 DAG->getRegionCriticalPSets(), 3390 DAG->getRegPressure().MaxSetPressure); 3391 } else { 3392 RPTracker.getUpwardPressureDelta( 3393 Cand.SU->getInstr(), 3394 DAG->getPressureDiff(Cand.SU), 3395 Cand.RPDelta, 3396 DAG->getRegionCriticalPSets(), 3397 DAG->getRegPressure().MaxSetPressure); 3398 } 3399 } 3400 } 3401 LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs() 3402 << " Try SU(" << Cand.SU->NodeNum << ") " 3403 << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":" 3404 << Cand.RPDelta.Excess.getUnitInc() << "\n"); 3405 } 3406 3407 /// Apply a set of heuristics to a new candidate. Heuristics are currently 3408 /// hierarchical. This may be more efficient than a graduated cost model because 3409 /// we don't need to evaluate all aspects of the model for each node in the 3410 /// queue. But it's really done to make the heuristics easier to debug and 3411 /// statistically analyze. 3412 /// 3413 /// \param Cand provides the policy and current best candidate. 3414 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. 3415 /// \param Zone describes the scheduled zone that we are extending, or nullptr 3416 /// if Cand is from a different zone than TryCand. 
3417 /// \return \c true if TryCand is better than Cand (Reason is NOT NoCand) 3418 bool GenericScheduler::tryCandidate(SchedCandidate &Cand, 3419 SchedCandidate &TryCand, 3420 SchedBoundary *Zone) const { 3421 // Initialize the candidate if needed. 3422 if (!Cand.isValid()) { 3423 TryCand.Reason = NodeOrder; 3424 return true; 3425 } 3426 3427 // Bias PhysReg Defs and copies to their uses and defs, respectively. 3428 if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop), 3429 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg)) 3430 return TryCand.Reason != NoCand; 3431 3432 // Avoid exceeding the target's limit. 3433 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess, 3434 Cand.RPDelta.Excess, 3435 TryCand, Cand, RegExcess, TRI, 3436 DAG->MF)) 3437 return TryCand.Reason != NoCand; 3438 3439 // Avoid increasing the max critical pressure in the scheduled region. 3440 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax, 3441 Cand.RPDelta.CriticalMax, 3442 TryCand, Cand, RegCritical, TRI, 3443 DAG->MF)) 3444 return TryCand.Reason != NoCand; 3445 3446 // We only compare a subset of features when comparing nodes between the 3447 // Top and Bottom boundaries. Some properties are simply incomparable; in many 3448 // other instances we should only override the other boundary if something 3449 // is a clearly good pick on one boundary. Skip heuristics that are more 3450 // "tie-breaking" in nature. 3451 bool SameBoundary = Zone != nullptr; 3452 if (SameBoundary) { 3453 // For loops that are acyclic path limited, aggressively schedule for 3454 // latency. Within a single cycle, whenever CurrMOps > 0, allow normal 3455 // heuristics to take precedence. 3456 if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() && 3457 tryLatency(TryCand, Cand, *Zone)) 3458 return TryCand.Reason != NoCand; 3459 3460 // Prioritize instructions that read unbuffered resources by stall cycles. 3461 if (tryLess(Zone->getLatencyStallCycles(TryCand.SU), 3462 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) 3463 return TryCand.Reason != NoCand; 3464 } 3465 3466 // Keep clustered nodes together to encourage downstream peephole 3467 // optimizations which may reduce resource requirements. 3468 // 3469 // This is a best effort to set things up for a post-RA pass. Optimizations 3470 // like generating loads of multiple registers should ideally be done within 3471 // the scheduler pass by combining the loads during DAG postprocessing. 3472 const SUnit *CandNextClusterSU = 3473 Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); 3474 const SUnit *TryCandNextClusterSU = 3475 TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); 3476 if (tryGreater(TryCand.SU == TryCandNextClusterSU, 3477 Cand.SU == CandNextClusterSU, 3478 TryCand, Cand, Cluster)) 3479 return TryCand.Reason != NoCand; 3480 3481 if (SameBoundary) { 3482 // Weak edges are for clustering and other constraints. 3483 if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop), 3484 getWeakLeft(Cand.SU, Cand.AtTop), 3485 TryCand, Cand, Weak)) 3486 return TryCand.Reason != NoCand; 3487 } 3488 3489 // Avoid increasing the max pressure of the entire region. 3490 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax, 3491 Cand.RPDelta.CurrentMax, 3492 TryCand, Cand, RegMax, TRI, 3493 DAG->MF)) 3494 return TryCand.Reason != NoCand; 3495 3496 if (SameBoundary) { 3497 // Avoid critical resource consumption and balance the schedule.
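// (Illustrative: a candidate that consumes fewer units of the zone's critical resource wins ResourceReduce below; one that consumes more of the resource demanded by the other zone wins ResourceDemand.)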
3498 TryCand.initResourceDelta(DAG, SchedModel); 3499 if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, 3500 TryCand, Cand, ResourceReduce)) 3501 return TryCand.Reason != NoCand; 3502 if (tryGreater(TryCand.ResDelta.DemandedResources, 3503 Cand.ResDelta.DemandedResources, 3504 TryCand, Cand, ResourceDemand)) 3505 return TryCand.Reason != NoCand; 3506 3507 // Avoid serializing long latency dependence chains. 3508 // For acyclic path limited loops, latency was already checked above. 3509 if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency && 3510 !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone)) 3511 return TryCand.Reason != NoCand; 3512 3513 // Fall through to original instruction order. 3514 if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) 3515 || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) { 3516 TryCand.Reason = NodeOrder; 3517 return true; 3518 } 3519 } 3520 3521 return false; 3522 } 3523 3524 /// Pick the best candidate from the queue. 3525 /// 3526 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during 3527 /// DAG building. To adjust for the current scheduling location we need to 3528 /// maintain the number of vreg uses remaining to be top-scheduled. 3529 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone, 3530 const CandPolicy &ZonePolicy, 3531 const RegPressureTracker &RPTracker, 3532 SchedCandidate &Cand) { 3533 // getMaxPressureDelta temporarily modifies the tracker. 3534 RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker); 3535 3536 ReadyQueue &Q = Zone.Available; 3537 for (SUnit *SU : Q) { 3538 3539 SchedCandidate TryCand(ZonePolicy); 3540 initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker); 3541 // Pass SchedBoundary only when comparing nodes from the same boundary. 3542 SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr; 3543 if (tryCandidate(Cand, TryCand, ZoneArg)) { 3544 // Initialize resource delta if needed in case future heuristics query it. 3545 if (TryCand.ResDelta == SchedResourceDelta()) 3546 TryCand.initResourceDelta(DAG, SchedModel); 3547 Cand.setBest(TryCand); 3548 LLVM_DEBUG(traceCandidate(Cand)); 3549 } 3550 } 3551 } 3552 3553 /// Pick the best candidate node from either the top or bottom queue. 3554 SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) { 3555 // Schedule as far as possible in the direction of no choice. This is most 3556 // efficient, but also provides the best heuristics for CriticalPSets. 3557 if (SUnit *SU = Bot.pickOnlyChoice()) { 3558 IsTopNode = false; 3559 tracePick(Only1, false); 3560 return SU; 3561 } 3562 if (SUnit *SU = Top.pickOnlyChoice()) { 3563 IsTopNode = true; 3564 tracePick(Only1, true); 3565 return SU; 3566 } 3567 // Set the bottom-up policy based on the state of the current bottom zone and 3568 // the instructions outside the zone, including the top zone. 3569 CandPolicy BotPolicy; 3570 setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top); 3571 // Set the top-down policy based on the state of the current top zone and 3572 // the instructions outside the zone, including the bottom zone. 3573 CandPolicy TopPolicy; 3574 setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot); 3575 3576 // See if BotCand is still valid (because we previously scheduled from Top). 
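// BotCand is cached across picks; the code below recomputes it only if it is invalid, its SU has since been scheduled, or the bottom policy changed.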
3577 LLVM_DEBUG(dbgs() << "Picking from Bot:\n"); 3578 if (!BotCand.isValid() || BotCand.SU->isScheduled || 3579 BotCand.Policy != BotPolicy) { 3580 BotCand.reset(CandPolicy()); 3581 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand); 3582 assert(BotCand.Reason != NoCand && "failed to find the first candidate"); 3583 } else { 3584 LLVM_DEBUG(traceCandidate(BotCand)); 3585 #ifndef NDEBUG 3586 if (VerifyScheduling) { 3587 SchedCandidate TCand; 3588 TCand.reset(CandPolicy()); 3589 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand); 3590 assert(TCand.SU == BotCand.SU && 3591 "Last pick result should correspond to re-picking right now"); 3592 } 3593 #endif 3594 } 3595 3596 // Check if the top Q has a better candidate. 3597 LLVM_DEBUG(dbgs() << "Picking from Top:\n"); 3598 if (!TopCand.isValid() || TopCand.SU->isScheduled || 3599 TopCand.Policy != TopPolicy) { 3600 TopCand.reset(CandPolicy()); 3601 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand); 3602 assert(TopCand.Reason != NoCand && "failed to find the first candidate"); 3603 } else { 3604 LLVM_DEBUG(traceCandidate(TopCand)); 3605 #ifndef NDEBUG 3606 if (VerifyScheduling) { 3607 SchedCandidate TCand; 3608 TCand.reset(CandPolicy()); 3609 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand); 3610 assert(TCand.SU == TopCand.SU && 3611 "Last pick result should correspond to re-picking right now"); 3612 } 3613 #endif 3614 } 3615 3616 // Pick best from BotCand and TopCand. 3617 assert(BotCand.isValid()); 3618 assert(TopCand.isValid()); 3619 SchedCandidate Cand = BotCand; 3620 TopCand.Reason = NoCand; 3621 if (tryCandidate(Cand, TopCand, nullptr)) { 3622 Cand.setBest(TopCand); 3623 LLVM_DEBUG(traceCandidate(Cand)); 3624 } 3625 3626 IsTopNode = Cand.AtTop; 3627 tracePick(Cand); 3628 return Cand.SU; 3629 } 3630 3631 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy. 3632 SUnit *GenericScheduler::pickNode(bool &IsTopNode) { 3633 if (DAG->top() == DAG->bottom()) { 3634 assert(Top.Available.empty() && Top.Pending.empty() && 3635 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); 3636 return nullptr; 3637 } 3638 SUnit *SU; 3639 do { 3640 if (RegionPolicy.OnlyTopDown) { 3641 SU = Top.pickOnlyChoice(); 3642 if (!SU) { 3643 CandPolicy NoPolicy; 3644 TopCand.reset(NoPolicy); 3645 pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand); 3646 assert(TopCand.Reason != NoCand && "failed to find a candidate"); 3647 tracePick(TopCand); 3648 SU = TopCand.SU; 3649 } 3650 IsTopNode = true; 3651 } else if (RegionPolicy.OnlyBottomUp) { 3652 SU = Bot.pickOnlyChoice(); 3653 if (!SU) { 3654 CandPolicy NoPolicy; 3655 BotCand.reset(NoPolicy); 3656 pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand); 3657 assert(BotCand.Reason != NoCand && "failed to find a candidate"); 3658 tracePick(BotCand); 3659 SU = BotCand.SU; 3660 } 3661 IsTopNode = false; 3662 } else { 3663 SU = pickNodeBidirectional(IsTopNode); 3664 } 3665 } while (SU->isScheduled); 3666 3667 if (SU->isTopReady()) 3668 Top.removeReady(SU); 3669 if (SU->isBottomReady()) 3670 Bot.removeReady(SU); 3671 3672 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " 3673 << *SU->getInstr()); 3674 return SU; 3675 } 3676 3677 void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) { 3678 MachineBasicBlock::iterator InsertPos = SU->getInstr(); 3679 if (!isTop) 3680 ++InsertPos; 3681 SmallVectorImpl<SDep> &Deps = isTop ? 
SU->Preds : SU->Succs; 3682 3683 // Find already scheduled copies with a single physreg dependence and move 3684 // them just above the scheduled instruction. 3685 for (SDep &Dep : Deps) { 3686 if (Dep.getKind() != SDep::Data || 3687 !Register::isPhysicalRegister(Dep.getReg())) 3688 continue; 3689 SUnit *DepSU = Dep.getSUnit(); 3690 if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1) 3691 continue; 3692 MachineInstr *Copy = DepSU->getInstr(); 3693 if (!Copy->isCopy() && !Copy->isMoveImmediate()) 3694 continue; 3695 LLVM_DEBUG(dbgs() << " Rescheduling physreg copy "; 3696 DAG->dumpNode(*Dep.getSUnit())); 3697 DAG->moveInstruction(Copy, InsertPos); 3698 } 3699 } 3700 3701 /// Update the scheduler's state after scheduling a node. This is the same node 3702 /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to 3703 /// update its state based on the current cycle before MachineSchedStrategy 3704 /// does. 3705 /// 3706 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling 3707 /// them here. See comments in biasPhysReg. 3708 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { 3709 if (IsTopNode) { 3710 SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle()); 3711 Top.bumpNode(SU); 3712 if (SU->hasPhysRegUses) 3713 reschedulePhysReg(SU, true); 3714 } else { 3715 SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle()); 3716 Bot.bumpNode(SU); 3717 if (SU->hasPhysRegDefs) 3718 reschedulePhysReg(SU, false); 3719 } 3720 } 3721 3722 /// Create the standard converging machine scheduler. This will be used as the 3723 /// default scheduler if the target does not set a default. 3724 ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) { 3725 ScheduleDAGMILive *DAG = 3726 new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C)); 3727 // Register DAG post-processors. 3728 // 3729 // FIXME: extend the mutation API to allow earlier mutations to instantiate 3730 // data and pass it to later mutations. Have a single mutation that gathers 3731 // the interesting nodes in one pass. 3732 DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI)); 3733 return DAG; 3734 } 3735 3736 static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) { 3737 return createGenericSchedLive(C); 3738 } 3739 3740 static MachineSchedRegistry 3741 GenericSchedRegistry("converge", "Standard converging scheduler.", 3742 createConvergingSched); 3743 3744 //===----------------------------------------------------------------------===// 3745 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy. 3746 //===----------------------------------------------------------------------===// 3747 3748 void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) { 3749 DAG = Dag; 3750 SchedModel = DAG->getSchedModel(); 3751 TRI = DAG->TRI; 3752 3753 Rem.init(DAG, SchedModel); 3754 Top.init(DAG, SchedModel, &Rem); 3755 BotRoots.clear(); 3756 3757 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, 3758 // or are disabled, then these HazardRecs will be disabled. 3759 const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); 3760 if (!Top.HazardRec) { 3761 Top.HazardRec = 3762 DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer( 3763 Itin, DAG); 3764 } 3765 } 3766 3767 void PostGenericScheduler::registerRoots() { 3768 Rem.CriticalPath = DAG->ExitSU.getDepth(); 3769 3770 // Some roots may not feed into ExitSU. Check all of them in case.
3771 for (const SUnit *SU : BotRoots) { 3772 if (SU->getDepth() > Rem.CriticalPath) 3773 Rem.CriticalPath = SU->getDepth(); 3774 } 3775 LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n'); 3776 if (DumpCriticalPathLength) { 3777 errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n"; 3778 } 3779 } 3780 3781 /// Apply a set of heuristics to a new candidate for PostRA scheduling. 3782 /// 3783 /// \param Cand provides the policy and current best candidate. 3784 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. 3785 /// \return \c true if TryCand is better than Cand (Reason is NOT NoCand) 3786 bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand, 3787 SchedCandidate &TryCand) { 3788 // Initialize the candidate if needed. 3789 if (!Cand.isValid()) { 3790 TryCand.Reason = NodeOrder; 3791 return true; 3792 } 3793 3794 // Prioritize instructions that read unbuffered resources by stall cycles. 3795 if (tryLess(Top.getLatencyStallCycles(TryCand.SU), 3796 Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) 3797 return TryCand.Reason != NoCand; 3798 3799 // Keep clustered nodes together. 3800 if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(), 3801 Cand.SU == DAG->getNextClusterSucc(), 3802 TryCand, Cand, Cluster)) 3803 return TryCand.Reason != NoCand; 3804 3805 // Avoid critical resource consumption and balance the schedule. 3806 if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, 3807 TryCand, Cand, ResourceReduce)) 3808 return TryCand.Reason != NoCand; 3809 if (tryGreater(TryCand.ResDelta.DemandedResources, 3810 Cand.ResDelta.DemandedResources, 3811 TryCand, Cand, ResourceDemand)) 3812 return TryCand.Reason != NoCand; 3813 3814 // Avoid serializing long latency dependence chains. 3815 if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) { 3816 return TryCand.Reason != NoCand; 3817 } 3818 3819 // Fall through to original instruction order. 3820 if (TryCand.SU->NodeNum < Cand.SU->NodeNum) { 3821 TryCand.Reason = NodeOrder; 3822 return true; 3823 } 3824 3825 return false; 3826 } 3827 3828 void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) { 3829 ReadyQueue &Q = Top.Available; 3830 for (SUnit *SU : Q) { 3831 SchedCandidate TryCand(Cand.Policy); 3832 TryCand.SU = SU; 3833 TryCand.AtTop = true; 3834 TryCand.initResourceDelta(DAG, SchedModel); 3835 if (tryCandidate(Cand, TryCand)) { 3836 Cand.setBest(TryCand); 3837 LLVM_DEBUG(traceCandidate(Cand)); 3838 } 3839 } 3840 } 3841 3842 /// Pick the next node to schedule. 3843 SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) { 3844 if (DAG->top() == DAG->bottom()) { 3845 assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage"); 3846 return nullptr; 3847 } 3848 SUnit *SU; 3849 do { 3850 SU = Top.pickOnlyChoice(); 3851 if (SU) { 3852 tracePick(Only1, true); 3853 } else { 3854 CandPolicy NoPolicy; 3855 SchedCandidate TopCand(NoPolicy); 3856 // Set the top-down policy based on the state of the current top zone and 3857 // the instructions outside the zone, including the bottom zone. 
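// (The post-RA strategy only maintains a Top boundary, so setPolicy below is given no OtherZone.)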
3858 setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr); 3859 pickNodeFromQueue(TopCand); 3860 assert(TopCand.Reason != NoCand && "failed to find a candidate"); 3861 tracePick(TopCand); 3862 SU = TopCand.SU; 3863 } 3864 } while (SU->isScheduled); 3865 3866 IsTopNode = true; 3867 Top.removeReady(SU); 3868 3869 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " 3870 << *SU->getInstr()); 3871 return SU; 3872 } 3873 3874 /// Called after ScheduleDAGMI has scheduled an instruction and updated 3875 /// scheduled/remaining flags in the DAG nodes. 3876 void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { 3877 SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle()); 3878 Top.bumpNode(SU); 3879 } 3880 3881 ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) { 3882 return new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C), 3883 /*RemoveKillFlags=*/true); 3884 } 3885 3886 //===----------------------------------------------------------------------===// 3887 // ILP Scheduler. Currently for experimental analysis of heuristics. 3888 //===----------------------------------------------------------------------===// 3889 3890 namespace { 3891 3892 /// Order nodes by the ILP metric. 3893 struct ILPOrder { 3894 const SchedDFSResult *DFSResult = nullptr; 3895 const BitVector *ScheduledTrees = nullptr; 3896 bool MaximizeILP; 3897 3898 ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {} 3899 3900 /// Apply a less-than relation on node priority. 3901 /// 3902 /// (Return true if A comes after B in the Q.) 3903 bool operator()(const SUnit *A, const SUnit *B) const { 3904 unsigned SchedTreeA = DFSResult->getSubtreeID(A); 3905 unsigned SchedTreeB = DFSResult->getSubtreeID(B); 3906 if (SchedTreeA != SchedTreeB) { 3907 // Unscheduled trees have lower priority. 3908 if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB)) 3909 return ScheduledTrees->test(SchedTreeB); 3910 3911 // Trees with shallower connections have lower priority. 3912 if (DFSResult->getSubtreeLevel(SchedTreeA) 3913 != DFSResult->getSubtreeLevel(SchedTreeB)) { 3914 return DFSResult->getSubtreeLevel(SchedTreeA) 3915 < DFSResult->getSubtreeLevel(SchedTreeB); 3916 } 3917 } 3918 if (MaximizeILP) 3919 return DFSResult->getILP(A) < DFSResult->getILP(B); 3920 else 3921 return DFSResult->getILP(A) > DFSResult->getILP(B); 3922 } 3923 }; 3924 3925 /// Schedule based on the ILP metric. 3926 class ILPScheduler : public MachineSchedStrategy { 3927 ScheduleDAGMILive *DAG = nullptr; 3928 ILPOrder Cmp; 3929 3930 std::vector<SUnit*> ReadyQ; 3931 3932 public: 3933 ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {} 3934 3935 void initialize(ScheduleDAGMI *dag) override { 3936 assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness"); 3937 DAG = static_cast<ScheduleDAGMILive*>(dag); 3938 DAG->computeDFSResult(); 3939 Cmp.DFSResult = DAG->getDFSResult(); 3940 Cmp.ScheduledTrees = &DAG->getScheduledTrees(); 3941 ReadyQ.clear(); 3942 } 3943 3944 void registerRoots() override { 3945 // Restore the heap in ReadyQ with the updated DFS results. 3946 std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); 3947 } 3948 3949 /// Implement MachineSchedStrategy interface. 3950 /// ----------------------------------------- 3951 3952 /// Callback to select the highest priority node from the ready Q.
3953 SUnit *pickNode(bool &IsTopNode) override { 3954 if (ReadyQ.empty()) return nullptr; 3955 std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); 3956 SUnit *SU = ReadyQ.back(); 3957 ReadyQ.pop_back(); 3958 IsTopNode = false; 3959 LLVM_DEBUG(dbgs() << "Pick node " 3960 << "SU(" << SU->NodeNum << ") " 3961 << " ILP: " << DAG->getDFSResult()->getILP(SU) 3962 << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) 3963 << " @" 3964 << DAG->getDFSResult()->getSubtreeLevel( 3965 DAG->getDFSResult()->getSubtreeID(SU)) 3966 << '\n' 3967 << "Scheduling " << *SU->getInstr()); 3968 return SU; 3969 } 3970 3971 /// Scheduler callback to notify that a new subtree is scheduled. 3972 void scheduleTree(unsigned SubtreeID) override { 3973 std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); 3974 } 3975 3976 /// Callback after a node is scheduled. Mark a newly scheduled tree, notify 3977 /// DFSResults, and resort the priority Q. 3978 void schedNode(SUnit *SU, bool IsTopNode) override { 3979 assert(!IsTopNode && "SchedDFSResult needs bottom-up"); 3980 } 3981 3982 void releaseTopNode(SUnit *) override { /*only called for top roots*/ } 3983 3984 void releaseBottomNode(SUnit *SU) override { 3985 ReadyQ.push_back(SU); 3986 std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); 3987 } 3988 }; 3989 3990 } // end anonymous namespace 3991 3992 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) { 3993 return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(true)); 3994 } 3995 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) { 3996 return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(false)); 3997 } 3998 3999 static MachineSchedRegistry ILPMaxRegistry( 4000 "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler); 4001 static MachineSchedRegistry ILPMinRegistry( 4002 "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler); 4003 4004 //===----------------------------------------------------------------------===// 4005 // Machine Instruction Shuffler for Correctness Testing 4006 //===----------------------------------------------------------------------===// 4007 4008 #ifndef NDEBUG 4009 namespace { 4010 4011 /// Apply a less-than relation on the node order, which corresponds to the 4012 /// instruction order prior to scheduling. IsReverse implements greater-than. 4013 template<bool IsReverse> 4014 struct SUnitOrder { 4015 bool operator()(SUnit *A, SUnit *B) const { 4016 if (IsReverse) 4017 return A->NodeNum > B->NodeNum; 4018 else 4019 return A->NodeNum < B->NodeNum; 4020 } 4021 }; 4022 4023 /// Reorder instructions as much as possible. 4024 class InstructionShuffler : public MachineSchedStrategy { 4025 bool IsAlternating; 4026 bool IsTopDown; 4027 4028 // Using a less-than relation (SUnitOrder<false>) for the TopQ priority 4029 // gives nodes with a higher number higher priority causing the latest 4030 // instructions to be scheduled first. 4031 PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>> 4032 TopQ; 4033 4034 // When scheduling bottom-up, use greater-than as the queue priority. 4035 PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>> 4036 BottomQ; 4037 4038 public: 4039 InstructionShuffler(bool alternate, bool topdown) 4040 : IsAlternating(alternate), IsTopDown(topdown) {} 4041 4042 void initialize(ScheduleDAGMI*) override { 4043 TopQ.clear(); 4044 BottomQ.clear(); 4045 } 4046 4047 /// Implement MachineSchedStrategy interface. 
4048 /// ----------------------------------------- 4049 4050 SUnit *pickNode(bool &IsTopNode) override { 4051 SUnit *SU; 4052 if (IsTopDown) { 4053 do { 4054 if (TopQ.empty()) return nullptr; 4055 SU = TopQ.top(); 4056 TopQ.pop(); 4057 } while (SU->isScheduled); 4058 IsTopNode = true; 4059 } else { 4060 do { 4061 if (BottomQ.empty()) return nullptr; 4062 SU = BottomQ.top(); 4063 BottomQ.pop(); 4064 } while (SU->isScheduled); 4065 IsTopNode = false; 4066 } 4067 if (IsAlternating) 4068 IsTopDown = !IsTopDown; 4069 return SU; 4070 } 4071 4072 void schedNode(SUnit *SU, bool IsTopNode) override {} 4073 4074 void releaseTopNode(SUnit *SU) override { 4075 TopQ.push(SU); 4076 } 4077 void releaseBottomNode(SUnit *SU) override { 4078 BottomQ.push(SU); 4079 } 4080 }; 4081 4082 } // end anonymous namespace 4083 4084 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) { 4085 bool Alternate = !ForceTopDown && !ForceBottomUp; 4086 bool TopDown = !ForceBottomUp; 4087 assert((TopDown || !ForceTopDown) && 4088 "-misched-topdown incompatible with -misched-bottomup"); 4089 return new ScheduleDAGMILive( 4090 C, std::make_unique<InstructionShuffler>(Alternate, TopDown)); 4091 } 4092 4093 static MachineSchedRegistry ShufflerRegistry( 4094 "shuffle", "Shuffle machine instructions alternating directions", 4095 createInstructionShuffler); 4096 #endif // !NDEBUG 4097 4098 //===----------------------------------------------------------------------===// 4099 // GraphWriter support for ScheduleDAGMILive. 4100 //===----------------------------------------------------------------------===// 4101 4102 #ifndef NDEBUG 4103 namespace llvm { 4104 4105 template<> struct GraphTraits< 4106 ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {}; 4107 4108 template<> 4109 struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits { 4110 DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} 4111 4112 static std::string getGraphName(const ScheduleDAG *G) { 4113 return std::string(G->MF.getName()); 4114 } 4115 4116 static bool renderGraphFromBottomUp() { 4117 return true; 4118 } 4119 4120 static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) { 4121 if (ViewMISchedCutoff == 0) 4122 return false; 4123 return (Node->Preds.size() > ViewMISchedCutoff 4124 || Node->Succs.size() > ViewMISchedCutoff); 4125 } 4126 4127 /// If you want to override the dot attributes printed for a particular 4128 /// edge, override this method. 4129 static std::string getEdgeAttributes(const SUnit *Node, 4130 SUnitIterator EI, 4131 const ScheduleDAG *Graph) { 4132 if (EI.isArtificialDep()) 4133 return "color=cyan,style=dashed"; 4134 if (EI.isCtrlDep()) 4135 return "color=blue,style=dashed"; 4136 return ""; 4137 } 4138 4139 static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) { 4140 std::string Str; 4141 raw_string_ostream SS(Str); 4142 const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G); 4143 const SchedDFSResult *DFS = DAG->hasVRegLiveness() ? 
4144 static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr; 4145 SS << "SU:" << SU->NodeNum; 4146 if (DFS) 4147 SS << " I:" << DFS->getNumInstrs(SU); 4148 return SS.str(); 4149 } 4150 4151 static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) { 4152 return G->getGraphNodeLabel(SU); 4153 } 4154 4155 static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) { 4156 std::string Str("shape=Mrecord"); 4157 const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G); 4158 const SchedDFSResult *DFS = DAG->hasVRegLiveness() ? 4159 static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr; 4160 if (DFS) { 4161 Str += ",style=filled,fillcolor=\"#"; 4162 Str += DOT::getColorString(DFS->getSubtreeID(N)); 4163 Str += '"'; 4164 } 4165 return Str; 4166 } 4167 }; 4168 4169 } // end namespace llvm 4170 #endif // NDEBUG 4171 4172 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG 4173 /// rendered using 'dot'. 4174 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) { 4175 #ifndef NDEBUG 4176 ViewGraph(this, Name, false, Title); 4177 #else 4178 errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on " 4179 << "systems with Graphviz or gv!\n"; 4180 #endif // NDEBUG 4181 } 4182 4183 /// Out-of-line implementation with no arguments is handy for gdb. 4184 void ScheduleDAGMI::viewGraph() { 4185 viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName()); 4186 } 4187 4188 /// Sort predicate for the intervals stored in an instance of 4189 /// ResourceSegments. Intervals are always disjoint (no intersection 4190 /// for any pairs of intervals), therefore we can sort the totality of 4191 /// the intervals by looking only at the left boundary. 4192 static bool sortIntervals(const ResourceSegments::IntervalTy &A, 4193 const ResourceSegments::IntervalTy &B) { 4194 return A.first < B.first; 4195 } 4196 4197 unsigned ResourceSegments::getFirstAvailableAt( 4198 unsigned CurrCycle, unsigned StartAtCycle, unsigned Cycle, 4199 std::function<ResourceSegments::IntervalTy(unsigned, unsigned, unsigned)> 4200 IntervalBuilder) const { 4201 assert(std::is_sorted(std::begin(_Intervals), std::end(_Intervals), 4202 sortIntervals) && 4203 "Cannot execute on an un-sorted set of intervals."); 4204 unsigned RetCycle = CurrCycle; 4205 ResourceSegments::IntervalTy NewInterval = 4206 IntervalBuilder(RetCycle, StartAtCycle, Cycle); 4207 for (auto &Interval : _Intervals) { 4208 if (!intersects(NewInterval, Interval)) 4209 continue; 4210 4211 // Move the interval right next to the top of the one it 4212 // intersects. 4213 assert(Interval.second > NewInterval.first && 4214 "Invalid intervals configuration."); 4215 RetCycle += (unsigned)Interval.second - (unsigned)NewInterval.first; 4216 NewInterval = IntervalBuilder(RetCycle, StartAtCycle, Cycle); 4217 } 4218 return RetCycle; 4219 } 4220 4221 void ResourceSegments::add(ResourceSegments::IntervalTy A, 4222 const unsigned CutOff) { 4223 assert(A.first < A.second && "Cannot add empty resource usage"); 4224 assert(CutOff > 0 && "0-size interval history has no use."); 4225 assert(all_of(_Intervals, 4226 [&A](const ResourceSegments::IntervalTy &Interval) -> bool { 4227 return !intersects(A, Interval); 4228 }) && 4229 "A resource is being overwritten"); 4230 _Intervals.push_back(A); 4231 4232 sortAndMerge(); 4233 4234 // Do not keep the full history of the intervals, just the 4235 // latest #CutOff. 
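// (For example, with CutOff == 10 only the ten latest intervals are kept after sorting; dropping the older history bounds the scan cost in getFirstAvailableAt.)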
4236 while (_Intervals.size() > CutOff) 4237 _Intervals.pop_front(); 4238 } 4239 4240 bool ResourceSegments::intersects(ResourceSegments::IntervalTy A, 4241 ResourceSegments::IntervalTy B) { 4242 assert(A.first <= A.second && "Invalid interval"); 4243 assert(B.first <= B.second && "Invalid interval"); 4244 4245 // Share one boundary. 4246 if ((A.first == B.first) || (A.second == B.second)) 4247 return true; 4248 4249 // full intersect: [ *** ) B 4250 // [***) A 4251 if ((A.first > B.first) && (A.second < B.second)) 4252 return true; 4253 4254 // right intersect: [ ***) B 4255 // [*** ) A 4256 if ((A.first > B.first) && (A.first < B.second) && (A.second > B.second)) 4257 return true; 4258 4259 // left intersect: [*** ) B 4260 // [ ***) A 4261 if ((A.first < B.first) && (B.first < A.second) && (B.second > B.first)) 4262 return true; 4263 4264 return false; 4265 } 4266 4267 void ResourceSegments::sortAndMerge() { 4268 if (_Intervals.size() <= 1) 4269 return; 4270 4271 // First sort the collection. 4272 _Intervals.sort(sortIntervals); 4273 4274 // We can use std::next because the list has at least 2 elements. 4275 auto next = std::next(std::begin(_Intervals)); 4276 auto E = std::end(_Intervals); 4277 for (; next != E; ++next) { 4278 if (std::prev(next)->second >= next->first) { 4279 next->first = std::prev(next)->first; 4280 _Intervals.erase(std::prev(next)); 4281 continue; 4282 } 4283 } 4284 } 4285