1 //===- VLIWMachineScheduler.cpp - VLIW-Focused Scheduling Pass ------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // MachineScheduler schedules machine instructions after phi elimination. It 10 // preserves LiveIntervals so it can be invoked before register allocation. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/CodeGen/VLIWMachineScheduler.h" 15 #include "llvm/ADT/SmallVector.h" 16 #include "llvm/CodeGen/DFAPacketizer.h" 17 #include "llvm/CodeGen/MachineBasicBlock.h" 18 #include "llvm/CodeGen/MachineFunction.h" 19 #include "llvm/CodeGen/MachineInstr.h" 20 #include "llvm/CodeGen/MachineLoopInfo.h" 21 #include "llvm/CodeGen/RegisterClassInfo.h" 22 #include "llvm/CodeGen/RegisterPressure.h" 23 #include "llvm/CodeGen/ScheduleDAG.h" 24 #include "llvm/CodeGen/ScheduleHazardRecognizer.h" 25 #include "llvm/CodeGen/TargetInstrInfo.h" 26 #include "llvm/CodeGen/TargetOpcodes.h" 27 #include "llvm/CodeGen/TargetRegisterInfo.h" 28 #include "llvm/CodeGen/TargetSchedule.h" 29 #include "llvm/CodeGen/TargetSubtargetInfo.h" 30 #include "llvm/Support/CommandLine.h" 31 #include "llvm/Support/Debug.h" 32 #include "llvm/Support/raw_ostream.h" 33 #include <algorithm> 34 #include <cassert> 35 #include <iomanip> 36 #include <limits> 37 #include <memory> 38 #include <sstream> 39 40 using namespace llvm; 41 42 #define DEBUG_TYPE "machine-scheduler" 43 44 static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure", cl::Hidden, 45 cl::init(false)); 46 47 static cl::opt<bool> UseNewerCandidate("use-newer-candidate", cl::Hidden, 48 cl::init(true)); 49 50 static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level", 51 cl::Hidden, cl::init(1)); 52 53 // Check if the scheduler should penalize instructions that are available to 54 // early due to a zero-latency dependence. 55 static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden, 56 cl::init(true)); 57 58 // This value is used to determine if a register class is a high pressure set. 59 // We compute the maximum number of registers needed and divided by the total 60 // available. Then, we compare the result to this value. 61 static cl::opt<float> RPThreshold("vliw-misched-reg-pressure", cl::Hidden, 62 cl::init(0.75f), 63 cl::desc("High register pressure threhold.")); 64 65 VLIWResourceModel::VLIWResourceModel(const TargetSubtargetInfo &STI, 66 const TargetSchedModel *SM) 67 : TII(STI.getInstrInfo()), SchedModel(SM) { 68 ResourcesModel = createPacketizer(STI); 69 70 // This hard requirement could be relaxed, 71 // but for now do not let it proceed. 72 assert(ResourcesModel && "Unimplemented CreateTargetScheduleState."); 73 74 Packet.reserve(SchedModel->getIssueWidth()); 75 Packet.clear(); 76 ResourcesModel->clearResources(); 77 } 78 79 void VLIWResourceModel::reset() { 80 Packet.clear(); 81 ResourcesModel->clearResources(); 82 } 83 84 VLIWResourceModel::~VLIWResourceModel() { delete ResourcesModel; } 85 86 /// Return true if there is a dependence between SUd and SUu. 87 bool VLIWResourceModel::hasDependence(const SUnit *SUd, const SUnit *SUu) { 88 if (SUd->Succs.size() == 0) 89 return false; 90 91 for (const auto &S : SUd->Succs) { 92 // Since we do not add pseudos to packets, might as well 93 // ignore order dependencies. 94 if (S.isCtrl()) 95 continue; 96 97 if (S.getSUnit() == SUu && S.getLatency() > 0) 98 return true; 99 } 100 return false; 101 } 102 103 /// Check if scheduling of this SU is possible 104 /// in the current packet. 105 /// It is _not_ precise (statefull), it is more like 106 /// another heuristic. Many corner cases are figured 107 /// empirically. 108 bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) { 109 if (!SU || !SU->getInstr()) 110 return false; 111 112 // First see if the pipeline could receive this instruction 113 // in the current cycle. 114 switch (SU->getInstr()->getOpcode()) { 115 default: 116 if (!ResourcesModel->canReserveResources(*SU->getInstr())) 117 return false; 118 break; 119 case TargetOpcode::EXTRACT_SUBREG: 120 case TargetOpcode::INSERT_SUBREG: 121 case TargetOpcode::SUBREG_TO_REG: 122 case TargetOpcode::REG_SEQUENCE: 123 case TargetOpcode::IMPLICIT_DEF: 124 case TargetOpcode::COPY: 125 case TargetOpcode::INLINEASM: 126 case TargetOpcode::INLINEASM_BR: 127 break; 128 } 129 130 // Now see if there are no other dependencies to instructions already 131 // in the packet. 132 if (IsTop) { 133 for (const SUnit *U : Packet) 134 if (hasDependence(U, SU)) 135 return false; 136 } else { 137 for (const SUnit *U : Packet) 138 if (hasDependence(SU, U)) 139 return false; 140 } 141 return true; 142 } 143 144 /// Keep track of available resources. 145 bool VLIWResourceModel::reserveResources(SUnit *SU, bool IsTop) { 146 bool startNewCycle = false; 147 // Artificially reset state. 148 if (!SU) { 149 reset(); 150 TotalPackets++; 151 return false; 152 } 153 // If this SU does not fit in the packet or the packet is now full 154 // start a new one. 155 if (!isResourceAvailable(SU, IsTop) || 156 Packet.size() >= SchedModel->getIssueWidth()) { 157 reset(); 158 TotalPackets++; 159 startNewCycle = true; 160 } 161 162 switch (SU->getInstr()->getOpcode()) { 163 default: 164 ResourcesModel->reserveResources(*SU->getInstr()); 165 break; 166 case TargetOpcode::EXTRACT_SUBREG: 167 case TargetOpcode::INSERT_SUBREG: 168 case TargetOpcode::SUBREG_TO_REG: 169 case TargetOpcode::REG_SEQUENCE: 170 case TargetOpcode::IMPLICIT_DEF: 171 case TargetOpcode::KILL: 172 case TargetOpcode::CFI_INSTRUCTION: 173 case TargetOpcode::EH_LABEL: 174 case TargetOpcode::COPY: 175 case TargetOpcode::INLINEASM: 176 case TargetOpcode::INLINEASM_BR: 177 break; 178 } 179 Packet.push_back(SU); 180 181 #ifndef NDEBUG 182 LLVM_DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n"); 183 for (unsigned i = 0, e = Packet.size(); i != e; ++i) { 184 LLVM_DEBUG(dbgs() << "\t[" << i << "] SU("); 185 LLVM_DEBUG(dbgs() << Packet[i]->NodeNum << ")\t"); 186 LLVM_DEBUG(Packet[i]->getInstr()->dump()); 187 } 188 #endif 189 190 return startNewCycle; 191 } 192 193 DFAPacketizer * 194 VLIWResourceModel::createPacketizer(const TargetSubtargetInfo &STI) const { 195 return STI.getInstrInfo()->CreateTargetScheduleState(STI); 196 } 197 198 /// schedule - Called back from MachineScheduler::runOnMachineFunction 199 /// after setting up the current scheduling region. [RegionBegin, RegionEnd) 200 /// only includes instructions that have DAG nodes, not scheduling boundaries. 201 void VLIWMachineScheduler::schedule() { 202 LLVM_DEBUG(dbgs() << "********** MI Converging Scheduling VLIW " 203 << printMBBReference(*BB) << " " << BB->getName() 204 << " in_func " << BB->getParent()->getName() 205 << " at loop depth " << MLI->getLoopDepth(BB) << " \n"); 206 207 buildDAGWithRegPressure(); 208 209 Topo.InitDAGTopologicalSorting(); 210 211 // Postprocess the DAG to add platform-specific artificial dependencies. 212 postProcessDAG(); 213 214 SmallVector<SUnit *, 8> TopRoots, BotRoots; 215 findRootsAndBiasEdges(TopRoots, BotRoots); 216 217 // Initialize the strategy before modifying the DAG. 218 SchedImpl->initialize(this); 219 220 LLVM_DEBUG({ 221 unsigned maxH = 0; 222 for (const SUnit &SU : SUnits) 223 if (SU.getHeight() > maxH) 224 maxH = SU.getHeight(); 225 dbgs() << "Max Height " << maxH << "\n"; 226 }); 227 LLVM_DEBUG({ 228 unsigned maxD = 0; 229 for (const SUnit &SU : SUnits) 230 if (SU.getDepth() > maxD) 231 maxD = SU.getDepth(); 232 dbgs() << "Max Depth " << maxD << "\n"; 233 }); 234 LLVM_DEBUG(dump()); 235 if (ViewMISchedDAGs) 236 viewGraph(); 237 238 initQueues(TopRoots, BotRoots); 239 240 bool IsTopNode = false; 241 while (true) { 242 LLVM_DEBUG( 243 dbgs() << "** VLIWMachineScheduler::schedule picking next node\n"); 244 SUnit *SU = SchedImpl->pickNode(IsTopNode); 245 if (!SU) 246 break; 247 248 if (!checkSchedLimit()) 249 break; 250 251 scheduleMI(SU, IsTopNode); 252 253 // Notify the scheduling strategy after updating the DAG. 254 SchedImpl->schedNode(SU, IsTopNode); 255 256 updateQueues(SU, IsTopNode); 257 } 258 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); 259 260 placeDebugValues(); 261 262 LLVM_DEBUG({ 263 dbgs() << "*** Final schedule for " 264 << printMBBReference(*begin()->getParent()) << " ***\n"; 265 dumpSchedule(); 266 dbgs() << '\n'; 267 }); 268 } 269 270 void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) { 271 DAG = static_cast<VLIWMachineScheduler *>(dag); 272 SchedModel = DAG->getSchedModel(); 273 274 Top.init(DAG, SchedModel); 275 Bot.init(DAG, SchedModel); 276 277 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or 278 // are disabled, then these HazardRecs will be disabled. 279 const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries(); 280 const TargetSubtargetInfo &STI = DAG->MF.getSubtarget(); 281 const TargetInstrInfo *TII = STI.getInstrInfo(); 282 delete Top.HazardRec; 283 delete Bot.HazardRec; 284 Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG); 285 Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG); 286 287 delete Top.ResourceModel; 288 delete Bot.ResourceModel; 289 Top.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel()); 290 Bot.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel()); 291 292 const std::vector<unsigned> &MaxPressure = 293 DAG->getRegPressure().MaxSetPressure; 294 HighPressureSets.assign(MaxPressure.size(), false); 295 for (unsigned i = 0, e = MaxPressure.size(); i < e; ++i) { 296 unsigned Limit = DAG->getRegClassInfo()->getRegPressureSetLimit(i); 297 HighPressureSets[i] = 298 ((float)MaxPressure[i] > ((float)Limit * RPThreshold)); 299 } 300 } 301 302 VLIWResourceModel *ConvergingVLIWScheduler::createVLIWResourceModel( 303 const TargetSubtargetInfo &STI, const TargetSchedModel *SchedModel) const { 304 return new VLIWResourceModel(STI, SchedModel); 305 } 306 307 void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) { 308 for (const SDep &PI : SU->Preds) { 309 unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle; 310 unsigned MinLatency = PI.getLatency(); 311 #ifndef NDEBUG 312 Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency); 313 #endif 314 if (SU->TopReadyCycle < PredReadyCycle + MinLatency) 315 SU->TopReadyCycle = PredReadyCycle + MinLatency; 316 } 317 318 if (!SU->isScheduled) 319 Top.releaseNode(SU, SU->TopReadyCycle); 320 } 321 322 void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) { 323 assert(SU->getInstr() && "Scheduled SUnit must have instr"); 324 325 for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; 326 ++I) { 327 unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; 328 unsigned MinLatency = I->getLatency(); 329 #ifndef NDEBUG 330 Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency); 331 #endif 332 if (SU->BotReadyCycle < SuccReadyCycle + MinLatency) 333 SU->BotReadyCycle = SuccReadyCycle + MinLatency; 334 } 335 336 if (!SU->isScheduled) 337 Bot.releaseNode(SU, SU->BotReadyCycle); 338 } 339 340 ConvergingVLIWScheduler::VLIWSchedBoundary::~VLIWSchedBoundary() { 341 delete ResourceModel; 342 delete HazardRec; 343 } 344 345 /// Does this SU have a hazard within the current instruction group. 346 /// 347 /// The scheduler supports two modes of hazard recognition. The first is the 348 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that 349 /// supports highly complicated in-order reservation tables 350 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic. 351 /// 352 /// The second is a streamlined mechanism that checks for hazards based on 353 /// simple counters that the scheduler itself maintains. It explicitly checks 354 /// for instruction dispatch limitations, including the number of micro-ops that 355 /// can dispatch per cycle. 356 /// 357 /// TODO: Also check whether the SU must start a new group. 358 bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) { 359 if (HazardRec->isEnabled()) 360 return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; 361 362 unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); 363 if (IssueCount + uops > SchedModel->getIssueWidth()) 364 return true; 365 366 return false; 367 } 368 369 void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode( 370 SUnit *SU, unsigned ReadyCycle) { 371 if (ReadyCycle < MinReadyCycle) 372 MinReadyCycle = ReadyCycle; 373 374 // Check for interlocks first. For the purpose of other heuristics, an 375 // instruction that cannot issue appears as if it's not in the ReadyQueue. 376 if (ReadyCycle > CurrCycle || checkHazard(SU)) 377 378 Pending.push(SU); 379 else 380 Available.push(SU); 381 } 382 383 /// Move the boundary of scheduled code by one cycle. 384 void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() { 385 unsigned Width = SchedModel->getIssueWidth(); 386 IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width; 387 388 assert(MinReadyCycle < std::numeric_limits<unsigned>::max() && 389 "MinReadyCycle uninitialized"); 390 unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle); 391 392 if (!HazardRec->isEnabled()) { 393 // Bypass HazardRec virtual calls. 394 CurrCycle = NextCycle; 395 } else { 396 // Bypass getHazardType calls in case of long latency. 397 for (; CurrCycle != NextCycle; ++CurrCycle) { 398 if (isTop()) 399 HazardRec->AdvanceCycle(); 400 else 401 HazardRec->RecedeCycle(); 402 } 403 } 404 CheckPending = true; 405 406 LLVM_DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle " 407 << CurrCycle << '\n'); 408 } 409 410 /// Move the boundary of scheduled code by one SUnit. 411 void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) { 412 bool startNewCycle = false; 413 414 // Update the reservation table. 415 if (HazardRec->isEnabled()) { 416 if (!isTop() && SU->isCall) { 417 // Calls are scheduled with their preceding instructions. For bottom-up 418 // scheduling, clear the pipeline state before emitting. 419 HazardRec->Reset(); 420 } 421 HazardRec->EmitInstruction(SU); 422 } 423 424 // Update DFA model. 425 startNewCycle = ResourceModel->reserveResources(SU, isTop()); 426 427 // Check the instruction group dispatch limit. 428 // TODO: Check if this SU must end a dispatch group. 429 IssueCount += SchedModel->getNumMicroOps(SU->getInstr()); 430 if (startNewCycle) { 431 LLVM_DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n'); 432 bumpCycle(); 433 } else 434 LLVM_DEBUG(dbgs() << "*** IssueCount " << IssueCount << " at cycle " 435 << CurrCycle << '\n'); 436 } 437 438 /// Release pending ready nodes in to the available queue. This makes them 439 /// visible to heuristics. 440 void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() { 441 // If the available queue is empty, it is safe to reset MinReadyCycle. 442 if (Available.empty()) 443 MinReadyCycle = std::numeric_limits<unsigned>::max(); 444 445 // Check to see if any of the pending instructions are ready to issue. If 446 // so, add them to the available queue. 447 for (unsigned i = 0, e = Pending.size(); i != e; ++i) { 448 SUnit *SU = *(Pending.begin() + i); 449 unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; 450 451 if (ReadyCycle < MinReadyCycle) 452 MinReadyCycle = ReadyCycle; 453 454 if (ReadyCycle > CurrCycle) 455 continue; 456 457 if (checkHazard(SU)) 458 continue; 459 460 Available.push(SU); 461 Pending.remove(Pending.begin() + i); 462 --i; 463 --e; 464 } 465 CheckPending = false; 466 } 467 468 /// Remove SU from the ready set for this boundary. 469 void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) { 470 if (Available.isInQueue(SU)) 471 Available.remove(Available.find(SU)); 472 else { 473 assert(Pending.isInQueue(SU) && "bad ready count"); 474 Pending.remove(Pending.find(SU)); 475 } 476 } 477 478 /// If this queue only has one ready candidate, return it. As a side effect, 479 /// advance the cycle until at least one node is ready. If multiple instructions 480 /// are ready, return NULL. 481 SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() { 482 if (CheckPending) 483 releasePending(); 484 485 auto AdvanceCycle = [this]() { 486 if (Available.empty()) 487 return true; 488 if (Available.size() == 1 && Pending.size() > 0) 489 return !ResourceModel->isResourceAvailable(*Available.begin(), isTop()) || 490 getWeakLeft(*Available.begin(), isTop()) != 0; 491 return false; 492 }; 493 for (unsigned i = 0; AdvanceCycle(); ++i) { 494 assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) && 495 "permanent hazard"); 496 (void)i; 497 ResourceModel->reserveResources(nullptr, isTop()); 498 bumpCycle(); 499 releasePending(); 500 } 501 if (Available.size() == 1) 502 return *Available.begin(); 503 return nullptr; 504 } 505 506 #ifndef NDEBUG 507 void ConvergingVLIWScheduler::traceCandidate(const char *Label, 508 const ReadyQueue &Q, SUnit *SU, 509 int Cost, PressureChange P) { 510 dbgs() << Label << " " << Q.getName() << " "; 511 if (P.isValid()) 512 dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":" 513 << P.getUnitInc() << " "; 514 else 515 dbgs() << " "; 516 dbgs() << "cost(" << Cost << ")\t"; 517 DAG->dumpNode(*SU); 518 } 519 520 // Very detailed queue dump, to be used with higher verbosity levels. 521 void ConvergingVLIWScheduler::readyQueueVerboseDump( 522 const RegPressureTracker &RPTracker, SchedCandidate &Candidate, 523 ReadyQueue &Q) { 524 RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker); 525 526 dbgs() << ">>> " << Q.getName() << "\n"; 527 for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { 528 RegPressureDelta RPDelta; 529 TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta, 530 DAG->getRegionCriticalPSets(), 531 DAG->getRegPressure().MaxSetPressure); 532 std::stringstream dbgstr; 533 dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")"; 534 dbgs() << dbgstr.str(); 535 SchedulingCost(Q, *I, Candidate, RPDelta, true); 536 dbgs() << "\t"; 537 (*I)->getInstr()->dump(); 538 } 539 dbgs() << "\n"; 540 } 541 #endif 542 543 /// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor 544 /// of SU, return true (we may have duplicates) 545 static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) { 546 if (SU->NumPredsLeft == 0) 547 return false; 548 549 for (auto &Pred : SU->Preds) { 550 // We found an available, but not scheduled, predecessor. 551 if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2)) 552 return false; 553 } 554 555 return true; 556 } 557 558 /// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor 559 /// of SU, return true (we may have duplicates) 560 static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) { 561 if (SU->NumSuccsLeft == 0) 562 return false; 563 564 for (auto &Succ : SU->Succs) { 565 // We found an available, but not scheduled, successor. 566 if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2)) 567 return false; 568 } 569 return true; 570 } 571 572 /// Check if the instruction changes the register pressure of a register in the 573 /// high pressure set. The function returns a negative value if the pressure 574 /// decreases and a positive value is the pressure increases. If the instruction 575 /// doesn't use a high pressure register or doesn't change the register 576 /// pressure, then return 0. 577 int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) { 578 PressureDiff &PD = DAG->getPressureDiff(SU); 579 for (const auto &P : PD) { 580 if (!P.isValid()) 581 continue; 582 // The pressure differences are computed bottom-up, so the comparison for 583 // an increase is positive in the bottom direction, but negative in the 584 // top-down direction. 585 if (HighPressureSets[P.getPSet()]) 586 return (isBotUp ? P.getUnitInc() : -P.getUnitInc()); 587 } 588 return 0; 589 } 590 591 /// Single point to compute overall scheduling cost. 592 /// TODO: More heuristics will be used soon. 593 int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, 594 SchedCandidate &Candidate, 595 RegPressureDelta &Delta, 596 bool verbose) { 597 // Initial trivial priority. 598 int ResCount = 1; 599 600 // Do not waste time on a node that is already scheduled. 601 if (!SU || SU->isScheduled) 602 return ResCount; 603 604 LLVM_DEBUG(if (verbose) dbgs() 605 << ((Q.getID() == TopQID) ? "(top|" : "(bot|")); 606 // Forced priority is high. 607 if (SU->isScheduleHigh) { 608 ResCount += PriorityOne; 609 LLVM_DEBUG(dbgs() << "H|"); 610 } 611 612 unsigned IsAvailableAmt = 0; 613 // Critical path first. 614 if (Q.getID() == TopQID) { 615 if (Top.isLatencyBound(SU)) { 616 LLVM_DEBUG(if (verbose) dbgs() << "LB|"); 617 ResCount += (SU->getHeight() * ScaleTwo); 618 } 619 620 LLVM_DEBUG(if (verbose) { 621 std::stringstream dbgstr; 622 dbgstr << "h" << std::setw(3) << SU->getHeight() << "|"; 623 dbgs() << dbgstr.str(); 624 }); 625 626 // If resources are available for it, multiply the 627 // chance of scheduling. 628 if (Top.ResourceModel->isResourceAvailable(SU, true)) { 629 IsAvailableAmt = (PriorityTwo + PriorityThree); 630 ResCount += IsAvailableAmt; 631 LLVM_DEBUG(if (verbose) dbgs() << "A|"); 632 } else 633 LLVM_DEBUG(if (verbose) dbgs() << " |"); 634 } else { 635 if (Bot.isLatencyBound(SU)) { 636 LLVM_DEBUG(if (verbose) dbgs() << "LB|"); 637 ResCount += (SU->getDepth() * ScaleTwo); 638 } 639 640 LLVM_DEBUG(if (verbose) { 641 std::stringstream dbgstr; 642 dbgstr << "d" << std::setw(3) << SU->getDepth() << "|"; 643 dbgs() << dbgstr.str(); 644 }); 645 646 // If resources are available for it, multiply the 647 // chance of scheduling. 648 if (Bot.ResourceModel->isResourceAvailable(SU, false)) { 649 IsAvailableAmt = (PriorityTwo + PriorityThree); 650 ResCount += IsAvailableAmt; 651 LLVM_DEBUG(if (verbose) dbgs() << "A|"); 652 } else 653 LLVM_DEBUG(if (verbose) dbgs() << " |"); 654 } 655 656 unsigned NumNodesBlocking = 0; 657 if (Q.getID() == TopQID) { 658 // How many SUs does it block from scheduling? 659 // Look at all of the successors of this node. 660 // Count the number of nodes that 661 // this node is the sole unscheduled node for. 662 if (Top.isLatencyBound(SU)) 663 for (const SDep &SI : SU->Succs) 664 if (isSingleUnscheduledPred(SI.getSUnit(), SU)) 665 ++NumNodesBlocking; 666 } else { 667 // How many unscheduled predecessors block this node? 668 if (Bot.isLatencyBound(SU)) 669 for (const SDep &PI : SU->Preds) 670 if (isSingleUnscheduledSucc(PI.getSUnit(), SU)) 671 ++NumNodesBlocking; 672 } 673 ResCount += (NumNodesBlocking * ScaleTwo); 674 675 LLVM_DEBUG(if (verbose) { 676 std::stringstream dbgstr; 677 dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|"; 678 dbgs() << dbgstr.str(); 679 }); 680 681 // Factor in reg pressure as a heuristic. 682 if (!IgnoreBBRegPressure) { 683 // Decrease priority by the amount that register pressure exceeds the limit. 684 ResCount -= (Delta.Excess.getUnitInc() * PriorityOne); 685 // Decrease priority if register pressure exceeds the limit. 686 ResCount -= (Delta.CriticalMax.getUnitInc() * PriorityOne); 687 // Decrease priority slightly if register pressure would increase over the 688 // current maximum. 689 ResCount -= (Delta.CurrentMax.getUnitInc() * PriorityTwo); 690 // If there are register pressure issues, then we remove the value added for 691 // the instruction being available. The rationale is that we really don't 692 // want to schedule an instruction that causes a spill. 693 if (IsAvailableAmt && pressureChange(SU, Q.getID() != TopQID) > 0 && 694 (Delta.Excess.getUnitInc() || Delta.CriticalMax.getUnitInc() || 695 Delta.CurrentMax.getUnitInc())) 696 ResCount -= IsAvailableAmt; 697 LLVM_DEBUG(if (verbose) { 698 dbgs() << "RP " << Delta.Excess.getUnitInc() << "/" 699 << Delta.CriticalMax.getUnitInc() << "/" 700 << Delta.CurrentMax.getUnitInc() << ")|"; 701 }); 702 } 703 704 // Give preference to a zero latency instruction if the dependent 705 // instruction is in the current packet. 706 if (Q.getID() == TopQID && getWeakLeft(SU, true) == 0) { 707 for (const SDep &PI : SU->Preds) { 708 if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() && 709 PI.getLatency() == 0 && 710 Top.ResourceModel->isInPacket(PI.getSUnit())) { 711 ResCount += PriorityThree; 712 LLVM_DEBUG(if (verbose) dbgs() << "Z|"); 713 } 714 } 715 } else if (Q.getID() == BotQID && getWeakLeft(SU, false) == 0) { 716 for (const SDep &SI : SU->Succs) { 717 if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() && 718 SI.getLatency() == 0 && 719 Bot.ResourceModel->isInPacket(SI.getSUnit())) { 720 ResCount += PriorityThree; 721 LLVM_DEBUG(if (verbose) dbgs() << "Z|"); 722 } 723 } 724 } 725 726 // If the instruction has a non-zero latency dependence with an instruction in 727 // the current packet, then it should not be scheduled yet. The case occurs 728 // when the dependent instruction is scheduled in a new packet, so the 729 // scheduler updates the current cycle and pending instructions become 730 // available. 731 if (CheckEarlyAvail) { 732 if (Q.getID() == TopQID) { 733 for (const auto &PI : SU->Preds) { 734 if (PI.getLatency() > 0 && 735 Top.ResourceModel->isInPacket(PI.getSUnit())) { 736 ResCount -= PriorityOne; 737 LLVM_DEBUG(if (verbose) dbgs() << "D|"); 738 } 739 } 740 } else { 741 for (const auto &SI : SU->Succs) { 742 if (SI.getLatency() > 0 && 743 Bot.ResourceModel->isInPacket(SI.getSUnit())) { 744 ResCount -= PriorityOne; 745 LLVM_DEBUG(if (verbose) dbgs() << "D|"); 746 } 747 } 748 } 749 } 750 751 LLVM_DEBUG(if (verbose) { 752 std::stringstream dbgstr; 753 dbgstr << "Total " << std::setw(4) << ResCount << ")"; 754 dbgs() << dbgstr.str(); 755 }); 756 757 return ResCount; 758 } 759 760 /// Pick the best candidate from the top queue. 761 /// 762 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during 763 /// DAG building. To adjust for the current scheduling location we need to 764 /// maintain the number of vreg uses remaining to be top-scheduled. 765 ConvergingVLIWScheduler::CandResult 766 ConvergingVLIWScheduler::pickNodeFromQueue(VLIWSchedBoundary &Zone, 767 const RegPressureTracker &RPTracker, 768 SchedCandidate &Candidate) { 769 ReadyQueue &Q = Zone.Available; 770 LLVM_DEBUG(if (SchedDebugVerboseLevel > 1) 771 readyQueueVerboseDump(RPTracker, Candidate, Q); 772 else Q.dump();); 773 774 // getMaxPressureDelta temporarily modifies the tracker. 775 RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker); 776 777 // BestSU remains NULL if no top candidates beat the best existing candidate. 778 CandResult FoundCandidate = NoCand; 779 for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { 780 RegPressureDelta RPDelta; 781 TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta, 782 DAG->getRegionCriticalPSets(), 783 DAG->getRegPressure().MaxSetPressure); 784 785 int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false); 786 787 // Initialize the candidate if needed. 788 if (!Candidate.SU) { 789 LLVM_DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost)); 790 Candidate.SU = *I; 791 Candidate.RPDelta = RPDelta; 792 Candidate.SCost = CurrentCost; 793 FoundCandidate = NodeOrder; 794 continue; 795 } 796 797 // Choose node order for negative cost candidates. There is no good 798 // candidate in this case. 799 if (CurrentCost < 0 && Candidate.SCost < 0) { 800 if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) || 801 (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) { 802 LLVM_DEBUG(traceCandidate("NCAND", Q, *I, CurrentCost)); 803 Candidate.SU = *I; 804 Candidate.RPDelta = RPDelta; 805 Candidate.SCost = CurrentCost; 806 FoundCandidate = NodeOrder; 807 } 808 continue; 809 } 810 811 // Best cost. 812 if (CurrentCost > Candidate.SCost) { 813 LLVM_DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost)); 814 Candidate.SU = *I; 815 Candidate.RPDelta = RPDelta; 816 Candidate.SCost = CurrentCost; 817 FoundCandidate = BestCost; 818 continue; 819 } 820 821 // Choose an instruction that does not depend on an artificial edge. 822 unsigned CurrWeak = getWeakLeft(*I, (Q.getID() == TopQID)); 823 unsigned CandWeak = getWeakLeft(Candidate.SU, (Q.getID() == TopQID)); 824 if (CurrWeak != CandWeak) { 825 if (CurrWeak < CandWeak) { 826 LLVM_DEBUG(traceCandidate("WCAND", Q, *I, CurrentCost)); 827 Candidate.SU = *I; 828 Candidate.RPDelta = RPDelta; 829 Candidate.SCost = CurrentCost; 830 FoundCandidate = Weak; 831 } 832 continue; 833 } 834 835 if (CurrentCost == Candidate.SCost && Zone.isLatencyBound(*I)) { 836 unsigned CurrSize, CandSize; 837 if (Q.getID() == TopQID) { 838 CurrSize = (*I)->Succs.size(); 839 CandSize = Candidate.SU->Succs.size(); 840 } else { 841 CurrSize = (*I)->Preds.size(); 842 CandSize = Candidate.SU->Preds.size(); 843 } 844 if (CurrSize > CandSize) { 845 LLVM_DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost)); 846 Candidate.SU = *I; 847 Candidate.RPDelta = RPDelta; 848 Candidate.SCost = CurrentCost; 849 FoundCandidate = BestCost; 850 } 851 // Keep the old candidate if it's a better candidate. That is, don't use 852 // the subsequent tie breaker. 853 if (CurrSize != CandSize) 854 continue; 855 } 856 857 // Tie breaker. 858 // To avoid scheduling indeterminism, we need a tie breaker 859 // for the case when cost is identical for two nodes. 860 if (UseNewerCandidate && CurrentCost == Candidate.SCost) { 861 if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) || 862 (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) { 863 LLVM_DEBUG(traceCandidate("TCAND", Q, *I, CurrentCost)); 864 Candidate.SU = *I; 865 Candidate.RPDelta = RPDelta; 866 Candidate.SCost = CurrentCost; 867 FoundCandidate = NodeOrder; 868 continue; 869 } 870 } 871 872 // Fall through to original instruction order. 873 // Only consider node order if Candidate was chosen from this Q. 874 if (FoundCandidate == NoCand) 875 continue; 876 } 877 return FoundCandidate; 878 } 879 880 /// Pick the best candidate node from either the top or bottom queue. 881 SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) { 882 // Schedule as far as possible in the direction of no choice. This is most 883 // efficient, but also provides the best heuristics for CriticalPSets. 884 if (SUnit *SU = Bot.pickOnlyChoice()) { 885 LLVM_DEBUG(dbgs() << "Picked only Bottom\n"); 886 IsTopNode = false; 887 return SU; 888 } 889 if (SUnit *SU = Top.pickOnlyChoice()) { 890 LLVM_DEBUG(dbgs() << "Picked only Top\n"); 891 IsTopNode = true; 892 return SU; 893 } 894 SchedCandidate BotCand; 895 // Prefer bottom scheduling when heuristics are silent. 896 CandResult BotResult = 897 pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); 898 assert(BotResult != NoCand && "failed to find the first candidate"); 899 900 // If either Q has a single candidate that provides the least increase in 901 // Excess pressure, we can immediately schedule from that Q. 902 // 903 // RegionCriticalPSets summarizes the pressure within the scheduled region and 904 // affects picking from either Q. If scheduling in one direction must 905 // increase pressure for one of the excess PSets, then schedule in that 906 // direction first to provide more freedom in the other direction. 907 if (BotResult == SingleExcess || BotResult == SingleCritical) { 908 LLVM_DEBUG(dbgs() << "Prefered Bottom Node\n"); 909 IsTopNode = false; 910 return BotCand.SU; 911 } 912 // Check if the top Q has a better candidate. 913 SchedCandidate TopCand; 914 CandResult TopResult = 915 pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand); 916 assert(TopResult != NoCand && "failed to find the first candidate"); 917 918 if (TopResult == SingleExcess || TopResult == SingleCritical) { 919 LLVM_DEBUG(dbgs() << "Prefered Top Node\n"); 920 IsTopNode = true; 921 return TopCand.SU; 922 } 923 // If either Q has a single candidate that minimizes pressure above the 924 // original region's pressure pick it. 925 if (BotResult == SingleMax) { 926 LLVM_DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n"); 927 IsTopNode = false; 928 return BotCand.SU; 929 } 930 if (TopResult == SingleMax) { 931 LLVM_DEBUG(dbgs() << "Prefered Top Node SingleMax\n"); 932 IsTopNode = true; 933 return TopCand.SU; 934 } 935 if (TopCand.SCost > BotCand.SCost) { 936 LLVM_DEBUG(dbgs() << "Prefered Top Node Cost\n"); 937 IsTopNode = true; 938 return TopCand.SU; 939 } 940 // Otherwise prefer the bottom candidate in node order. 941 LLVM_DEBUG(dbgs() << "Prefered Bottom in Node order\n"); 942 IsTopNode = false; 943 return BotCand.SU; 944 } 945 946 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy. 947 SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) { 948 if (DAG->top() == DAG->bottom()) { 949 assert(Top.Available.empty() && Top.Pending.empty() && 950 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); 951 return nullptr; 952 } 953 SUnit *SU; 954 if (PreRADirection == MISched::TopDown) { 955 SU = Top.pickOnlyChoice(); 956 if (!SU) { 957 SchedCandidate TopCand; 958 CandResult TopResult = 959 pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand); 960 assert(TopResult != NoCand && "failed to find the first candidate"); 961 (void)TopResult; 962 SU = TopCand.SU; 963 } 964 IsTopNode = true; 965 } else if (PreRADirection == MISched::BottomUp) { 966 SU = Bot.pickOnlyChoice(); 967 if (!SU) { 968 SchedCandidate BotCand; 969 CandResult BotResult = 970 pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); 971 assert(BotResult != NoCand && "failed to find the first candidate"); 972 (void)BotResult; 973 SU = BotCand.SU; 974 } 975 IsTopNode = false; 976 } else { 977 SU = pickNodeBidrectional(IsTopNode); 978 } 979 if (SU->isTopReady()) 980 Top.removeReady(SU); 981 if (SU->isBottomReady()) 982 Bot.removeReady(SU); 983 984 LLVM_DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom") 985 << " Scheduling instruction in cycle " 986 << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << " (" 987 << reportPackets() << ")\n"; 988 DAG->dumpNode(*SU)); 989 return SU; 990 } 991 992 /// Update the scheduler's state after scheduling a node. This is the same node 993 /// that was just returned by pickNode(). However, VLIWMachineScheduler needs 994 /// to update it's state based on the current cycle before MachineSchedStrategy 995 /// does. 996 void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) { 997 if (IsTopNode) { 998 Top.bumpNode(SU); 999 SU->TopReadyCycle = Top.CurrCycle; 1000 } else { 1001 Bot.bumpNode(SU); 1002 SU->BotReadyCycle = Bot.CurrCycle; 1003 } 1004 } 1005