//===- VLIWMachineScheduler.cpp - VLIW-Focused Scheduling Pass ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// MachineScheduler schedules machine instructions after phi elimination. It
// preserves LiveIntervals so it can be invoked before register allocation.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/VLIWMachineScheduler.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <iomanip>
#include <limits>
#include <memory>
#include <sstream>

using namespace llvm;

#define DEBUG_TYPE "machine-scheduler"

static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure", cl::Hidden,
                                         cl::ZeroOrMore, cl::init(false));

static cl::opt<bool> UseNewerCandidate("use-newer-candidate", cl::Hidden,
                                       cl::ZeroOrMore, cl::init(true));

static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
                                                cl::Hidden, cl::ZeroOrMore,
                                                cl::init(1));

// Check if the scheduler should penalize instructions that are available too
// early due to a zero-latency dependence.
static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,
                                     cl::ZeroOrMore, cl::init(true));

// This value is used to determine if a register class is a high pressure set.
// We compute the maximum number of registers needed and divide it by the total
// available. Then, we compare the result to this value.
static cl::opt<float>
    RPThreshold("vliw-misched-reg-pressure", cl::Hidden, cl::init(0.75f),
                cl::desc("High register pressure threshold."));

VLIWResourceModel::VLIWResourceModel(const TargetSubtargetInfo &STI,
                                     const TargetSchedModel *SM)
    : TII(STI.getInstrInfo()), SchedModel(SM) {
  ResourcesModel = createPacketizer(STI);

  // This hard requirement could be relaxed,
  // but for now do not let it proceed.
  assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");

  Packet.reserve(SchedModel->getIssueWidth());
  Packet.clear();
  ResourcesModel->clearResources();
}

void VLIWResourceModel::reset() {
  Packet.clear();
  ResourcesModel->clearResources();
}

VLIWResourceModel::~VLIWResourceModel() { delete ResourcesModel; }
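// Note on how the resource model is used by the scheduling strategy below:
// isResourceAvailable() only asks whether an SU would still fit in the packet
// currently being formed and feeds the cost heuristics, while
// reserveResources() actually commits an SU to the packet and reports when a
// new packet had to be started so the caller can advance the cycle.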
/// Return true if there is a dependence between SUd and SUu.
bool VLIWResourceModel::hasDependence(const SUnit *SUd, const SUnit *SUu) {
  if (SUd->Succs.size() == 0)
    return false;

  for (const auto &S : SUd->Succs) {
    // Since we do not add pseudos to packets, might as well
    // ignore order dependencies.
    if (S.isCtrl())
      continue;

    if (S.getSUnit() == SUu && S.getLatency() > 0)
      return true;
  }
  return false;
}

/// Check if scheduling of this SU is possible in the current packet.
/// It is _not_ precise (stateful); it is more like another heuristic.
/// Many corner cases were figured out empirically.
bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) {
  if (!SU || !SU->getInstr())
    return false;

  // First see if the pipeline could receive this instruction
  // in the current cycle.
  switch (SU->getInstr()->getOpcode()) {
  default:
    if (!ResourcesModel->canReserveResources(*SU->getInstr()))
      return false;
    break;
  case TargetOpcode::EXTRACT_SUBREG:
  case TargetOpcode::INSERT_SUBREG:
  case TargetOpcode::SUBREG_TO_REG:
  case TargetOpcode::REG_SEQUENCE:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::COPY:
  case TargetOpcode::INLINEASM:
  case TargetOpcode::INLINEASM_BR:
    break;
  }

  // Now see that there are no dependences on instructions already
  // in the packet.
  if (IsTop) {
    for (unsigned i = 0, e = Packet.size(); i != e; ++i)
      if (hasDependence(Packet[i], SU))
        return false;
  } else {
    for (unsigned i = 0, e = Packet.size(); i != e; ++i)
      if (hasDependence(SU, Packet[i]))
        return false;
  }
  return true;
}

/// Keep track of available resources.
bool VLIWResourceModel::reserveResources(SUnit *SU, bool IsTop) {
  bool startNewCycle = false;
  // Artificially reset state.
  if (!SU) {
    reset();
    TotalPackets++;
    return false;
  }
  // If this SU does not fit in the packet or the packet is now full,
  // start a new one.
  if (!isResourceAvailable(SU, IsTop) ||
      Packet.size() >= SchedModel->getIssueWidth()) {
    reset();
    TotalPackets++;
    startNewCycle = true;
  }

  switch (SU->getInstr()->getOpcode()) {
  default:
    ResourcesModel->reserveResources(*SU->getInstr());
    break;
  case TargetOpcode::EXTRACT_SUBREG:
  case TargetOpcode::INSERT_SUBREG:
  case TargetOpcode::SUBREG_TO_REG:
  case TargetOpcode::REG_SEQUENCE:
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
  case TargetOpcode::CFI_INSTRUCTION:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::COPY:
  case TargetOpcode::INLINEASM:
  case TargetOpcode::INLINEASM_BR:
    break;
  }
  Packet.push_back(SU);

#ifndef NDEBUG
  LLVM_DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
  for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
    LLVM_DEBUG(dbgs() << "\t[" << i << "] SU(");
    LLVM_DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
    LLVM_DEBUG(Packet[i]->getInstr()->dump());
  }
#endif

  return startNewCycle;
}

DFAPacketizer *
VLIWResourceModel::createPacketizer(const TargetSubtargetInfo &STI) const {
  return STI.getInstrInfo()->CreateTargetScheduleState(STI);
}

/// schedule - Called back from MachineScheduler::runOnMachineFunction
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
void VLIWMachineScheduler::schedule() {
  LLVM_DEBUG(dbgs() << "********** MI Converging Scheduling VLIW "
                    << printMBBReference(*BB) << " " << BB->getName()
                    << " in_func " << BB->getParent()->getName()
                    << " at loop depth " << MLI->getLoopDepth(BB) << " \n");

  buildDAGWithRegPressure();

  Topo.InitDAGTopologicalSorting();

  // Postprocess the DAG to add platform-specific artificial dependencies.
  postprocessDAG();

  SmallVector<SUnit *, 8> TopRoots, BotRoots;
  findRootsAndBiasEdges(TopRoots, BotRoots);

  // Initialize the strategy before modifying the DAG.
  SchedImpl->initialize(this);

  LLVM_DEBUG({
    unsigned maxH = 0;
    for (const SUnit &SU : SUnits)
      if (SU.getHeight() > maxH)
        maxH = SU.getHeight();
    dbgs() << "Max Height " << maxH << "\n";
  });
  LLVM_DEBUG({
    unsigned maxD = 0;
    for (const SUnit &SU : SUnits)
      if (SU.getDepth() > maxD)
        maxD = SU.getDepth();
    dbgs() << "Max Depth " << maxD << "\n";
  });
  LLVM_DEBUG(dump());
  if (ViewMISchedDAGs)
    viewGraph();

  initQueues(TopRoots, BotRoots);

  bool IsTopNode = false;
  while (true) {
    LLVM_DEBUG(
        dbgs() << "** VLIWMachineScheduler::schedule picking next node\n");
    SUnit *SU = SchedImpl->pickNode(IsTopNode);
    if (!SU)
      break;

    if (!checkSchedLimit())
      break;

    scheduleMI(SU, IsTopNode);

    // Notify the scheduling strategy after updating the DAG.
    SchedImpl->schedNode(SU, IsTopNode);

    updateQueues(SU, IsTopNode);
  }
  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");

  placeDebugValues();

  LLVM_DEBUG({
    dbgs() << "*** Final schedule for "
           << printMBBReference(*begin()->getParent()) << " ***\n";
    dumpSchedule();
    dbgs() << '\n';
  });
}
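/// Set up the per-region state of the strategy: (re)create the hazard
/// recognizers and VLIW resource models for both scheduling boundaries, and
/// record which register pressure sets are already under high pressure in
/// this region relative to RPThreshold.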
void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
  DAG = static_cast<VLIWMachineScheduler *>(dag);
  SchedModel = DAG->getSchedModel();

  Top.init(DAG, SchedModel);
  Bot.init(DAG, SchedModel);

  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
  // or are disabled, then these HazardRecs will be disabled.
  const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();
  const TargetSubtargetInfo &STI = DAG->MF.getSubtarget();
  const TargetInstrInfo *TII = STI.getInstrInfo();
  delete Top.HazardRec;
  delete Bot.HazardRec;
  Top.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);
  Bot.HazardRec = TII->CreateTargetMIHazardRecognizer(Itin, DAG);

  delete Top.ResourceModel;
  delete Bot.ResourceModel;
  Top.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel());
  Bot.ResourceModel = createVLIWResourceModel(STI, DAG->getSchedModel());

  const std::vector<unsigned> &MaxPressure =
      DAG->getRegPressure().MaxSetPressure;
  HighPressureSets.assign(MaxPressure.size(), false);
  for (unsigned i = 0, e = MaxPressure.size(); i < e; ++i) {
    unsigned Limit = DAG->getRegClassInfo()->getRegPressureSetLimit(i);
    HighPressureSets[i] =
        ((float)MaxPressure[i] > ((float)Limit * RPThreshold));
  }

  assert((!ForceTopDown || !ForceBottomUp) &&
         "-misched-topdown incompatible with -misched-bottomup");
}

VLIWResourceModel *ConvergingVLIWScheduler::createVLIWResourceModel(
    const TargetSubtargetInfo &STI, const TargetSchedModel *SchedModel) const {
  return new VLIWResourceModel(STI, SchedModel);
}

void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
  for (const SDep &PI : SU->Preds) {
    unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle;
    unsigned MinLatency = PI.getLatency();
#ifndef NDEBUG
    Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
#endif
    if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
      SU->TopReadyCycle = PredReadyCycle + MinLatency;
  }

  if (!SU->isScheduled)
    Top.releaseNode(SU, SU->TopReadyCycle);
}

void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
  assert(SU->getInstr() && "Scheduled SUnit must have instr");

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E;
       ++I) {
    unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
    unsigned MinLatency = I->getLatency();
#ifndef NDEBUG
    Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
#endif
    if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
      SU->BotReadyCycle = SuccReadyCycle + MinLatency;
  }

  if (!SU->isScheduled)
    Bot.releaseNode(SU, SU->BotReadyCycle);
}

ConvergingVLIWScheduler::VLIWSchedBoundary::~VLIWSchedBoundary() {
  delete ResourceModel;
  delete HazardRec;
}

/// Does this SU have a hazard within the current instruction group.
///
/// The scheduler supports two modes of hazard recognition. The first is the
/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
/// supports highly complicated in-order reservation tables
/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
///
/// The second is a streamlined mechanism that checks for hazards based on
/// simple counters that the scheduler itself maintains. It explicitly checks
/// for instruction dispatch limitations, including the number of micro-ops
/// that can dispatch per cycle.
///
/// TODO: Also check whether the SU must start a new group.
bool ConvergingVLIWScheduler::VLIWSchedBoundary::checkHazard(SUnit *SU) {
  if (HazardRec->isEnabled())
    return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;

  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
  if (IssueCount + uops > SchedModel->getIssueWidth())
    return true;

  return false;
}

void ConvergingVLIWScheduler::VLIWSchedBoundary::releaseNode(
    SUnit *SU, unsigned ReadyCycle) {
  if (ReadyCycle < MinReadyCycle)
    MinReadyCycle = ReadyCycle;

  // Check for interlocks first. For the purpose of other heuristics, an
  // instruction that cannot issue appears as if it's not in the ReadyQueue.
  if (ReadyCycle > CurrCycle || checkHazard(SU))
    Pending.push(SU);
  else
    Available.push(SU);
}

/// Move the boundary of scheduled code by one cycle.
void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() {
  unsigned Width = SchedModel->getIssueWidth();
  IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;

  assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
         "MinReadyCycle uninitialized");
  unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);

  if (!HazardRec->isEnabled()) {
    // Bypass HazardRec virtual calls.
    CurrCycle = NextCycle;
  } else {
    // Bypass getHazardType calls in case of long latency.
    for (; CurrCycle != NextCycle; ++CurrCycle) {
      if (isTop())
        HazardRec->AdvanceCycle();
      else
        HazardRec->RecedeCycle();
    }
  }
  CheckPending = true;

  LLVM_DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle "
                    << CurrCycle << '\n');
}

/// Move the boundary of scheduled code by one SUnit.
void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) {
  bool startNewCycle = false;

  // Update the reservation table.
  if (HazardRec->isEnabled()) {
    if (!isTop() && SU->isCall) {
      // Calls are scheduled with their preceding instructions. For bottom-up
      // scheduling, clear the pipeline state before emitting.
      HazardRec->Reset();
    }
    HazardRec->EmitInstruction(SU);
  }

  // Update DFA model.
  startNewCycle = ResourceModel->reserveResources(SU, isTop());

  // Check the instruction group dispatch limit.
  // TODO: Check if this SU must end a dispatch group.
  IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
  if (startNewCycle) {
    LLVM_DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
    bumpCycle();
  } else
    LLVM_DEBUG(dbgs() << "*** IssueCount " << IssueCount << " at cycle "
                      << CurrCycle << '\n');
}

/// Release pending ready nodes into the available queue. This makes them
/// visible to heuristics.
void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() {
  // If the available queue is empty, it is safe to reset MinReadyCycle.
  if (Available.empty())
    MinReadyCycle = std::numeric_limits<unsigned>::max();

  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
    SUnit *SU = *(Pending.begin() + i);
    unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;

    if (ReadyCycle < MinReadyCycle)
      MinReadyCycle = ReadyCycle;

    if (ReadyCycle > CurrCycle)
      continue;

    if (checkHazard(SU))
      continue;

    Available.push(SU);
    Pending.remove(Pending.begin() + i);
    --i;
    --e;
  }
  CheckPending = false;
}

/// Remove SU from the ready set for this boundary.
void ConvergingVLIWScheduler::VLIWSchedBoundary::removeReady(SUnit *SU) {
  if (Available.isInQueue(SU))
    Available.remove(Available.find(SU));
  else {
    assert(Pending.isInQueue(SU) && "bad ready count");
    Pending.remove(Pending.find(SU));
  }
}

/// If this queue only has one ready candidate, return it. As a side effect,
/// advance the cycle until at least one node is ready. If multiple
/// instructions are ready, return NULL.
SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() {
  if (CheckPending)
    releasePending();

  auto AdvanceCycle = [this]() {
    if (Available.empty())
      return true;
    if (Available.size() == 1 && Pending.size() > 0)
      return !ResourceModel->isResourceAvailable(*Available.begin(), isTop()) ||
             getWeakLeft(*Available.begin(), isTop()) != 0;
    return false;
  };
  for (unsigned i = 0; AdvanceCycle(); ++i) {
    assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
           "permanent hazard");
    (void)i;
    ResourceModel->reserveResources(nullptr, isTop());
    bumpCycle();
    releasePending();
  }
  if (Available.size() == 1)
    return *Available.begin();
  return nullptr;
}

#ifndef NDEBUG
void ConvergingVLIWScheduler::traceCandidate(const char *Label,
                                             const ReadyQueue &Q, SUnit *SU,
                                             int Cost, PressureChange P) {
  dbgs() << Label << " " << Q.getName() << " ";
  if (P.isValid())
    dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":"
           << P.getUnitInc() << " ";
  else
    dbgs() << " ";
  dbgs() << "cost(" << Cost << ")\t";
  DAG->dumpNode(*SU);
}

// Very detailed queue dump, to be used with higher verbosity levels.
void ConvergingVLIWScheduler::readyQueueVerboseDump(
    const RegPressureTracker &RPTracker, SchedCandidate &Candidate,
    ReadyQueue &Q) {
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);

  dbgs() << ">>> " << Q.getName() << "\n";
  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
    RegPressureDelta RPDelta;
    TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
                                    DAG->getRegionCriticalPSets(),
                                    DAG->getRegPressure().MaxSetPressure);
    std::stringstream dbgstr;
    dbgstr << "SU(" << std::setw(3) << (*I)->NodeNum << ")";
    dbgs() << dbgstr.str();
    SchedulingCost(Q, *I, Candidate, RPDelta, true);
    dbgs() << "\t";
    (*I)->getInstr()->dump();
  }
  dbgs() << "\n";
}
#endif
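// The two helpers below feed the "blocking" term of SchedulingCost(): an SU
// that is the sole unscheduled predecessor (or successor) of many other nodes
// gets a bonus, since issuing it unblocks more of the DAG.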
/// isSingleUnscheduledPred - If SU2 is the only unscheduled predecessor
/// of SU, return true (we may have duplicates).
static inline bool isSingleUnscheduledPred(SUnit *SU, SUnit *SU2) {
  if (SU->NumPredsLeft == 0)
    return false;

  for (auto &Pred : SU->Preds) {
    // We found an available, but not scheduled, predecessor.
    if (!Pred.getSUnit()->isScheduled && (Pred.getSUnit() != SU2))
      return false;
  }

  return true;
}

/// isSingleUnscheduledSucc - If SU2 is the only unscheduled successor
/// of SU, return true (we may have duplicates).
static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) {
  if (SU->NumSuccsLeft == 0)
    return false;

  for (auto &Succ : SU->Succs) {
    // We found an available, but not scheduled, successor.
    if (!Succ.getSUnit()->isScheduled && (Succ.getSUnit() != SU2))
      return false;
  }
  return true;
}

/// Check if the instruction changes the register pressure of a register in the
/// high pressure set. The function returns a negative value if the pressure
/// decreases and a positive value if the pressure increases. If the instruction
/// doesn't use a high pressure register or doesn't change the register
/// pressure, then return 0.
int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) {
  PressureDiff &PD = DAG->getPressureDiff(SU);
  for (auto &P : PD) {
    if (!P.isValid())
      continue;
    // The pressure differences are computed bottom-up, so the comparison for
    // an increase is positive in the bottom direction, but negative in the
    // top-down direction.
    if (HighPressureSets[P.getPSet()])
      return (isBotUp ? P.getUnitInc() : -P.getUnitInc());
  }
  return 0;
}

/// Single point to compute overall scheduling cost.
/// TODO: More heuristics will be used soon.
int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
                                            SchedCandidate &Candidate,
                                            RegPressureDelta &Delta,
                                            bool verbose) {
  // Initial trivial priority.
  int ResCount = 1;

  // Do not waste time on a node that is already scheduled.
  if (!SU || SU->isScheduled)
    return ResCount;

  LLVM_DEBUG(if (verbose) dbgs()
             << ((Q.getID() == TopQID) ? "(top|" : "(bot|"));
  // Forced priority is high.
  if (SU->isScheduleHigh) {
    ResCount += PriorityOne;
    LLVM_DEBUG(dbgs() << "H|");
  }

  unsigned IsAvailableAmt = 0;
  // Critical path first.
  if (Q.getID() == TopQID) {
    if (Top.isLatencyBound(SU)) {
      LLVM_DEBUG(if (verbose) dbgs() << "LB|");
      ResCount += (SU->getHeight() * ScaleTwo);
    }

    LLVM_DEBUG(if (verbose) {
      std::stringstream dbgstr;
      dbgstr << "h" << std::setw(3) << SU->getHeight() << "|";
      dbgs() << dbgstr.str();
    });

    // If resources are available for it, multiply the
    // chance of scheduling.
    if (Top.ResourceModel->isResourceAvailable(SU, true)) {
      IsAvailableAmt = (PriorityTwo + PriorityThree);
      ResCount += IsAvailableAmt;
      LLVM_DEBUG(if (verbose) dbgs() << "A|");
    } else
      LLVM_DEBUG(if (verbose) dbgs() << " |");
  } else {
    if (Bot.isLatencyBound(SU)) {
      LLVM_DEBUG(if (verbose) dbgs() << "LB|");
      ResCount += (SU->getDepth() * ScaleTwo);
    }

    LLVM_DEBUG(if (verbose) {
      std::stringstream dbgstr;
      dbgstr << "d" << std::setw(3) << SU->getDepth() << "|";
      dbgs() << dbgstr.str();
    });

    // If resources are available for it, multiply the
    // chance of scheduling.
    if (Bot.ResourceModel->isResourceAvailable(SU, false)) {
      IsAvailableAmt = (PriorityTwo + PriorityThree);
      ResCount += IsAvailableAmt;
      LLVM_DEBUG(if (verbose) dbgs() << "A|");
    } else
      LLVM_DEBUG(if (verbose) dbgs() << " |");
  }

  unsigned NumNodesBlocking = 0;
  if (Q.getID() == TopQID) {
    // How many SUs does it block from scheduling?
    // Look at all of the successors of this node.
    // Count the number of nodes that
    // this node is the sole unscheduled node for.
    if (Top.isLatencyBound(SU))
      for (const SDep &SI : SU->Succs)
        if (isSingleUnscheduledPred(SI.getSUnit(), SU))
          ++NumNodesBlocking;
  } else {
    // How many unscheduled predecessors block this node?
    if (Bot.isLatencyBound(SU))
      for (const SDep &PI : SU->Preds)
        if (isSingleUnscheduledSucc(PI.getSUnit(), SU))
          ++NumNodesBlocking;
  }
  ResCount += (NumNodesBlocking * ScaleTwo);

  LLVM_DEBUG(if (verbose) {
    std::stringstream dbgstr;
    dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|";
    dbgs() << dbgstr.str();
  });

  // Factor in reg pressure as a heuristic.
  if (!IgnoreBBRegPressure) {
    // Decrease priority by the amount that register pressure exceeds the limit.
    ResCount -= (Delta.Excess.getUnitInc() * PriorityOne);
    // Decrease priority if register pressure exceeds the limit.
    ResCount -= (Delta.CriticalMax.getUnitInc() * PriorityOne);
    // Decrease priority slightly if register pressure would increase over the
    // current maximum.
    ResCount -= (Delta.CurrentMax.getUnitInc() * PriorityTwo);
    // If there are register pressure issues, then we remove the value added
    // for the instruction being available. The rationale is that we really
    // don't want to schedule an instruction that causes a spill.
    if (IsAvailableAmt && pressureChange(SU, Q.getID() != TopQID) > 0 &&
        (Delta.Excess.getUnitInc() || Delta.CriticalMax.getUnitInc() ||
         Delta.CurrentMax.getUnitInc()))
      ResCount -= IsAvailableAmt;
    LLVM_DEBUG(if (verbose) {
      dbgs() << "RP " << Delta.Excess.getUnitInc() << "/"
             << Delta.CriticalMax.getUnitInc() << "/"
             << Delta.CurrentMax.getUnitInc() << ")|";
    });
  }

  // Give preference to a zero latency instruction if the dependent
  // instruction is in the current packet.
  if (Q.getID() == TopQID && getWeakLeft(SU, true) == 0) {
    for (const SDep &PI : SU->Preds) {
      if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() &&
          PI.getLatency() == 0 &&
          Top.ResourceModel->isInPacket(PI.getSUnit())) {
        ResCount += PriorityThree;
        LLVM_DEBUG(if (verbose) dbgs() << "Z|");
      }
    }
  } else if (Q.getID() == BotQID && getWeakLeft(SU, false) == 0) {
    for (const SDep &SI : SU->Succs) {
      if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() &&
          SI.getLatency() == 0 &&
          Bot.ResourceModel->isInPacket(SI.getSUnit())) {
        ResCount += PriorityThree;
        LLVM_DEBUG(if (verbose) dbgs() << "Z|");
      }
    }
  }

  // If the instruction has a non-zero latency dependence with an instruction
  // in the current packet, then it should not be scheduled yet. This case
  // occurs when the dependent instruction is scheduled in a new packet, so
  // the scheduler updates the current cycle and pending instructions become
  // available.
  if (CheckEarlyAvail) {
    if (Q.getID() == TopQID) {
      for (const auto &PI : SU->Preds) {
        if (PI.getLatency() > 0 &&
            Top.ResourceModel->isInPacket(PI.getSUnit())) {
          ResCount -= PriorityOne;
          LLVM_DEBUG(if (verbose) dbgs() << "D|");
        }
      }
    } else {
      for (const auto &SI : SU->Succs) {
        if (SI.getLatency() > 0 &&
            Bot.ResourceModel->isInPacket(SI.getSUnit())) {
          ResCount -= PriorityOne;
          LLVM_DEBUG(if (verbose) dbgs() << "D|");
        }
      }
    }
  }

  LLVM_DEBUG(if (verbose) {
    std::stringstream dbgstr;
    dbgstr << "Total " << std::setw(4) << ResCount << ")";
    dbgs() << dbgstr.str();
  });

  return ResCount;
}

/// Pick the best candidate from the top queue.
///
/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
/// DAG building. To adjust for the current scheduling location we need to
/// maintain the number of vreg uses remaining to be top-scheduled.
ConvergingVLIWScheduler::CandResult
ConvergingVLIWScheduler::pickNodeFromQueue(VLIWSchedBoundary &Zone,
                                           const RegPressureTracker &RPTracker,
                                           SchedCandidate &Candidate) {
  ReadyQueue &Q = Zone.Available;
  LLVM_DEBUG(if (SchedDebugVerboseLevel > 1)
                 readyQueueVerboseDump(RPTracker, Candidate, Q);
             else Q.dump(););

  // getMaxPressureDelta temporarily modifies the tracker.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);

  // BestSU remains NULL if no top candidates beat the best existing candidate.
  CandResult FoundCandidate = NoCand;
  for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
    RegPressureDelta RPDelta;
    TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
                                    DAG->getRegionCriticalPSets(),
                                    DAG->getRegPressure().MaxSetPressure);

    int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false);

    // Initialize the candidate if needed.
    if (!Candidate.SU) {
      LLVM_DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost));
      Candidate.SU = *I;
      Candidate.RPDelta = RPDelta;
      Candidate.SCost = CurrentCost;
      FoundCandidate = NodeOrder;
      continue;
    }

    // Choose node order for negative cost candidates. There is no good
    // candidate in this case.
    if (CurrentCost < 0 && Candidate.SCost < 0) {
      if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) ||
          (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
        LLVM_DEBUG(traceCandidate("NCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = NodeOrder;
      }
      continue;
    }

    // Best cost.
    if (CurrentCost > Candidate.SCost) {
      LLVM_DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost));
      Candidate.SU = *I;
      Candidate.RPDelta = RPDelta;
      Candidate.SCost = CurrentCost;
      FoundCandidate = BestCost;
      continue;
    }

    // Choose an instruction that does not depend on an artificial edge.
    unsigned CurrWeak = getWeakLeft(*I, (Q.getID() == TopQID));
    unsigned CandWeak = getWeakLeft(Candidate.SU, (Q.getID() == TopQID));
    if (CurrWeak != CandWeak) {
      if (CurrWeak < CandWeak) {
        LLVM_DEBUG(traceCandidate("WCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = Weak;
      }
      continue;
    }

    if (CurrentCost == Candidate.SCost && Zone.isLatencyBound(*I)) {
      unsigned CurrSize, CandSize;
      if (Q.getID() == TopQID) {
        CurrSize = (*I)->Succs.size();
        CandSize = Candidate.SU->Succs.size();
      } else {
        CurrSize = (*I)->Preds.size();
        CandSize = Candidate.SU->Preds.size();
      }
      if (CurrSize > CandSize) {
        LLVM_DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = BestCost;
      }
      // Keep the old candidate if it's a better candidate. That is, don't use
      // the subsequent tie breaker.
      if (CurrSize != CandSize)
        continue;
    }

    // Tie breaker.
    // To avoid scheduling indeterminism, we need a tie breaker
    // for the case when cost is identical for two nodes.
    if (UseNewerCandidate && CurrentCost == Candidate.SCost) {
      if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) ||
          (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
        LLVM_DEBUG(traceCandidate("TCAND", Q, *I, CurrentCost));
        Candidate.SU = *I;
        Candidate.RPDelta = RPDelta;
        Candidate.SCost = CurrentCost;
        FoundCandidate = NodeOrder;
        continue;
      }
    }

    // Fall through to original instruction order.
    // Only consider node order if Candidate was chosen from this Q.
    if (FoundCandidate == NoCand)
      continue;
  }
  return FoundCandidate;
}
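// When neither direction is forced, nodes are picked from both boundaries:
// the zone with the stronger heuristic signal supplies the next instruction,
// and ties fall back to the bottom zone, so the top-down and bottom-up
// schedules converge in the middle of the region.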
/// Pick the best candidate node from either the top or bottom queue.
SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    LLVM_DEBUG(dbgs() << "Picked only Bottom\n");
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    LLVM_DEBUG(dbgs() << "Picked only Top\n");
    IsTopNode = true;
    return SU;
  }
  SchedCandidate BotCand;
  // Prefer bottom scheduling when heuristics are silent.
  CandResult BotResult =
      pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
  assert(BotResult != NoCand && "failed to find the first candidate");

  // If either Q has a single candidate that provides the least increase in
  // Excess pressure, we can immediately schedule from that Q.
  //
  // RegionCriticalPSets summarizes the pressure within the scheduled region
  // and affects picking from either Q. If scheduling in one direction must
  // increase pressure for one of the excess PSets, then schedule in that
  // direction first to provide more freedom in the other direction.
  if (BotResult == SingleExcess || BotResult == SingleCritical) {
    LLVM_DEBUG(dbgs() << "Preferred Bottom Node\n");
    IsTopNode = false;
    return BotCand.SU;
  }
  // Check if the top Q has a better candidate.
  SchedCandidate TopCand;
  CandResult TopResult =
      pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
  assert(TopResult != NoCand && "failed to find the first candidate");

  if (TopResult == SingleExcess || TopResult == SingleCritical) {
    LLVM_DEBUG(dbgs() << "Preferred Top Node\n");
    IsTopNode = true;
    return TopCand.SU;
  }
  // If either Q has a single candidate that minimizes pressure above the
  // original region's pressure, pick it.
  if (BotResult == SingleMax) {
    LLVM_DEBUG(dbgs() << "Preferred Bottom Node SingleMax\n");
    IsTopNode = false;
    return BotCand.SU;
  }
  if (TopResult == SingleMax) {
    LLVM_DEBUG(dbgs() << "Preferred Top Node SingleMax\n");
    IsTopNode = true;
    return TopCand.SU;
  }
  if (TopCand.SCost > BotCand.SCost) {
    LLVM_DEBUG(dbgs() << "Preferred Top Node Cost\n");
    IsTopNode = true;
    return TopCand.SU;
  }
  // Otherwise prefer the bottom candidate in node order.
  LLVM_DEBUG(dbgs() << "Preferred Bottom in Node order\n");
  IsTopNode = false;
  return BotCand.SU;
}

/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  if (ForceTopDown) {
    SU = Top.pickOnlyChoice();
    if (!SU) {
      SchedCandidate TopCand;
      CandResult TopResult =
          pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
      assert(TopResult != NoCand && "failed to find the first candidate");
      (void)TopResult;
      SU = TopCand.SU;
    }
    IsTopNode = true;
  } else if (ForceBottomUp) {
    SU = Bot.pickOnlyChoice();
    if (!SU) {
      SchedCandidate BotCand;
      CandResult BotResult =
          pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
      assert(BotResult != NoCand && "failed to find the first candidate");
      (void)BotResult;
      SU = BotCand.SU;
    }
    IsTopNode = false;
  } else {
    SU = pickNodeBidrectional(IsTopNode);
  }
  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  LLVM_DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
                    << " Scheduling instruction in cycle "
                    << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << " ("
                    << reportPackets() << ")\n";
             DAG->dumpNode(*SU));
  return SU;
}

/// Update the scheduler's state after scheduling a node. This is the same node
/// that was just returned by pickNode(). However, VLIWMachineScheduler needs
/// to update its state based on the current cycle before MachineSchedStrategy
/// does.
void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
  if (IsTopNode) {
    Top.bumpNode(SU);
    SU->TopReadyCycle = Top.CurrCycle;
  } else {
    Bot.bumpNode(SU);
    SU->BotReadyCycle = Bot.CurrCycle;
  }
}
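// A target opts into this scheduler from its TargetPassConfig
// createMachineScheduler() hook by constructing a VLIWMachineScheduler with a
// ConvergingVLIWScheduler (or a target-specific subclass) as the strategy.
// A minimal sketch, assuming no target-specific DAG mutations are needed
// (createVLIWSched is a hypothetical name, not part of this file):
//
//   static ScheduleDAGInstrs *createVLIWSched(MachineSchedContext *C) {
//     return new VLIWMachineScheduler(
//         C, std::make_unique<ConvergingVLIWScheduler>());
//   }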