Lines matching "max", "-", and "reason" in GCNSchedStrategy.cpp

1 //===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
18 /// Generally, the reason for having multiple scheduling stages is to account
19 /// for the kernel-wide effect of register usage on occupancy. Usually, only a
24 //===----------------------------------------------------------------------===//
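
// Illustrative sketch of the occupancy trade-off described above (not code
// from this file; the 256-register file size and the function name are
// assumptions made for the example):
static unsigned vgprLimitedWaves(unsigned VGPRFileSize, unsigned VGPRsPerWave) {
  // With a 256-entry VGPR file, a wave needing 128 VGPRs caps the SIMD at
  // 2 waves, while trimming usage to 64 VGPRs allows 4. Later stages spend
  // effort only on the few regions whose pressure imposes such a cap.
  return VGPRsPerWave ? VGPRFileSize / VGPRsPerWave : 0;
}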
31 #define DEBUG_TYPE "machine-scheduler"
36 "amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden,
42 "amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden,
48 "amdgpu-schedule-metric-bias", cl::Hidden,
55 RelaxedOcc("amdgpu-schedule-relaxed-occupancy", cl::Hidden,
57 "bound (amdgpu-membound-threshold), or "
58 "Wave Limited (amdgpu-limit-wave-threshold)."),
70 MF = &DAG->MF;
72 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
75 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
77 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
79 SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
103 VGPRBudget = std::max(VGPRBudget, Granule);
108 SGPRCriticalLimit -= std::min(SGPRLimitBias + ErrorMargin, SGPRCriticalLimit);
109 VGPRCriticalLimit -= std::min(VGPRLimitBias + ErrorMargin, VGPRCriticalLimit);
110 SGPRExcessLimit -= std::min(SGPRLimitBias + ErrorMargin, SGPRExcessLimit);
111 VGPRExcessLimit -= std::min(VGPRLimitBias + ErrorMargin, VGPRExcessLimit);
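
// Note on the four subtractions above: the limits are unsigned, so the
// std::min clamp subtracts at most the limit itself and the result bottoms
// out at 0 instead of wrapping. E.g. a limit of 8 with a combined bias of
// 12 yields 0, not a huge wrapped value.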
124 /// - Explicit physical register operands
125 /// - Subregister definitions
141 for (const auto &Op : SU.getInstr()->operands()) {
156 // the tracker, so we need to pass those functions a non-const copy.
159 TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
161 TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
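
// A minimal sketch of where TempTracker presumably comes from (assumed
// context; its declaration is not among the matched lines): the incoming
// tracker is const, so a mutable alias is taken before querying pressure.
RegPressureTracker &TempTracker =
    const_cast<RegPressureTracker &>(RPTracker);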
173 if (!DAG->isTrackingPressure())
198 for (const auto &Diff : DAG->getPressureDiff(SU)) {
201 // PressureDiffs is always bottom-up so if we're working top-down we need
204 (IsBottomUp ? Diff.getUnitInc() : -Diff.getUnitInc());
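
// Worked example of the negation above: PressureDiffs are recorded for a
// bottom-up walk, so a diff of +1 register at an instruction must be applied
// as -1 when the tracker instead sweeps top-down across it.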
242 // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
254 Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
260 Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
268 int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
269 int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
296 if (DAG->isTrackingPressure()) {
309 if (TryCand.Reason != NoCand) {
332 // Set the bottom-up policy based on the state of the current bottom zone and
336 // Set the top-down policy based on the state of the current top zone and
343 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
346 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand,
348 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
355 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand,
358 "Last pick result should correspond to re-picking right now");
365 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
368 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand,
370 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
377 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand,
380 "Last pick result should correspond to re-picking right now");
389 TopCand.Reason = NoCand;
391 if (TopCand.Reason != NoCand) {
403 if (DAG->top() == DAG->bottom()) {
415 pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand,
417 assert(TopCand.Reason != NoCand && "failed to find a candidate");
426 pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand,
428 assert(BotCand.Reason != NoCand && "failed to find a candidate");
435 } while (SU->isScheduled);
437 if (SU->isTopReady())
439 if (SU->isBottomReady())
442 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
443 << *SU->getInstr());
491 TryCand.Reason = NodeOrder;
496 if (DAG->isTrackingPressure() &&
498 RegExcess, TRI, DAG->MF))
499 return TryCand.Reason != NoCand;
504 return TryCand.Reason != NoCand;
509 if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
510 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
511 return TryCand.Reason != NoCand;
517 return TryCand.Reason != NoCand;
521 return TryCand.Reason != NoCand;
525 return TryCand.Reason != NoCand;
530 return TryCand.Reason != NoCand;
536 // This is a best effort to set things up for a post-RA pass. Optimizations
540 Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
542 TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
545 return TryCand.Reason != NoCand;
547 // Avoid increasing the max critical pressure in the scheduled region.
548 if (DAG->isTrackingPressure() &&
550 TryCand, Cand, RegCritical, TRI, DAG->MF))
551 return TryCand.Reason != NoCand;
553 // Avoid increasing the max pressure of the entire region.
554 if (DAG->isTrackingPressure() &&
556 Cand, RegMax, TRI, DAG->MF))
557 return TryCand.Reason != NoCand;
561 if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||
562 (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
563 TryCand.Reason = NodeOrder;
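
// Read together, the returns above form an ordered tie-break chain: avoid
// exceeding the excess register limits first, then avoid latency stalls,
// keep clusterable instructions together, avoid raising critical and then
// overall max pressure, and finally fall back to source order (NodeOrder).
// Several checks on the elided lines between these fragments are not shown.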
620 // If the block has only one successor, then live-ins of that successor are
621 // live-outs of the current block. We can reuse the calculated live set if the
626 // a live-out register. Work around that by sticking to a one-to-one relationship
629 if (MBB->succ_size() == 1) {
630 auto *Candidate = *MBB->succ_begin();
631 if (!Candidate->empty() && Candidate->pred_size() == 1) {
632 SlotIndexes *Ind = LIS->getSlotIndexes();
633 if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(Candidate))
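
// The slot-index comparison presumably checks that MBB is laid out before
// its sole successor, so the live set computed here can be reused when the
// successor's region is visited later.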
641 if (Regions[CurRegion].first->getParent() != MBB)
643 --CurRegion;
645 auto I = MBB->begin();
650 auto LiveIn = std::move(LiveInIt->second);
651 RPTracker.reset(*MBB->begin(), &LiveIn);
672 if (CurRegion-- == RegionIdx)
680 if (I != MBB->end()) {
682 RPTracker.advance(MBB->end());
695 auto *BB = I->first->getParent();
697 auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
701 } while (I != E && I->first->getParent() == BB);
735 if (!Stage->initGCNSchedStage())
742 if (!Stage->initGCNRegion()) {
743 Stage->advanceRegion();
749 Stage->finalizeGCNRegion();
752 Stage->finalizeGCNSchedStage();
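
// A sketch of how the stage hooks above presumably nest (assumed
// reconstruction; the real control flow sits on lines this listing elides,
// and the function name and region count are stand-ins):
static void runStagesSketch(GCNSchedStage *Stage, unsigned NumRegions) {
  if (!Stage->initGCNSchedStage())
    return;
  for (unsigned R = 0; R != NumRegions; ++R) {
    if (!Stage->initGCNRegion()) {
      // Nothing to schedule here; move on to the next region.
      Stage->advanceRegion();
      continue;
    }
    // ... schedule the region ...
    Stage->finalizeGCNRegion();
  }
  Stage->finalizeGCNSchedStage();
}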
760 OS << "Max Occupancy Initial Schedule";
769 OS << "Pre-RA Rematerialize";
772 OS << "Max ILP Initial Schedule";
894 if (DAG.RegionBegin->getParent() != CurrentMBB)
906 << " " << CurrentMBB->getName()
908 if (DAG.RegionEnd != CurrentMBB->end()) dbgs() << *DAG.RegionEnd;
931 dbgs() << "Pressure before scheduling:\nRegion live-ins:"
933 << "Region live-in pressure: "
988 CurrentMBB = DAG.RegionBegin->getParent();
1004 // reason that the original schedule is better.
1045 unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
1063 // The maximum number of arch VGPRs on a non-unified register file, or the
1066 // The maximum number of arch VGPRs for both unified and non-unified register
1098 MachineInstr *DefMI = D.getSUnit()->getInstr();
1100 unsigned DefReady = ReadyCycles[DAG.getSUnit(DefMI)->NodeNum];
1101 ReadyCycle = std::max(ReadyCycle, DefReady + Latency);
1120 unsigned BBNum = ReadyCycles.begin()->first->getParent()->getNumber();
1128 dbgs() << "****************************** BUBBLE OF " << I.second - IPrev
1142 const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
1149 SumBubbles += ReadyCycle - CurrCycle;
1175 const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
1185 SumBubbles += ReadyCycle - CurrCycle;
1187 ReadyCyclesSorted.insert(std::make_pair(SU->getInstr(), ReadyCycle));
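
// Worked example of the bubble accounting above: if CurrCycle is 10 when an
// instruction's inputs first become ready at ReadyCycle 14, four idle cycles
// are added to SumBubbles. Presumably this sum feeds the schedule metric
// that amdgpu-schedule-metric-bias weighs against occupancy.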
1313 if (MI->isDebugInstr()) {
1318 if (MI->getIterator() != DAG.RegionEnd) {
1319 DAG.BB->remove(MI);
1320 DAG.BB->insert(DAG.RegionEnd, MI);
1321 if (!MI->isDebugInstr())
1322 DAG.LIS->handleMove(*MI, true);
1325 // Reset read-undef flags and update them later.
1326 for (auto &Op : MI->all_defs())
1330 if (!MI->isDebugInstr()) {
1332 // Adjust liveness and add missing dead+read-undef flags.
1333 SlotIndex SlotIdx = DAG.LIS->getInstructionIndex(*MI).getRegSlot();
1336 // Adjust for missing dead-def flags.
1340 DAG.RegionEnd = MI->getIterator();
1348 while (SkippedDebugInstr-- > 0)
1354 DAG.RegionBegin = Unsched.front()->getIterator();
1355 if (DAG.RegionBegin->isDebugInstr()) {
1357 if (MI->isDebugInstr())
1359 DAG.RegionBegin = MI->getIterator();
1375 if (!DAG.LIS->hasInterval(Reg))
1379 if (!SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
1384 MachineInstr *Def = Op->getParent();
1385 if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def))
1389 if (Def->getParent() == UseI->getParent())
1393 // live-through or used inside regions at MinOccupancy. This means that the
1394 // register must be in the live-in set for the region.
1398 if (It != DAG.LiveIns[I].end() && !It->second.none()) {
1404 // Collect regions with a rematerializable reg as a live-in to avoid
1429 // Collect only regions that have a rematerializable def as a live-in.
1434 // Make copies of the register pressure and live-ins caches that will be updated
1472 Register DefReg = Def->getOperand(0).getReg();
1476 int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs;
1489 Register Reg = Def->getOperand(0).getReg();
1494 TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
1495 Def->getOperand(0).getSubReg(), *Def, *DAG.TRI);
1497 LIS->InsertMachineInstrInMaps(*NewMI);
1498 LIS->removeInterval(Reg);
1499 LIS->createAndComputeVirtRegInterval(Reg);
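
// The LIS calls above follow the usual rematerialization idiom: register the
// new instruction with the slot indexes, discard the stale live interval for
// Reg, and recompute it from the new def and uses. The same
// remove-and-recompute pair reappears below when the old and new
// instructions are erased.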
1512 // Update RP for all regions that have this reg as a live-in and remove
1513 // the reg from all regions as a live-in.
1516 if (InsertPos->getParent() != DAG.Regions[Idx].first->getParent()) {
1517 // Def is live-through and not used in this block.
1554 Register Reg = MI->getOperand(0).getReg();
1555 LIS->RemoveMachineInstrFromMaps(*MI);
1556 MI->eraseFromParent();
1557 OldMI->clearRegisterDeads(Reg);
1558 LIS->removeInterval(Reg);
1559 LIS->createAndComputeVirtRegInterval(Reg);
1573 Register Reg = MI->getOperand(0).getReg();
1574 LIS->RemoveMachineInstrFromMaps(*OldMI);
1575 OldMI->eraseFromParent();
1576 LIS->removeInterval(Reg);
1577 LIS->createAndComputeVirtRegInterval(Reg);
1580 // Update live-ins, register pressure, and regions caches.
1584 DAG.MBBLiveIns.erase(DAG.Regions[Idx].first->getParent());
1597 if (!DAG.TII->isTriviallyReMaterializable(MI))
1617 while (I != E && MI->getParent() != RegionBoundaries[I].first->getParent())
1621 if (MI->getParent() != RegionBoundaries[I].first->getParent())
1629 std::pair(MI->getParent()->end(), MI->getParent()->end());
1651 DAG->begin(), DAG->end(), [](MachineBasicBlock::iterator MI) {
1652 unsigned Opc = MI->getOpcode();