Lines Matching +full:push +full:- +full:ci +full:- +full:container

1 //===- MachinePipeliner.cpp - Machine Software Pipeliner Pass -------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
11 // This SMS implementation is a target-independent back-end pass. When enabled,
30 //===----------------------------------------------------------------------===//
73 #include "llvm/Config/llvm-config.h"
116 static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
119 /// A command line option to enable SWP at -Os.
120 static cl::opt<bool> EnableSWPOptSize("enable-pipeliner-opt-size",
125 static cl::opt<int> SwpMaxMii("pipeliner-max-mii",
131 static cl::opt<int> SwpForceII("pipeliner-force-ii",
133 cl::Hidden, cl::init(-1));
137 SwpMaxStages("pipeliner-max-stages",
144 SwpPruneDeps("pipeliner-prune-deps",
151 SwpPruneLoopCarried("pipeliner-prune-loop-carried",
156 static cl::opt<int> SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1));
159 static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
163 static cl::opt<bool> SwpShowResMask("pipeliner-show-mask", cl::Hidden,
165 static cl::opt<bool> SwpDebugResource("pipeliner-dbg-res", cl::Hidden,
169 "pipeliner-annotate-for-testing", cl::Hidden, cl::init(false),
172 "-modulo-schedule-test pass"));
175 "pipeliner-experimental-cg", cl::Hidden, cl::init(false),
179 static cl::opt<int> SwpIISearchRange("pipeliner-ii-search-range",
184 LimitRegPressure("pipeliner-register-pressure", cl::Hidden, cl::init(false),
188 RegPressureMargin("pipeliner-register-pressure-margin", cl::Hidden,
194 MVECodeGen("pipeliner-mve-cg", cl::Hidden, cl::init(false),
200 cl::opt<bool> SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,
207 "pipeliner-force-issue-width",
209 cl::init(-1));
213 "window-sched", cl::Hidden, cl::init(WindowSchedulingFlag::WS_On),
259 mf.getSubtarget().getInstrItineraryData()->isEmpty()))
266 TII = MF->getSubtarget().getInstrInfo();
297 ORE->emit([&]() {
328 const BasicBlock *BBLK = LBLK->getBasicBlock();
332 const Instruction *TI = BBLK->getTerminator();
336 MDNode *LoopID = TI->getMetadata(LLVMContext::MD_loop);
340 assert(LoopID->getNumOperands() > 0 && "requires atleast one operand");
341 assert(LoopID->getOperand(0) == LoopID && "invalid loop");
343 for (const MDOperand &MDO : llvm::drop_begin(LoopID->operands())) {
349 MDString *S = dyn_cast<MDString>(MD->getOperand(0));
354 if (S->getString() == "llvm.loop.pipeline.initiationinterval") {
355 assert(MD->getNumOperands() == 2 &&
358 mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
360 } else if (S->getString() == "llvm.loop.pipeline.disable") {
371 ORE->emit([&]() {
381 ORE->emit([&]() {
394 if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond)) {
397 ORE->emit([&]() {
407 LI.LoopPipelinerInfo = TII->analyzeLoopForPipelining(L.getTopBlock());
411 ORE->emit([&]() {
422 ORE->emit([&]() {
436 MachineRegisterInfo &MRI = MF->getRegInfo();
456 auto Copy = BuildMI(PredB, At, DL, TII->get(TargetOpcode::COPY), NewReg)
484 unsigned size = MBB->size();
485 for (MachineBasicBlock::iterator I = MBB->getFirstTerminator(),
486 E = MBB->instr_end();
487 I != E; ++I, --size)
490 SMS.enterRegion(MBB, MBB->begin(), MBB->getFirstTerminator(), size);
517 Context.RegClassInfo->runOnMachineFunction(*MF);
597 Pass.ORE->emit([&]() {
606 if (SwpMaxMii != -1 && (int)MII > SwpMaxMii) {
610 Pass.ORE->emit([&]() {
616 << "Refer to -pipeliner-max-mii.";
660 Pass.ORE->emit([&]() {
673 Pass.ORE->emit([&]() {
676 << "No need to pipeline - no overlapped iterations in schedule.";
680 // Check that the maximum stage count is less than user-defined limit.
681 if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages) {
685 Pass.ORE->emit([&]() {
691 << ". Refer to -pipeliner-max-stages.";
696 Pass.ORE->emit([&]() {
708 OrderedInsts.push_back(SU->getInstr());
709 Cycles[SU->getInstr()] = Cycle;
710 Stages[SU->getInstr()] = Schedule.stageScheduled(SU);
734 LoopPipelinerInfo->isMVEExpanderSupported() &&
789 for (const auto &SI : SU->Succs) {
818 if (!MI->hasOneMemOperand())
820 MachineMemOperand *MM = *MI->memoperands_begin();
821 if (!MM->getValue())
823 getUnderlyingObjects(MM->getValue(), Objs);
863 for (auto *Load : I->second) {
866 MachineInstr &LdMI = *Load->getInstr();
873 if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1,
875 TII->getMemOperandWithOffset(MI, BaseOp2, Offset2,
877 if (BaseOp1->isIdenticalTo(*BaseOp2) &&
880 assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI) &&
899 if (!MMO1->getValue() || !MMO2->getValue()) {
905 if (MMO1->getValue() == MMO2->getValue() &&
906 MMO1->getOffset() <= MMO2->getOffset()) {
912 if (!AA->isNoAlias(
913 MemoryLocation::getAfter(MMO1->getValue(), MMO1->getAAInfo()),
914 MemoryLocation::getAfter(MMO2->getValue(),
915 MMO2->getAAInfo()))) {
944 for (const MachineOperand &MO : MI->operands()) {
956 if (SU != nullptr && UseMI->isPHI()) {
957 if (!MI->isPHI()) {
965 if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
976 if (SU != nullptr && DefMI->isPHI()) {
977 if (!MI->isPHI()) {
987 if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
997 MachineInstr *PMI = PI.getSUnit()->getInstr();
998 if (PMI->isPHI() && PI.getKind() == SDep::Order) {
999 if (I.getInstr()->isPHI()) {
1000 if (PMI->getOperand(0).getReg() == HasPhiUse)
1002 if (getLoopPhiReg(*PMI, PMI->getParent()) == HasPhiDef)
1027 Register OrigBase = I.getInstr()->getOperand(BasePos).getReg();
1056 for (auto &P : LastSU->Preds)
1061 LastSU->removePred(D);
1068 LastSU->addPred(Dep);
1078 /// that this doesn't have any side-effect to SwingSchedulerDAG. That is, this
1080 /// non-const operations removed.
1104 MachineInstr *MI = SU->getInstr();
1113 // FuncUnitSorter - Comparison operator used to sort instructions by
1128 unsigned SchedClass = Inst->getDesc().getSchedClass();
1130 if (InstrItins && !InstrItins->isEmpty()) {
1132 make_range(InstrItins->beginStage(SchedClass),
1133 InstrItins->endStage(SchedClass))) {
1143 if (STI && STI->getSchedModel().hasInstrSchedModel()) {
1145 STI->getSchedModel().getSchedClassDesc(SchedClass);
1146 if (!SCDesc->isValid())
1152 make_range(STI->getWriteProcResBegin(SCDesc),
1153 STI->getWriteProcResEnd(SCDesc))) {
1157 STI->getSchedModel().getProcResource(PRE.ProcResourceIdx);
1158 unsigned NumUnits = ProcResource->NumUnits;
1166 llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
1176 if (InstrItins && !InstrItins->isEmpty()) {
1178 make_range(InstrItins->beginStage(SchedClass),
1179 InstrItins->endStage(SchedClass))) {
1186 if (STI && STI->getSchedModel().hasInstrSchedModel()) {
1188 STI->getSchedModel().getSchedClassDesc(SchedClass);
1189 if (!SCDesc->isValid())
1195 make_range(STI->getWriteProcResBegin(SCDesc),
1196 STI->getWriteProcResEnd(SCDesc))) {
1203 llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
1226 // InitSetPressure takes into account the register pressure of live-in
1282 P -= Weight;
1292 return Reg.isVirtual() && MRI.getVRegDef(Reg)->getParent() == OrigMBB;
1295 // Search for live-in variables. They are factored into the register pressure
1296 // from the begining. Live-in variables used by every iteration should be
1332 // There are two patterns of last-use.
1333 // - by an instruction of the current iteration
1334 // - by a phi instruction of the next iteration (loop carried value)
1338 // - next iteration's phi instructions in i-th stage
1339 // - current iteration's instructions in i+1-th stage
1341 // This function calculates the last-use of each register while taking into
1347 // - live-in one
1348 // - defined but not used in the loop (potentially live-out)
1355 if (MI->isPHI()) {
1359 for (auto &Use : ROMap.find(MI)->getSecond().Uses)
1365 return Stages[MI] + MI->isPHI();
1370 for (auto &Use : ROMap.find(MI)->getSecond().Uses) {
1376 MachineInstr *Orig = Ite->second;
1379 Ite->second = New;
1396 // ------------------------
1400 // Stage 3 2 1 0 <- All stages overlap
1420 MI->dump();
1443 // live-in register
1459 const unsigned Iter = I - Stage;
1461 for (auto &Def : ROMap.find(MI)->getSecond().Defs)
1465 if (MI->isPHI()) {
1467 EraseReg(LiveRegSets[Iter - 1], LastUse);
1481 MI->dump();
1494 PSetNum(TRI->getNumRegPressureSets()), InitSetPressure(PSetNum, 0),
1561 /// Calculate the recurrence-constrainted minimum initiation interval.
1564 /// delay(c) - II*distance(c) <= 0. For each circuit, choose the smallest
1578 unsigned CurMII = (Delay + Distance - 1) / Distance;
1595 for (auto &OE : DAG->DDG->getOutEdges(&SUnits[i])) {
1596 // Only create a back-edge on the first and last nodes of a dependence
1599 int N = OE.getDst()->NodeNum;
1603 BackEdge = Dep->second;
1609 if (OE.getDst()->isBoundaryNode() || OE.isArtificial())
1613 // regression. This condition means that anti-dependnecies within an
1621 int N = OE.getDst()->NodeNum;
1627 // A chain edge between a store and a load is treated as a back-edge in the
1629 for (auto &IE : DAG->DDG->getInEdges(&SUnits[i])) {
1632 if (!Dst->getInstr()->mayStore() || !DAG->isLoopCarriedDep(IE))
1634 if (IE.isOrderDep() && Src->getInstr()->mayLoad()) {
1635 int N = Src->NodeNum;
1643 // Add back-edges in the adjacency matrix for the output dependences.
1675 Node2Idx->at(W) < Node2Idx->at(V) ? true : HasBackedge))
1702 if (Blocked.test(W->NodeNum))
1703 unblock(W->NodeNum);
1720 // is loop-carried to the USE in next iteration. This will help pipeliner avoid
1724 // PHI-------Anti-Dep-----> COPY/REG_SEQUENCE (loop-carried)
1725 // SRCOfCopY------True-Dep---> COPY/REG_SEQUENCE
1726 // PHI-------True-Dep------> USEOfPhi
1729 // USEOfPHI -------Artificial-Dep---> SRCOfCopy
1735 // USEOfPHI --- SRCOfCopy--- COPY/REG_SEQUENCE.
1738 for (SUnit &SU : DAG->SUnits) {
1740 if (!SU.getInstr()->isCopy() && !SU.getInstr()->isRegSequence())
1750 MachineInstr *TmpMI = TmpSU->getInstr();
1753 if (DepKind == SDep::Anti && TmpMI->isPHI())
1756 // If the source has no pre-decessors, we will end up creating cycles.
1757 else if (DepKind == SDep::Data && !TmpMI->isPHI() && TmpSU->NumPreds > 0)
1764 // Find the USEs of PHI. If the use is a PHI or REG_SEQUENCE, push back this
1765 // SUnit to the container.
1767 // Do not use iterator based loop here as we are updating the container.
1769 for (auto &Dep : PHISUs[Index]->Succs) {
1774 MachineInstr *TmpMI = TmpSU->getInstr();
1775 if (TmpMI->isPHI() || TmpMI->isRegSequence()) {
1790 if (!SDAG->Topo.IsReachable(I, Src) && Src != I) {
1791 Src->addPred(SDep(I, SDep::Artificial));
1792 SDAG->Topo.AddPred(Src, I);
1800 /// ASAP - Earliest time to schedule a node.
1801 /// ALAP - Latest time to schedule a node.
1802 /// MOV - Mobility function, difference between ALAP and ASAP.
1803 /// D - Depth of each node.
1804 /// H - Height of each node.
1821 for (const auto &IE : DDG->getInEdges(SU)) {
1828 asap = std::max(asap, (int)(getASAP(Pred) + IE.getLatency() -
1841 for (const auto &OE : DDG->getOutEdges(SU)) {
1843 if (Succ->isBoundaryNode())
1850 alap = std::min(alap, (int)(getALAP(Succ) - OE.getLatency() +
1885 for (const auto &IE : DDG->getInEdges(SU)) {
1887 if (S && S->count(PredSU) == 0)
1895 // FIXME: The following loop-carried dependencies may also need to be
1897 // - Physical register dependencies (true-dependence and WAW).
1898 // - Memory dependencies.
1899 for (const auto &OE : DDG->getOutEdges(SU)) {
1903 if (S && S->count(SuccSU) == 0)
1921 for (const auto &OE : DDG->getOutEdges(SU)) {
1923 if (S && S->count(SuccSU) == 0)
1931 // FIXME: The following loop-carried dependencies may also need to be
1933 // - Physical register dependnecies (true-dependnece and WAW).
1934 // - Memory dependencies.
1935 for (const auto &IE : DDG->getInEdges(SU)) {
1939 if (S && S->count(PredSU) == 0)
1955 if (Cur->isBoundaryNode())
1964 for (const auto &OE : DDG->getOutEdges(Cur))
1968 for (const auto &IE : DDG->getInEdges(Cur))
1977 /// Compute the live-out registers for the instructions in a node-set.
1978 /// The live-out registers are those that are defined in the node-set,
1987 const MachineInstr *MI = SU->getInstr();
1988 if (MI->isPHI())
1990 for (const MachineOperand &MO : MI->all_uses()) {
1995 for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg()))
2000 for (const MachineOperand &MO : SU->getInstr()->all_defs())
2007 for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg()))
2015 /// A heuristic to filter nodes in recurrent node-sets if the register
2019 // Skip small node-sets since they won't cause register pressure problems.
2024 RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(), false, true);
2030 return A->NodeNum > B->NodeNum;
2036 // instruction in the node-set. The tracker is set to the instruction
2038 MachineBasicBlock::const_iterator CurInstI = SU->getInstr();
2043 RecRPTracker.getMaxUpwardPressureDelta(SU->getInstr(), nullptr, RPDelta,
2048 dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
2049 << TRI->getRegPressureSetName(RPDelta.Excess.getPSet())
2084 /// Check if the existing node-sets are profitable. If not, then ignore the
2085 /// recurrent node-sets, and attempt to schedule all nodes together. This is
2086 /// a heuristic. If the MII is large and all the recurrent node-sets are small,
2088 /// starting with the recurrent node-sets.
2093 // Check if the node-set contains only a simple add recurrence.
2101 LLVM_DEBUG(dbgs() << "Clear recurrence node-sets\n");
2173 for (auto &OE : DDG->getOutEdges(SU)) {
2175 if (!OE.isArtificial() && !Successor->isBoundaryNode() &&
2179 for (auto &IE : DDG->getInEdges(SU)) {
2187 /// are returned in a different container.
2205 if (NI.getNode(0)->NodeNum == NJ.getNode(0)->NodeNum) {
2209 I->insert(SU);
2224 J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); });
2226 if (J->empty()) {
2237 /// two-level algorithm. First, a partial order is created, which
2257 // If some of the successors are in the existing node-set, then use the
2258 // top-down ordering.
2263 if (N->Succs.size() == 0)
2272 (getASAP(SU) == getASAP(maxASAP) && SU->NodeNum > maxASAP->NodeNum))
2300 LLVM_DEBUG(dbgs() << maxHeight->NodeNum << " ");
2302 for (const auto &OE : DDG->getOutEdges(maxHeight)) {
2313 // FIXME: The following loop-carried dependencies may also need to be
2315 // - Physical register dependnecies (true-dependnece and WAW).
2316 // - Memory dependencies.
2317 for (const auto &IE : DDG->getInEdges(maxHeight)) {
2351 LLVM_DEBUG(dbgs() << maxDepth->NodeNum << " ");
2359 for (const auto &IE : DDG->getInEdges(maxDepth)) {
2368 // FIXME: The following loop-carried dependencies may also need to be
2370 // - Physical register dependnecies (true-dependnece and WAW).
2371 // - Memory dependencies.
2372 for (const auto &OE : DDG->getOutEdges(maxDepth)) {
2396 dbgs() << " " << I->NodeNum << " ";
2415 HRPDetector->init(RegClassInfo);
2435 dbgs() << "Inst (" << SU->NodeNum << ") ";
2436 SU->getInstr()->dump();
2446 Schedule.insert(SU, EarlyStart, EarlyStart + (int)II - 1, II);
2449 Schedule.insert(SU, LateStart, LateStart - (int)II + 1, II);
2451 LateStart = std::min(LateStart, EarlyStart + (int)II - 1);
2456 // loop-carried output/order dependencies. Empirically, there are also
2458 if (SU->getInstr()->isPHI() ||
2459 Schedule.onlyHasLoopCarriedOutputOrOrderPreds(SU, this->getDDG()))
2466 FirstCycle + getASAP(SU) + II - 1, II);
2472 if (SwpMaxStages > -1 &&
2482 // If a schedule is found, ensure non-pipelined instructions are in stage 0
2495 !HRPDetector->detect(this, Schedule, Schedule.getMaxStageCount());
2503 scheduleFound = LoopPipelinerInfo->shouldUseSchedule(*this, Schedule);
2510 Pass.ORE->emit([&]() {
2531 if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
2534 // FIXME: This algorithm assumes instructions have fixed-size offsets.
2538 if (!BaseOp->isReg())
2541 Register BaseReg = BaseOp->getReg();
2546 if (BaseDef && BaseDef->isPHI()) {
2554 if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
2574 if (TII->isPostIncrement(*MI))
2577 if (!TII->getBaseAndOffsetPosition(*MI, BasePosLd, OffsetPosLd))
2579 Register BaseReg = MI->getOperand(BasePosLd).getReg();
2582 MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
2584 if (!Phi || !Phi->isPHI())
2587 unsigned PrevReg = getLoopPhiReg(*Phi, MI->getParent());
2591 // Check for the post-increment load/store instruction.
2596 if (!TII->isPostIncrement(*PrevDef))
2600 if (!TII->getBaseAndOffsetPosition(*PrevDef, BasePos1, OffsetPos1))
2605 int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm();
2606 int64_t StoreOffset = PrevDef->getOperand(OffsetPos1).getImm();
2608 NewMI->getOperand(OffsetPosLd).setImm(LoadOffset + StoreOffset);
2609 bool Disjoint = TII->areMemAccessesTriviallyDisjoint(*NewMI, *PrevDef);
2630 std::pair<unsigned, int64_t> RegAndOffset = It->second;
2632 if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
2634 Register BaseReg = MI->getOperand(BasePos).getReg();
2642 int OffsetDiff = DefStageNum - BaseStageNum;
2644 NewMI->getOperand(BasePos).setReg(RegAndOffset.first);
2646 --OffsetDiff;
2649 MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
2650 NewMI->getOperand(OffsetPos).setImm(NewOffset);
2651 SU->setInstr(NewMI);
2664 while (Def->isPHI()) {
2667 for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
2668 if (Def->getOperand(i + 1).getMBB() == BB) {
2669 Def = MRI.getVRegDef(Def->getOperand(i).getReg());
2682 Edge.getDst()->isBoundaryNode())
2691 MachineInstr *SI = Edge.getSrc()->getInstr();
2692 MachineInstr *DI = Edge.getDst()->getInstr();
2696 if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() ||
2697 SI->mayRaiseFPException() || DI->mayRaiseFPException() ||
2698 SI->hasOrderedMemoryRef() || DI->hasOrderedMemoryRef())
2701 if (!DI->mayLoadOrStore() || !SI->mayLoadOrStore())
2715 if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, OffsetSIsScalable,
2717 !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, OffsetDIsScalable,
2724 MachineInstr *DefS = MRI.getVRegDef(BaseOpS->getReg());
2725 MachineInstr *DefD = MRI.getVRegDef(BaseOpD->getReg());
2726 if (!DefS || !DefD || !DefS->isPHI() || !DefD->isPHI())
2738 if (!InitDefS->isIdenticalTo(*InitDefD))
2745 if (!LoopDefS || !TII->getIncrementValue(*LoopDefS, D))
2748 LocationSize AccessSizeS = (*SI->memoperands_begin())->getSize();
2749 LocationSize AccessSizeD = (*DI->memoperands_begin())->getSize();
2766 M->apply(this);
2784 int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
2786 forward ? ++curCycle : --curCycle) {
2788 if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
2792 SU->getInstr()->dump();
2795 if (!ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()))
2807 SU->getInstr()->dump();
2828 EarlyCycle = std::min(EarlyCycle, it->second);
2829 for (const auto &IE : DDG->getInEdges(PrevSU))
2847 if (Visited.count(SuccSU) || SuccSU->isBoundaryNode())
2852 LateCycle = std::max(LateCycle, it->second);
2853 for (const auto &OE : DDG->getOutEdges(SuccSU))
2862 /// return true. These instructions are characterized by having a back-ege
2865 for (auto &P : SU->Preds)
2866 if (P.getKind() == SDep::Anti && P.getSUnit()->getInstr()->isPHI())
2867 for (auto &S : P.getSUnit()->Succs)
2868 if (S.getKind() == SDep::Data && S.getSUnit()->getInstr()->isPHI())
2877 const SwingSchedulerDDG *DDG = DAG->getDDG();
2884 for (const auto &IE : DDG->getInEdges(SU)) {
2888 if (DAG->isLoopCarriedDep(IE)) {
2889 int End = earliestCycleInChain(IE, DDG) + (II - 1);
2892 int EarlyStart = cycle + IE.getLatency() - IE.getDistance() * II;
2897 for (const auto &OE : DDG->getOutEdges(SU)) {
2901 if (DAG->isLoopCarriedDep(OE)) {
2902 int Start = latestCycleInChain(OE, DDG) + 1 - II;
2905 int LateStart = cycle - OE.getLatency() + OE.getDistance() * II;
2911 for (const auto &Dep : SU->Preds) {
2914 if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() &&
2915 !SU->isPred(I))
2927 MachineInstr *MI = SU->getInstr();
2934 const SwingSchedulerDDG *DDG = SSD->getDDG();
2939 for (MachineOperand &MO : MI->operands()) {
2945 if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
2946 if (MI->getOperand(BasePos).getReg() == Reg)
2947 if (unsigned NewReg = SSD->getInstrBaseReg(SU))
2951 (*I)->getInstr()->readsWritesVirtualRegister(Reg);
2961 if (cycleScheduled(*I) == cycleScheduled(SU) && !(*I)->isSucc(SU)) {
2975 MoveDef = Pos - 1;
2983 isLoopCarriedDefOfUse(SSD, (*I)->getInstr(), MO)) {
2992 for (auto &OE : DDG->getOutEdges(SU)) {
3010 for (auto &IE : DDG->getInEdges(SU)) {
3026 // to a loop-carried dependence.
3061 SUnit *DefSU = SSD->getSUnit(&Phi);
3068 SUnit *UseSU = SSD->getSUnit(MRI.getVRegDef(LoopVal));
3071 if (UseSU->getInstr()->isPHI())
3090 if (Def->isPHI())
3093 if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
3097 unsigned LoopReg = getLoopPhiReg(*Phi, Phi->getParent());
3098 for (MachineOperand &DMO : Def->all_defs()) {
3105 /// Return true if all scheduled predecessors are loop-carried output/order
3109 for (const auto &IE : DDG->getInEdges(SU))
3121 for (auto &SU : SSD->SUnits)
3122 if (SU.isInstr() && PLI->shouldIgnoreForPipelining(SU.getInstr()))
3125 const SwingSchedulerDDG *DDG = SSD->getDDG();
3130 LLVM_DEBUG(dbgs() << "Do not pipeline SU(" << SU->NodeNum << ")\n");
3132 for (const auto &IE : DDG->getInEdges(SU))
3137 for (const auto &OE : DDG->getOutEdges(SU))
3151 for (SUnit &SU : SSD->SUnits) {
3159 // Put the non-pipelined instruction as early as possible in the schedule
3161 for (const auto &IE : SSD->getDDG()->getInEdges(&SU))
3167 for (auto &OE : SSD->getDDG()->getOutEdges(&SU))
3194 // earlier and same-cycle use to be more robust.)
3196 for (SUnit &SU : SSD->SUnits) {
3201 assert(StageDef != -1 && "Instruction should have been scheduled.");
3202 for (auto &OE : SSD->getDDG()->getOutEdges(&SU)) {
3204 if (OE.isAssignedRegDep() && !Dst->isBoundaryNode())
3216 /// A property of the node order in swing-modulo-scheduling is
3261 for (const auto &IE : DDG->getInEdges(SU)) {
3265 if (!PredSU->getInstr()->isPHI() && PredIndex < Index) {
3272 for (const auto &OE : DDG->getOutEdges(SU)) {
3277 if (SuccSU->isBoundaryNode())
3281 if (!SuccSU->getInstr()->isPHI() && SuccIndex < Index) {
3288 if (PredBefore && SuccBefore && !SU->getInstr()->isPHI()) {
3300 LLVM_DEBUG(dbgs() << Pred->NodeNum << " and successor " << Succ->NodeNum
3301 << " are scheduled before node " << SU->NodeNum
3322 MachineInstr *MI = SU->getInstr();
3323 for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
3324 const MachineOperand &MO = MI->getOperand(i);
3335 if (TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) {
3337 NewMI->getOperand(BasePos).setReg(NewBaseReg);
3339 MI->getOperand(OffsetPos).getImm() - It->second.second;
3340 NewMI->getOperand(OffsetPos).setImm(NewOffset);
3341 SU->setInstr(NewMI);
3353 if (MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
3355 OverlapReg = MI->getOperand(TiedUseIdx).getReg();
3357 NewBaseReg = MI->getOperand(i).getReg();
3369 if (SU->getInstr()->isPHI())
3374 if (!SU->getInstr()->isPHI())
3403 for (const SUnit &SU : SSD->SUnits)
3404 SSD->applyInstrChange(SU.getInstr(), *this);
3411 SSD->fixupRegisterOverlaps(cycleInstrs);
3421 os << " SU(" << I->NodeNum << ") " << *(I->getInstr());
3432 for (SUnit *CI : cycleInstrs->second) {
3433 os << "cycle " << cycle << " (" << stageScheduled(CI) << ") ";
3434 os << "(" << CI->NodeNum << ") ";
3435 CI->getInstr()->print(os);
3501 ProcResource->Name, I, Masks[I],
3502 ProcResource->NumUnits);
3504 dbgs() << " -----------------\n";
3516 ->canReserveResources(&SU.getInstr()->getDesc());
3518 const MCSchedClassDesc *SCDesc = DAG->getSchedClass(&SU);
3519 if (!SCDesc->isValid()) {
3522 dbgs() << "isPseudo:" << SU.getInstr()->isPseudo() << "\n";
3542 ->reserveResources(&SU.getInstr()->getDesc());
3544 const MCSchedClassDesc *SCDesc = DAG->getSchedClass(&SU);
3545 if (!SCDesc->isValid()) {
3548 dbgs() << "isPseudo:" << SU.getInstr()->isPseudo() << "\n";
3567 STI->getWriteProcResBegin(SCDesc), STI->getWriteProcResEnd(SCDesc)))
3571 for (int C = Cycle; C < Cycle + SCDesc->NumMicroOps; ++C)
3579 STI->getWriteProcResBegin(SCDesc), STI->getWriteProcResEnd(SCDesc)))
3581 --MRT[positiveModulo(C, InitiationInterval)][PRE.ProcResourceIdx];
3583 for (int C = Cycle; C < Cycle + SCDesc->NumMicroOps; ++C)
3584 --NumScheduledMops[positiveModulo(C, InitiationInterval)];
3592 if (MRT[Slot][I] > Desc->NumUnits)
3607 for (SUnit &SU : DAG->SUnits)
3612 for (SUnit &SU : DAG->SUnits)
3613 FuncUnitOrder.push(SU.getInstr());
3617 std::unique_ptr<DFAPacketizer>(TII->CreateTargetScheduleState(*ST)));
3622 if (TII->isZeroCost(MI->getOpcode()))
3627 unsigned NumCycles = DAG->getSUnit(MI)->Latency;
3634 MI->dump();
3638 if ((*RI)->canReserveResources(*MI)) {
3639 (*RI)->reserveResources(*MI);
3652 auto *NewResource = TII->CreateTargetScheduleState(*ST);
3653 assert(NewResource->canReserveResources(*MI) && "Reserve error.");
3654 NewResource->reserveResources(*MI);
3673 for (SUnit &SU : DAG->SUnits) {
3674 if (TII->isZeroCost(SU.getInstr()->getOpcode()))
3677 const MCSchedClassDesc *SCDesc = DAG->getSchedClass(&SU);
3678 if (!SCDesc->isValid())
3683 DAG->dumpNode(SU);
3684 dbgs() << " #Mops: " << SCDesc->NumMicroOps << "\n"
3688 NumMops += SCDesc->NumMicroOps;
3690 make_range(STI->getWriteProcResBegin(SCDesc),
3691 STI->getWriteProcResEnd(SCDesc))) {
3696 dbgs() << Desc->Name << ": " << PRE.ReleaseAtCycle << ", ";
3704 int Result = (NumMops + IssueWidth - 1) / IssueWidth;
3723 int Cycles = (ResourceCount[I] + Desc->NumUnits - 1) / Desc->NumUnits;
3727 SS << std::setw(2) << I << std::setw(16) << Desc->Name << std::setw(10)
3728 << Desc->NumUnits << std::setw(10) << ResourceCount[I]
3744 I.reset(ST->getInstrInfo()->CreateTargetScheduleState(*ST));
3752 if (Pred.isArtificial() || Dst->isBoundaryNode())
3754 // Currently, dependence that is an anti-dependences but not a loop-carried is
3766 return EdgesVec[SU->NodeNum];
3775 return EdgesVec[SU->NodeNum];
3788 for (const auto &PI : SU->Preds) {
3793 for (const auto &SI : SU->Succs) {