xref: /freebsd-src/contrib/llvm-project/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric /// \file This implements the ScheduleDAGInstrs class, which implements
100b57cec5SDimitry Andric /// re-scheduling of MachineInstrs.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric 
140b57cec5SDimitry Andric #include "llvm/CodeGen/ScheduleDAGInstrs.h"
15bdd1243dSDimitry Andric 
160b57cec5SDimitry Andric #include "llvm/ADT/IntEqClasses.h"
170b57cec5SDimitry Andric #include "llvm/ADT/MapVector.h"
180b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
190b57cec5SDimitry Andric #include "llvm/ADT/SparseSet.h"
200b57cec5SDimitry Andric #include "llvm/ADT/iterator_range.h"
215ffd83dbSDimitry Andric #include "llvm/Analysis/AliasAnalysis.h"
220b57cec5SDimitry Andric #include "llvm/Analysis/ValueTracking.h"
230b57cec5SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h"
240b57cec5SDimitry Andric #include "llvm/CodeGen/LivePhysRegs.h"
250b57cec5SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
260b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
270b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
280b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
290b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstrBundle.h"
300b57cec5SDimitry Andric #include "llvm/CodeGen/MachineMemOperand.h"
310b57cec5SDimitry Andric #include "llvm/CodeGen/MachineOperand.h"
320b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
330b57cec5SDimitry Andric #include "llvm/CodeGen/PseudoSourceValue.h"
340b57cec5SDimitry Andric #include "llvm/CodeGen/RegisterPressure.h"
350b57cec5SDimitry Andric #include "llvm/CodeGen/ScheduleDAG.h"
360b57cec5SDimitry Andric #include "llvm/CodeGen/ScheduleDFS.h"
370b57cec5SDimitry Andric #include "llvm/CodeGen/SlotIndexes.h"
380b57cec5SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
390b57cec5SDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h"
400b57cec5SDimitry Andric #include "llvm/Config/llvm-config.h"
410b57cec5SDimitry Andric #include "llvm/IR/Constants.h"
420b57cec5SDimitry Andric #include "llvm/IR/Function.h"
430b57cec5SDimitry Andric #include "llvm/IR/Type.h"
440b57cec5SDimitry Andric #include "llvm/IR/Value.h"
450b57cec5SDimitry Andric #include "llvm/MC/LaneBitmask.h"
460b57cec5SDimitry Andric #include "llvm/MC/MCRegisterInfo.h"
470b57cec5SDimitry Andric #include "llvm/Support/Casting.h"
480b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
490b57cec5SDimitry Andric #include "llvm/Support/Compiler.h"
500b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
510b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h"
520b57cec5SDimitry Andric #include "llvm/Support/Format.h"
530b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
540b57cec5SDimitry Andric #include <algorithm>
550b57cec5SDimitry Andric #include <cassert>
560b57cec5SDimitry Andric #include <iterator>
570b57cec5SDimitry Andric #include <utility>
580b57cec5SDimitry Andric #include <vector>
590b57cec5SDimitry Andric 
using namespace llvm;

// Debug type string shared with the machine-scheduler passes driving this DAG
// builder, so LLVM_DEBUG output interleaves under one -debug-only category.
#define DEBUG_TYPE "machine-scheduler"

// Opt-in use of alias analysis while building the memory dependence edges.
static cl::opt<bool>
    EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
                    cl::desc("Enable use of AA during MI DAG construction"));

// TBAA is on by default; this exists to disable it for debugging miscompiles.
static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
    cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));

// Note: the two options below might be used in tuning compile time vs
// output quality. Setting HugeRegion so large that it will never be
// reached means best-effort, but may be slow.

// When Stores and Loads maps (or NonAliasStores and NonAliasLoads)
// together hold this many SUs, a reduction of maps will be done.
static cl::opt<unsigned> HugeRegion("dag-maps-huge-region", cl::Hidden,
    cl::init(1000), cl::desc("The limit to use while constructing the DAG "
                             "prior to scheduling, at which point a trade-off "
                             "is made to avoid excessive compile time."));

// See getReductionSize(): when not set explicitly this defaults to
// HugeRegion / 2.
static cl::opt<unsigned> ReductionSize(
    "dag-maps-reduction-size", cl::Hidden,
    cl::desc("A huge scheduling region will have maps reduced by this many "
             "nodes at a time. Defaults to HugeRegion / 2."));

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Debug-build-only knob affecting SUnit dump output.
static cl::opt<bool> SchedPrintCycles(
    "sched-print-cycles", cl::Hidden, cl::init(false),
    cl::desc("Report top/bottom cycles when dumping SUnit instances"));
#endif
92bdd1243dSDimitry Andric 
930b57cec5SDimitry Andric static unsigned getReductionSize() {
940b57cec5SDimitry Andric   // Always reduce a huge region with half of the elements, except
950b57cec5SDimitry Andric   // when user sets this number explicitly.
960b57cec5SDimitry Andric   if (ReductionSize.getNumOccurrences() == 0)
970b57cec5SDimitry Andric     return HugeRegion / 2;
980b57cec5SDimitry Andric   return ReductionSize;
990b57cec5SDimitry Andric }
1000b57cec5SDimitry Andric 
101bdd1243dSDimitry Andric static void dumpSUList(const ScheduleDAGInstrs::SUList &L) {
1020b57cec5SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1030b57cec5SDimitry Andric   dbgs() << "{ ";
104bdd1243dSDimitry Andric   for (const SUnit *SU : L) {
105bdd1243dSDimitry Andric     dbgs() << "SU(" << SU->NodeNum << ")";
106bdd1243dSDimitry Andric     if (SU != L.back())
1070b57cec5SDimitry Andric       dbgs() << ", ";
1080b57cec5SDimitry Andric   }
1090b57cec5SDimitry Andric   dbgs() << "}\n";
1100b57cec5SDimitry Andric #endif
1110b57cec5SDimitry Andric }
1120b57cec5SDimitry Andric 
/// Construct a DAG builder for machine function \p mf.
/// \param mli   loop info, stored for use by subclasses/clients.
/// \param RemoveKillFlags  when true, kill flags are stripped from operands
///        as dependencies are recorded (see addPhysRegDeps).
/// UnknownValue is an undef of void type — presumably a sentinel for memory
/// references whose underlying IR value cannot be identified; confirm against
/// its uses elsewhere in this file.
ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                     const MachineLoopInfo *mli,
                                     bool RemoveKillFlags)
    : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
      RemoveKillFlags(RemoveKillFlags),
      UnknownValue(UndefValue::get(
                             Type::getVoidTy(mf.getFunction().getContext()))), Topo(SUnits, &ExitSU) {
  DbgValues.clear();

  // Resolve the per-subtarget scheduling model once up front.
  const TargetSubtargetInfo &ST = mf.getSubtarget();
  SchedModel.init(&ST);
}
1250b57cec5SDimitry Andric 
/// If this machine instr has memory reference information and it can be
/// tracked to a normal reference to a known object, return the Value
/// for that object. This function returns false if the memory location is
/// unknown or may alias anything.
static bool getUnderlyingObjectsForInstr(const MachineInstr *MI,
                                         const MachineFrameInfo &MFI,
                                         UnderlyingObjectsVector &Objects,
                                         const DataLayout &DL) {
  // Every memory operand must be analyzable; a single opaque one makes the
  // whole instruction's memory behavior unknown.
  auto AllMMOsOkay = [&]() {
    for (const MachineMemOperand *MMO : MI->memoperands()) {
      // TODO: Figure out whether isAtomic is really necessary (see D57601).
      if (MMO->isVolatile() || MMO->isAtomic())
        return false;

      if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) {
        // Function that contain tail calls don't have unique PseudoSourceValue
        // objects. Two PseudoSourceValues might refer to the same or
        // overlapping locations. The client code calling this function assumes
        // this is not the case. So return a conservative answer of no known
        // object.
        if (MFI.hasTailCall())
          return false;

        // For now, ignore PseudoSourceValues which may alias LLVM IR values
        // because the code that uses this function has no way to cope with
        // such aliases.
        if (PSV->isAliased(&MFI))
          return false;

        bool MayAlias = PSV->mayAlias(&MFI);
        Objects.emplace_back(PSV, MayAlias);
      } else if (const Value *V = MMO->getValue()) {
        SmallVector<Value *, 4> Objs;
        // Walk back to the identified underlying objects; bail if any of
        // them cannot be determined.
        if (!getUnderlyingObjectsForCodeGen(V, Objs))
          return false;

        for (Value *V : Objs) {
          assert(isIdentifiedObject(V));
          // IR-value objects are always treated as may-alias (second arg).
          Objects.emplace_back(V, true);
        }
      } else
        return false;
    }
    return true;
  };

  if (!AllMMOsOkay()) {
    // Partial results are useless to callers; report nothing.
    Objects.clear();
    return false;
  }

  return true;
}
1790b57cec5SDimitry Andric 
/// Record the basic block that subsequent scheduling regions belong to.
void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
  BB = bb;
}
1830b57cec5SDimitry Andric 
/// Clear the current block pointer once scheduling of the block is done.
void ScheduleDAGInstrs::finishBlock() {
  // Subclasses should no longer refer to the old block.
  BB = nullptr;
}
1880b57cec5SDimitry Andric 
/// Prepare to build a DAG for the region [\p begin, \p end) containing
/// \p regioninstrs instructions. \p bb must be the block previously passed
/// to startBlock().
void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
                                    MachineBasicBlock::iterator begin,
                                    MachineBasicBlock::iterator end,
                                    unsigned regioninstrs) {
  assert(bb == BB && "startBlock should set BB");
  RegionBegin = begin;
  RegionEnd = end;
  NumRegionInstrs = regioninstrs;
}
1980b57cec5SDimitry Andric 
/// Hook called when scheduling of a region finishes; the base class keeps
/// no per-region state that needs tearing down here.
void ScheduleDAGInstrs::exitRegion() {
  // Nothing to do.
}
2020b57cec5SDimitry Andric 
/// Set up ExitSU to represent the instruction terminating the region (or a
/// null instruction for a region ending at the block boundary) and record
/// register uses that later defs must be ordered before.
void ScheduleDAGInstrs::addSchedBarrierDeps() {
  // The exit instruction is the last non-debug instruction of the region,
  // if any.
  MachineInstr *ExitMI =
      RegionEnd != BB->end()
          ? &*skipDebugInstructionsBackward(RegionEnd, RegionBegin)
          : nullptr;
  ExitSU.setInstr(ExitMI);
  // Add dependencies on the defs and uses of the instruction.
  if (ExitMI) {
    for (const MachineOperand &MO : ExitMI->all_uses()) {
      Register Reg = MO.getReg();
      if (Reg.isPhysical()) {
        // OpIdx -1 marks an artificial use on ExitSU (see
        // addPhysRegDataDeps, which turns it into an Artificial edge).
        for (MCRegUnit Unit : TRI->regunits(Reg))
          Uses.insert(PhysRegSUOper(&ExitSU, -1, Unit));
      } else if (Reg.isVirtual() && MO.readsReg()) {
        addVRegUseDeps(&ExitSU, MO.getOperandNo());
      }
    }
  }
  if (!ExitMI || (!ExitMI->isCall() && !ExitMI->isBarrier())) {
    // For others, e.g. fallthrough, conditional branch, assume the exit
    // uses all the registers that are livein to the successor blocks.
    for (const MachineBasicBlock *Succ : BB->successors()) {
      for (const auto &LI : Succ->liveins()) {
        for (MCRegUnitMaskIterator U(LI.PhysReg, TRI); U.isValid(); ++U) {
          auto [Unit, Mask] = *U;
          // Only units actually covered by the live-in lane mask, and not
          // already recorded above, become artificial uses.
          if ((Mask & LI.LaneMask).any() && !Uses.contains(Unit))
            Uses.insert(PhysRegSUOper(&ExitSU, -1, Unit));
        }
      }
    }
  }
}
2350b57cec5SDimitry Andric 
/// MO is an operand of SU's instruction that defines a physical register. Adds
/// data dependencies from SU to any uses of the physical register.
void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
  const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
  assert(MO.isDef() && "expect physreg def");
  Register Reg = MO.getReg();

  // Ask the target if address-backscheduling is desirable, and if so how much.
  const TargetSubtargetInfo &ST = MF.getSubtarget();

  // Only use any non-zero latency for real defs/uses, in contrast to
  // "fake" operands added by regalloc.
  const MCInstrDesc &DefMIDesc = SU->getInstr()->getDesc();
  bool ImplicitPseudoDef = (OperIdx >= DefMIDesc.getNumOperands() &&
                            !DefMIDesc.hasImplicitDefOfPhysReg(Reg));
  // Visit every recorded use of every register unit this def covers.
  for (MCRegUnit Unit : TRI->regunits(Reg)) {
    for (RegUnit2SUnitsMap::iterator I = Uses.find(Unit); I != Uses.end();
         ++I) {
      SUnit *UseSU = I->SU;
      if (UseSU == SU)
        continue;

      // Adjust the dependence latency using operand def/use information,
      // then allow the target to perform its own adjustments.
      MachineInstr *UseInstr = nullptr;
      int UseOpIdx = I->OpIdx;
      bool ImplicitPseudoUse = false;
      SDep Dep;
      if (UseOpIdx < 0) {
        // A negative operand index marks the artificial uses attached to
        // ExitSU by addSchedBarrierDeps; model those as Artificial edges.
        Dep = SDep(SU, SDep::Artificial);
      } else {
        // Set the hasPhysRegDefs only for physreg defs that have a use within
        // the scheduling region.
        SU->hasPhysRegDefs = true;

        UseInstr = UseSU->getInstr();
        Register UseReg = UseInstr->getOperand(UseOpIdx).getReg();
        const MCInstrDesc &UseMIDesc = UseInstr->getDesc();
        ImplicitPseudoUse = UseOpIdx >= ((int)UseMIDesc.getNumOperands()) &&
                            !UseMIDesc.hasImplicitUseOfPhysReg(UseReg);

        Dep = SDep(SU, SDep::Data, UseReg);
      }
      if (!ImplicitPseudoDef && !ImplicitPseudoUse) {
        Dep.setLatency(SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
                                                        UseInstr, UseOpIdx));
      } else {
        // Regalloc-added implicit operands carry no real dataflow latency.
        Dep.setLatency(0);
      }
      // Let the target tweak the edge (latency/kind) before it is recorded.
      ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOpIdx, Dep, &SchedModel);
      UseSU->addPred(Dep);
    }
  }
}
2900b57cec5SDimitry Andric 
/// Adds register dependencies (data, anti, and output) from this SUnit
/// to following instructions in the same scheduling region that depend on the
/// physical register referenced at OperIdx.
void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
  MachineInstr *MI = SU->getInstr();
  MachineOperand &MO = MI->getOperand(OperIdx);
  Register Reg = MO.getReg();
  // We do not need to track any dependencies for constant registers.
  if (MRI.isConstantPhysReg(Reg))
    return;

  const TargetSubtargetInfo &ST = MF.getSubtarget();

  // Optionally add output and anti dependencies. For anti
  // dependencies we use a latency of 0 because for a multi-issue
  // target we want to allow the defining instruction to issue
  // in the same cycle as the using instruction.
  // TODO: Using a latency of 1 here for output dependencies assumes
  //       there's no cost for reusing registers.
  SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
  for (MCRegUnit Unit : TRI->regunits(Reg)) {
    for (RegUnit2SUnitsMap::iterator I = Defs.find(Unit); I != Defs.end();
         ++I) {
      SUnit *DefSU = I->SU;
      if (DefSU == &ExitSU)
        continue;
      MachineInstr *DefInstr = DefSU->getInstr();
      MachineOperand &DefMO = DefInstr->getOperand(I->OpIdx);
      // Skip self-edges, and output deps between two dead defs (nothing
      // observes either value).
      if (DefSU != SU &&
          (Kind != SDep::Output || !MO.isDead() || !DefMO.isDead())) {
        SDep Dep(SU, Kind, DefMO.getReg());
        if (Kind != SDep::Anti) {
          Dep.setLatency(
              SchedModel.computeOutputLatency(MI, OperIdx, DefInstr));
        }
        ST.adjustSchedDependency(SU, OperIdx, DefSU, I->OpIdx, Dep,
                                 &SchedModel);
        DefSU->addPred(Dep);
      }
    }
  }

  if (MO.isUse()) {
    SU->hasPhysRegUses = true;
    // Either insert a new Reg2SUnits entry with an empty SUnits list, or
    // retrieve the existing SUnits list for this register's uses.
    // Push this SUnit on the use list.
    for (MCRegUnit Unit : TRI->regunits(Reg))
      Uses.insert(PhysRegSUOper(SU, OperIdx, Unit));
    if (RemoveKillFlags)
      MO.setIsKill(false);
  } else {
    // This operand is a def: wire data deps to prior uses first.
    addPhysRegDataDeps(SU, OperIdx);

    // Clear previous uses and defs of this register and its subregisters.
    for (MCRegUnit Unit : TRI->regunits(Reg)) {
      Uses.eraseAll(Unit);
      if (!MO.isDead())
        Defs.eraseAll(Unit);
    }

    if (MO.isDead() && SU->isCall) {
      // Calls will not be reordered because of chain dependencies (see
      // below). Since call operands are dead, calls may continue to be added
      // to the DefList making dependence checking quadratic in the size of
      // the block. Instead, we leave only one call at the back of the
      // DefList.
      for (MCRegUnit Unit : TRI->regunits(Reg)) {
        RegUnit2SUnitsMap::RangePair P = Defs.equal_range(Unit);
        RegUnit2SUnitsMap::iterator B = P.first;
        RegUnit2SUnitsMap::iterator I = P.second;
        // Walk backward from the end, erasing trailing call entries; stop at
        // the first non-call def.
        for (bool isBegin = I == B; !isBegin; /* empty */) {
          isBegin = (--I) == B;
          if (!I->SU->isCall)
            break;
          I = Defs.erase(I);
        }
      }
    }

    // Defs are pushed in the order they are visited and never reordered.
    for (MCRegUnit Unit : TRI->regunits(Reg))
      Defs.insert(PhysRegSUOper(SU, OperIdx, Unit));
  }
}
3760b57cec5SDimitry Andric 
3770b57cec5SDimitry Andric LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
3780b57cec5SDimitry Andric {
3798bcb0991SDimitry Andric   Register Reg = MO.getReg();
3800b57cec5SDimitry Andric   // No point in tracking lanemasks if we don't have interesting subregisters.
3810b57cec5SDimitry Andric   const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
3820b57cec5SDimitry Andric   if (!RC.HasDisjunctSubRegs)
3830b57cec5SDimitry Andric     return LaneBitmask::getAll();
3840b57cec5SDimitry Andric 
3850b57cec5SDimitry Andric   unsigned SubReg = MO.getSubReg();
3860b57cec5SDimitry Andric   if (SubReg == 0)
3870b57cec5SDimitry Andric     return RC.getLaneMask();
3880b57cec5SDimitry Andric   return TRI->getSubRegIndexLaneMask(SubReg);
3890b57cec5SDimitry Andric }
3900b57cec5SDimitry Andric 
3918bcb0991SDimitry Andric bool ScheduleDAGInstrs::deadDefHasNoUse(const MachineOperand &MO) {
3928bcb0991SDimitry Andric   auto RegUse = CurrentVRegUses.find(MO.getReg());
3938bcb0991SDimitry Andric   if (RegUse == CurrentVRegUses.end())
3948bcb0991SDimitry Andric     return true;
3958bcb0991SDimitry Andric   return (RegUse->LaneMask & getLaneMaskForMO(MO)).none();
3968bcb0991SDimitry Andric }
3978bcb0991SDimitry Andric 
/// Adds register output and data dependencies from this SUnit to instructions
/// that occur later in the same scheduling region if they read from or write to
/// the virtual register defined at OperIdx.
///
/// TODO: Hoist loop induction variable increments. This has to be
/// reevaluated. Generally, IV scheduling should be done before coalescing.
void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
  MachineInstr *MI = SU->getInstr();
  MachineOperand &MO = MI->getOperand(OperIdx);
  Register Reg = MO.getReg();

  LaneBitmask DefLaneMask;
  LaneBitmask KillLaneMask;
  if (TrackLaneMasks) {
    bool IsKill = MO.getSubReg() == 0 || MO.isUndef();
    DefLaneMask = getLaneMaskForMO(MO);
    // If we have a <read-undef> flag, none of the lane values comes from an
    // earlier instruction.
    KillLaneMask = IsKill ? LaneBitmask::getAll() : DefLaneMask;

    if (MO.getSubReg() != 0 && MO.isUndef()) {
      // There may be other subregister defs on the same instruction of the same
      // register in later operands. The lanes of other defs will now be live
      // after this instruction, so these should not be treated as killed by the
      // instruction even though they appear to be killed in this one operand.
      for (const MachineOperand &OtherMO :
           llvm::drop_begin(MI->operands(), OperIdx + 1))
        if (OtherMO.isReg() && OtherMO.isDef() && OtherMO.getReg() == Reg)
          KillLaneMask &= ~getLaneMaskForMO(OtherMO);
    }

    // Clear undef flag, we'll re-add it later once we know which subregister
    // Def is first.
    MO.setIsUndef(false);
  } else {
    // Without lane tracking, every def covers — and kills — the full register.
    DefLaneMask = LaneBitmask::getAll();
    KillLaneMask = LaneBitmask::getAll();
  }

  if (MO.isDead()) {
    assert(deadDefHasNoUse(MO) && "Dead defs should have no uses");
  } else {
    // Add data dependence to all uses we found so far.
    const TargetSubtargetInfo &ST = MF.getSubtarget();
    for (VReg2SUnitOperIdxMultiMap::iterator I = CurrentVRegUses.find(Reg),
         E = CurrentVRegUses.end(); I != E; /*empty*/) {
      LaneBitmask LaneMask = I->LaneMask;
      // Ignore uses of other lanes.
      if ((LaneMask & KillLaneMask).none()) {
        ++I;
        continue;
      }

      if ((LaneMask & DefLaneMask).any()) {
        SUnit *UseSU = I->SU;
        MachineInstr *Use = UseSU->getInstr();
        SDep Dep(SU, SDep::Data, Reg);
        Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use,
                                                        I->OperandIndex));
        // Give the target a chance to adjust the edge before recording it.
        ST.adjustSchedDependency(SU, OperIdx, UseSU, I->OperandIndex, Dep,
                                 &SchedModel);
        UseSU->addPred(Dep);
      }

      LaneMask &= ~KillLaneMask;
      // If we found a Def for all lanes of this use, remove it from the list.
      if (LaneMask.any()) {
        I->LaneMask = LaneMask;
        ++I;
      } else
        I = CurrentVRegUses.erase(I);
    }
  }

  // Shortcut: Singly defined vregs do not have output/anti dependencies.
  if (MRI.hasOneDef(Reg))
    return;

  // Add output dependence to the next nearest defs of this vreg.
  //
  // Unless this definition is dead, the output dependence should be
  // transitively redundant with antidependencies from this definition's
  // uses. We're conservative for now until we have a way to guarantee the uses
  // are not eliminated sometime during scheduling. The output dependence edge
  // is also useful if output latency exceeds def-use latency.
  LaneBitmask LaneMask = DefLaneMask;
  for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
                                     CurrentVRegDefs.end())) {
    // Ignore defs for other lanes.
    if ((V2SU.LaneMask & LaneMask).none())
      continue;
    // Add an output dependence.
    SUnit *DefSU = V2SU.SU;
    // Ignore additional defs of the same lanes in one instruction. This can
    // happen because lanemasks are shared for targets with too many
    // subregisters. We also use some representation tricks/hacks where we
    // add super-register defs/uses, to imply that although we only access parts
    // of the reg we care about the full one.
    if (DefSU == SU)
      continue;
    SDep Dep(SU, SDep::Output, Reg);
    Dep.setLatency(
      SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
    DefSU->addPred(Dep);

    // Update current definition. This can get tricky if the def was about a
    // bigger lanemask before. We then have to shrink it and create a new
    // VReg2SUnit for the non-overlapping part.
    LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask;
    LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask;
    V2SU.SU = SU;
    V2SU.LaneMask = OverlapMask;
    if (NonOverlapMask.any())
      CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, DefSU));
  }
  // If there was no CurrentVRegDefs entry for some lanes yet, create one.
  if (LaneMask.any())
    CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU));
}
5170b57cec5SDimitry Andric 
5180b57cec5SDimitry Andric /// Adds a register data dependency if the instruction that defines the
5190b57cec5SDimitry Andric /// virtual register used at OperIdx is mapped to an SUnit. Add a register
5200b57cec5SDimitry Andric /// antidependency from this SUnit to instructions that occur later in the same
5210b57cec5SDimitry Andric /// scheduling region if they write the virtual register.
5220b57cec5SDimitry Andric ///
5230b57cec5SDimitry Andric /// TODO: Handle ExitSU "uses" properly.
5240b57cec5SDimitry Andric void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
5250b57cec5SDimitry Andric   const MachineInstr *MI = SU->getInstr();
526fe6060f1SDimitry Andric   assert(!MI->isDebugOrPseudoInstr());
527e8d8bef9SDimitry Andric 
5280b57cec5SDimitry Andric   const MachineOperand &MO = MI->getOperand(OperIdx);
5298bcb0991SDimitry Andric   Register Reg = MO.getReg();
5300b57cec5SDimitry Andric 
5310b57cec5SDimitry Andric   // Remember the use. Data dependencies will be added when we find the def.
5320b57cec5SDimitry Andric   LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO)
5330b57cec5SDimitry Andric                                         : LaneBitmask::getAll();
5340b57cec5SDimitry Andric   CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU));
5350b57cec5SDimitry Andric 
5360b57cec5SDimitry Andric   // Add antidependences to the following defs of the vreg.
5370b57cec5SDimitry Andric   for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
5380b57cec5SDimitry Andric                                      CurrentVRegDefs.end())) {
5390b57cec5SDimitry Andric     // Ignore defs for unrelated lanes.
5400b57cec5SDimitry Andric     LaneBitmask PrevDefLaneMask = V2SU.LaneMask;
5410b57cec5SDimitry Andric     if ((PrevDefLaneMask & LaneMask).none())
5420b57cec5SDimitry Andric       continue;
5430b57cec5SDimitry Andric     if (V2SU.SU == SU)
5440b57cec5SDimitry Andric       continue;
5450b57cec5SDimitry Andric 
5460b57cec5SDimitry Andric     V2SU.SU->addPred(SDep(SU, SDep::Anti, Reg));
5470b57cec5SDimitry Andric   }
5480b57cec5SDimitry Andric }
5490b57cec5SDimitry Andric 
5500b57cec5SDimitry Andric /// Returns true if MI is an instruction we are unable to reason about
5510b57cec5SDimitry Andric /// (like a call or something with unmodeled side effects).
552fcaf7f86SDimitry Andric static inline bool isGlobalMemoryObject(MachineInstr *MI) {
5530b57cec5SDimitry Andric   return MI->isCall() || MI->hasUnmodeledSideEffects() ||
554fcaf7f86SDimitry Andric          (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad());
5550b57cec5SDimitry Andric }
5560b57cec5SDimitry Andric 
5570b57cec5SDimitry Andric void ScheduleDAGInstrs::addChainDependency (SUnit *SUa, SUnit *SUb,
5580b57cec5SDimitry Andric                                             unsigned Latency) {
5590b57cec5SDimitry Andric   if (SUa->getInstr()->mayAlias(AAForDep, *SUb->getInstr(), UseTBAA)) {
5600b57cec5SDimitry Andric     SDep Dep(SUa, SDep::MayAliasMem);
5610b57cec5SDimitry Andric     Dep.setLatency(Latency);
5620b57cec5SDimitry Andric     SUb->addPred(Dep);
5630b57cec5SDimitry Andric   }
5640b57cec5SDimitry Andric }
5650b57cec5SDimitry Andric 
5660b57cec5SDimitry Andric /// Creates an SUnit for each real instruction, numbered in top-down
5670b57cec5SDimitry Andric /// topological order. The instruction order A < B, implies that no edge exists
5680b57cec5SDimitry Andric /// from B to A.
5690b57cec5SDimitry Andric ///
5700b57cec5SDimitry Andric /// Map each real instruction to its SUnit.
5710b57cec5SDimitry Andric ///
5720b57cec5SDimitry Andric /// After initSUnits, the SUnits vector cannot be resized and the scheduler may
5730b57cec5SDimitry Andric /// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
5740b57cec5SDimitry Andric /// instead of pointers.
5750b57cec5SDimitry Andric ///
5760b57cec5SDimitry Andric /// MachineScheduler relies on initSUnits numbering the nodes by their order in
5770b57cec5SDimitry Andric /// the original instruction list.
5780b57cec5SDimitry Andric void ScheduleDAGInstrs::initSUnits() {
5790b57cec5SDimitry Andric   // We'll be allocating one SUnit for each real instruction in the region,
5800b57cec5SDimitry Andric   // which is contained within a basic block.
5810b57cec5SDimitry Andric   SUnits.reserve(NumRegionInstrs);
5820b57cec5SDimitry Andric 
5830b57cec5SDimitry Andric   for (MachineInstr &MI : make_range(RegionBegin, RegionEnd)) {
584fe6060f1SDimitry Andric     if (MI.isDebugOrPseudoInstr())
5850b57cec5SDimitry Andric       continue;
5860b57cec5SDimitry Andric 
5870b57cec5SDimitry Andric     SUnit *SU = newSUnit(&MI);
5880b57cec5SDimitry Andric     MISUnitMap[&MI] = SU;
5890b57cec5SDimitry Andric 
5900b57cec5SDimitry Andric     SU->isCall = MI.isCall();
5910b57cec5SDimitry Andric     SU->isCommutable = MI.isCommutable();
5920b57cec5SDimitry Andric 
5930b57cec5SDimitry Andric     // Assign the Latency field of SU using target-provided information.
5940b57cec5SDimitry Andric     SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());
5950b57cec5SDimitry Andric 
5960b57cec5SDimitry Andric     // If this SUnit uses a reserved or unbuffered resource, mark it as such.
5970b57cec5SDimitry Andric     //
5980b57cec5SDimitry Andric     // Reserved resources block an instruction from issuing and stall the
5990b57cec5SDimitry Andric     // entire pipeline. These are identified by BufferSize=0.
6000b57cec5SDimitry Andric     //
6010b57cec5SDimitry Andric     // Unbuffered resources prevent execution of subsequent instructions that
6020b57cec5SDimitry Andric     // require the same resources. This is used for in-order execution pipelines
6030b57cec5SDimitry Andric     // within an out-of-order core. These are identified by BufferSize=1.
6040b57cec5SDimitry Andric     if (SchedModel.hasInstrSchedModel()) {
6050b57cec5SDimitry Andric       const MCSchedClassDesc *SC = getSchedClass(SU);
6060b57cec5SDimitry Andric       for (const MCWriteProcResEntry &PRE :
6070b57cec5SDimitry Andric            make_range(SchedModel.getWriteProcResBegin(SC),
6080b57cec5SDimitry Andric                       SchedModel.getWriteProcResEnd(SC))) {
6090b57cec5SDimitry Andric         switch (SchedModel.getProcResource(PRE.ProcResourceIdx)->BufferSize) {
6100b57cec5SDimitry Andric         case 0:
6110b57cec5SDimitry Andric           SU->hasReservedResource = true;
6120b57cec5SDimitry Andric           break;
6130b57cec5SDimitry Andric         case 1:
6140b57cec5SDimitry Andric           SU->isUnbuffered = true;
6150b57cec5SDimitry Andric           break;
6160b57cec5SDimitry Andric         default:
6170b57cec5SDimitry Andric           break;
6180b57cec5SDimitry Andric         }
6190b57cec5SDimitry Andric       }
6200b57cec5SDimitry Andric     }
6210b57cec5SDimitry Andric   }
6220b57cec5SDimitry Andric }
6230b57cec5SDimitry Andric 
6240b57cec5SDimitry Andric class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> {
6250b57cec5SDimitry Andric   /// Current total number of SUs in map.
6260b57cec5SDimitry Andric   unsigned NumNodes = 0;
6270b57cec5SDimitry Andric 
6280b57cec5SDimitry Andric   /// 1 for loads, 0 for stores. (see comment in SUList)
6290b57cec5SDimitry Andric   unsigned TrueMemOrderLatency;
6300b57cec5SDimitry Andric 
6310b57cec5SDimitry Andric public:
6320b57cec5SDimitry Andric   Value2SUsMap(unsigned lat = 0) : TrueMemOrderLatency(lat) {}
6330b57cec5SDimitry Andric 
6340b57cec5SDimitry Andric   /// To keep NumNodes up to date, insert() is used instead of
6350b57cec5SDimitry Andric   /// this operator w/ push_back().
6360b57cec5SDimitry Andric   ValueType &operator[](const SUList &Key) {
6370b57cec5SDimitry Andric     llvm_unreachable("Don't use. Use insert() instead."); };
6380b57cec5SDimitry Andric 
6390b57cec5SDimitry Andric   /// Adds SU to the SUList of V. If Map grows huge, reduce its size by calling
6400b57cec5SDimitry Andric   /// reduce().
6410b57cec5SDimitry Andric   void inline insert(SUnit *SU, ValueType V) {
6420b57cec5SDimitry Andric     MapVector::operator[](V).push_back(SU);
6430b57cec5SDimitry Andric     NumNodes++;
6440b57cec5SDimitry Andric   }
6450b57cec5SDimitry Andric 
6460b57cec5SDimitry Andric   /// Clears the list of SUs mapped to V.
6470b57cec5SDimitry Andric   void inline clearList(ValueType V) {
6480b57cec5SDimitry Andric     iterator Itr = find(V);
6490b57cec5SDimitry Andric     if (Itr != end()) {
6500b57cec5SDimitry Andric       assert(NumNodes >= Itr->second.size());
6510b57cec5SDimitry Andric       NumNodes -= Itr->second.size();
6520b57cec5SDimitry Andric 
6530b57cec5SDimitry Andric       Itr->second.clear();
6540b57cec5SDimitry Andric     }
6550b57cec5SDimitry Andric   }
6560b57cec5SDimitry Andric 
6570b57cec5SDimitry Andric   /// Clears map from all contents.
6580b57cec5SDimitry Andric   void clear() {
6590b57cec5SDimitry Andric     MapVector<ValueType, SUList>::clear();
6600b57cec5SDimitry Andric     NumNodes = 0;
6610b57cec5SDimitry Andric   }
6620b57cec5SDimitry Andric 
6630b57cec5SDimitry Andric   unsigned inline size() const { return NumNodes; }
6640b57cec5SDimitry Andric 
6650b57cec5SDimitry Andric   /// Counts the number of SUs in this map after a reduction.
6660b57cec5SDimitry Andric   void reComputeSize() {
6670b57cec5SDimitry Andric     NumNodes = 0;
6680b57cec5SDimitry Andric     for (auto &I : *this)
6690b57cec5SDimitry Andric       NumNodes += I.second.size();
6700b57cec5SDimitry Andric   }
6710b57cec5SDimitry Andric 
6720b57cec5SDimitry Andric   unsigned inline getTrueMemOrderLatency() const {
6730b57cec5SDimitry Andric     return TrueMemOrderLatency;
6740b57cec5SDimitry Andric   }
6750b57cec5SDimitry Andric 
6760b57cec5SDimitry Andric   void dump();
6770b57cec5SDimitry Andric };
6780b57cec5SDimitry Andric 
6790b57cec5SDimitry Andric void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
6800b57cec5SDimitry Andric                                              Value2SUsMap &Val2SUsMap) {
6810b57cec5SDimitry Andric   for (auto &I : Val2SUsMap)
6820b57cec5SDimitry Andric     addChainDependencies(SU, I.second,
6830b57cec5SDimitry Andric                          Val2SUsMap.getTrueMemOrderLatency());
6840b57cec5SDimitry Andric }
6850b57cec5SDimitry Andric 
6860b57cec5SDimitry Andric void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
6870b57cec5SDimitry Andric                                              Value2SUsMap &Val2SUsMap,
6880b57cec5SDimitry Andric                                              ValueType V) {
6890b57cec5SDimitry Andric   Value2SUsMap::iterator Itr = Val2SUsMap.find(V);
6900b57cec5SDimitry Andric   if (Itr != Val2SUsMap.end())
6910b57cec5SDimitry Andric     addChainDependencies(SU, Itr->second,
6920b57cec5SDimitry Andric                          Val2SUsMap.getTrueMemOrderLatency());
6930b57cec5SDimitry Andric }
6940b57cec5SDimitry Andric 
6950b57cec5SDimitry Andric void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) {
6960b57cec5SDimitry Andric   assert(BarrierChain != nullptr);
6970b57cec5SDimitry Andric 
698bdd1243dSDimitry Andric   for (auto &[V, SUs] : map) {
699bdd1243dSDimitry Andric     (void)V;
700bdd1243dSDimitry Andric     for (auto *SU : SUs)
7010b57cec5SDimitry Andric       SU->addPredBarrier(BarrierChain);
7020b57cec5SDimitry Andric   }
7030b57cec5SDimitry Andric   map.clear();
7040b57cec5SDimitry Andric }
7050b57cec5SDimitry Andric 
7060b57cec5SDimitry Andric void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) {
7070b57cec5SDimitry Andric   assert(BarrierChain != nullptr);
7080b57cec5SDimitry Andric 
7090b57cec5SDimitry Andric   // Go through all lists of SUs.
7100b57cec5SDimitry Andric   for (Value2SUsMap::iterator I = map.begin(), EE = map.end(); I != EE;) {
7110b57cec5SDimitry Andric     Value2SUsMap::iterator CurrItr = I++;
7120b57cec5SDimitry Andric     SUList &sus = CurrItr->second;
7130b57cec5SDimitry Andric     SUList::iterator SUItr = sus.begin(), SUEE = sus.end();
7140b57cec5SDimitry Andric     for (; SUItr != SUEE; ++SUItr) {
7150b57cec5SDimitry Andric       // Stop on BarrierChain or any instruction above it.
7160b57cec5SDimitry Andric       if ((*SUItr)->NodeNum <= BarrierChain->NodeNum)
7170b57cec5SDimitry Andric         break;
7180b57cec5SDimitry Andric 
7190b57cec5SDimitry Andric       (*SUItr)->addPredBarrier(BarrierChain);
7200b57cec5SDimitry Andric     }
7210b57cec5SDimitry Andric 
7220b57cec5SDimitry Andric     // Remove also the BarrierChain from list if present.
7230b57cec5SDimitry Andric     if (SUItr != SUEE && *SUItr == BarrierChain)
7240b57cec5SDimitry Andric       SUItr++;
7250b57cec5SDimitry Andric 
7260b57cec5SDimitry Andric     // Remove all SUs that are now successors of BarrierChain.
7270b57cec5SDimitry Andric     if (SUItr != sus.begin())
7280b57cec5SDimitry Andric       sus.erase(sus.begin(), SUItr);
7290b57cec5SDimitry Andric   }
7300b57cec5SDimitry Andric 
7310b57cec5SDimitry Andric   // Remove all entries with empty su lists.
7320b57cec5SDimitry Andric   map.remove_if([&](std::pair<ValueType, SUList> &mapEntry) {
7330b57cec5SDimitry Andric       return (mapEntry.second.empty()); });
7340b57cec5SDimitry Andric 
7350b57cec5SDimitry Andric   // Recompute the size of the map (NumNodes).
7360b57cec5SDimitry Andric   map.reComputeSize();
7370b57cec5SDimitry Andric }
7380b57cec5SDimitry Andric 
7398bcb0991SDimitry Andric void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA,
7400b57cec5SDimitry Andric                                         RegPressureTracker *RPTracker,
7410b57cec5SDimitry Andric                                         PressureDiffs *PDiffs,
7420b57cec5SDimitry Andric                                         LiveIntervals *LIS,
7430b57cec5SDimitry Andric                                         bool TrackLaneMasks) {
7440b57cec5SDimitry Andric   const TargetSubtargetInfo &ST = MF.getSubtarget();
7450b57cec5SDimitry Andric   bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
7460b57cec5SDimitry Andric                                                        : ST.useAA();
7470b57cec5SDimitry Andric   AAForDep = UseAA ? AA : nullptr;
7480b57cec5SDimitry Andric 
7490b57cec5SDimitry Andric   BarrierChain = nullptr;
7500b57cec5SDimitry Andric 
7510b57cec5SDimitry Andric   this->TrackLaneMasks = TrackLaneMasks;
7520b57cec5SDimitry Andric   MISUnitMap.clear();
7530b57cec5SDimitry Andric   ScheduleDAG::clearDAG();
7540b57cec5SDimitry Andric 
7550b57cec5SDimitry Andric   // Create an SUnit for each real instruction.
7560b57cec5SDimitry Andric   initSUnits();
7570b57cec5SDimitry Andric 
7580b57cec5SDimitry Andric   if (PDiffs)
7590b57cec5SDimitry Andric     PDiffs->init(SUnits.size());
7600b57cec5SDimitry Andric 
7610b57cec5SDimitry Andric   // We build scheduling units by walking a block's instruction list
7620b57cec5SDimitry Andric   // from bottom to top.
7630b57cec5SDimitry Andric 
7640b57cec5SDimitry Andric   // Each MIs' memory operand(s) is analyzed to a list of underlying
7650b57cec5SDimitry Andric   // objects. The SU is then inserted in the SUList(s) mapped from the
7660b57cec5SDimitry Andric   // Value(s). Each Value thus gets mapped to lists of SUs depending
7670b57cec5SDimitry Andric   // on it, stores and loads kept separately. Two SUs are trivially
7680b57cec5SDimitry Andric   // non-aliasing if they both depend on only identified Values and do
7690b57cec5SDimitry Andric   // not share any common Value.
7700b57cec5SDimitry Andric   Value2SUsMap Stores, Loads(1 /*TrueMemOrderLatency*/);
7710b57cec5SDimitry Andric 
7720b57cec5SDimitry Andric   // Certain memory accesses are known to not alias any SU in Stores
7730b57cec5SDimitry Andric   // or Loads, and have therefore their own 'NonAlias'
7740b57cec5SDimitry Andric   // domain. E.g. spill / reload instructions never alias LLVM I/R
7750b57cec5SDimitry Andric   // Values. It would be nice to assume that this type of memory
7760b57cec5SDimitry Andric   // accesses always have a proper memory operand modelling, and are
7770b57cec5SDimitry Andric   // therefore never unanalyzable, but this is conservatively not
7780b57cec5SDimitry Andric   // done.
7790b57cec5SDimitry Andric   Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/);
7800b57cec5SDimitry Andric 
7810b57cec5SDimitry Andric   // Track all instructions that may raise floating-point exceptions.
7820b57cec5SDimitry Andric   // These do not depend on one other (or normal loads or stores), but
7830b57cec5SDimitry Andric   // must not be rescheduled across global barriers.  Note that we don't
7840b57cec5SDimitry Andric   // really need a "map" here since we don't track those MIs by value;
7850b57cec5SDimitry Andric   // using the same Value2SUsMap data type here is simply a matter of
7860b57cec5SDimitry Andric   // convenience.
7870b57cec5SDimitry Andric   Value2SUsMap FPExceptions;
7880b57cec5SDimitry Andric 
7890b57cec5SDimitry Andric   // Remove any stale debug info; sometimes BuildSchedGraph is called again
7900b57cec5SDimitry Andric   // without emitting the info from the previous call.
7910b57cec5SDimitry Andric   DbgValues.clear();
7920b57cec5SDimitry Andric   FirstDbgValue = nullptr;
7930b57cec5SDimitry Andric 
7940b57cec5SDimitry Andric   assert(Defs.empty() && Uses.empty() &&
7950b57cec5SDimitry Andric          "Only BuildGraph should update Defs/Uses");
7960b57cec5SDimitry Andric   Defs.setUniverse(TRI->getNumRegs());
7970b57cec5SDimitry Andric   Uses.setUniverse(TRI->getNumRegs());
7980b57cec5SDimitry Andric 
7990b57cec5SDimitry Andric   assert(CurrentVRegDefs.empty() && "nobody else should use CurrentVRegDefs");
8000b57cec5SDimitry Andric   assert(CurrentVRegUses.empty() && "nobody else should use CurrentVRegUses");
8010b57cec5SDimitry Andric   unsigned NumVirtRegs = MRI.getNumVirtRegs();
8020b57cec5SDimitry Andric   CurrentVRegDefs.setUniverse(NumVirtRegs);
8030b57cec5SDimitry Andric   CurrentVRegUses.setUniverse(NumVirtRegs);
8040b57cec5SDimitry Andric 
8050b57cec5SDimitry Andric   // Model data dependencies between instructions being scheduled and the
8060b57cec5SDimitry Andric   // ExitSU.
8070b57cec5SDimitry Andric   addSchedBarrierDeps();
8080b57cec5SDimitry Andric 
8090b57cec5SDimitry Andric   // Walk the list of instructions, from bottom moving up.
8100b57cec5SDimitry Andric   MachineInstr *DbgMI = nullptr;
8110b57cec5SDimitry Andric   for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
8120b57cec5SDimitry Andric        MII != MIE; --MII) {
8130b57cec5SDimitry Andric     MachineInstr &MI = *std::prev(MII);
8140b57cec5SDimitry Andric     if (DbgMI) {
815bdd1243dSDimitry Andric       DbgValues.emplace_back(DbgMI, &MI);
8160b57cec5SDimitry Andric       DbgMI = nullptr;
8170b57cec5SDimitry Andric     }
8180b57cec5SDimitry Andric 
819fe6060f1SDimitry Andric     if (MI.isDebugValue() || MI.isDebugPHI()) {
8200b57cec5SDimitry Andric       DbgMI = &MI;
8210b57cec5SDimitry Andric       continue;
8220b57cec5SDimitry Andric     }
823fe6060f1SDimitry Andric 
824fe6060f1SDimitry Andric     if (MI.isDebugLabel() || MI.isDebugRef() || MI.isPseudoProbe())
8250b57cec5SDimitry Andric       continue;
8260b57cec5SDimitry Andric 
8270b57cec5SDimitry Andric     SUnit *SU = MISUnitMap[&MI];
8280b57cec5SDimitry Andric     assert(SU && "No SUnit mapped to this MI");
8290b57cec5SDimitry Andric 
8300b57cec5SDimitry Andric     if (RPTracker) {
8310b57cec5SDimitry Andric       RegisterOperands RegOpers;
8320b57cec5SDimitry Andric       RegOpers.collect(MI, *TRI, MRI, TrackLaneMasks, false);
8330b57cec5SDimitry Andric       if (TrackLaneMasks) {
8340b57cec5SDimitry Andric         SlotIndex SlotIdx = LIS->getInstructionIndex(MI);
8350b57cec5SDimitry Andric         RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx);
8360b57cec5SDimitry Andric       }
8370b57cec5SDimitry Andric       if (PDiffs != nullptr)
8380b57cec5SDimitry Andric         PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI);
8390b57cec5SDimitry Andric 
8400b57cec5SDimitry Andric       if (RPTracker->getPos() == RegionEnd || &*RPTracker->getPos() != &MI)
8410b57cec5SDimitry Andric         RPTracker->recedeSkipDebugValues();
8420b57cec5SDimitry Andric       assert(&*RPTracker->getPos() == &MI && "RPTracker in sync");
8430b57cec5SDimitry Andric       RPTracker->recede(RegOpers);
8440b57cec5SDimitry Andric     }
8450b57cec5SDimitry Andric 
8460b57cec5SDimitry Andric     assert(
8470b57cec5SDimitry Andric         (CanHandleTerminators || (!MI.isTerminator() && !MI.isPosition())) &&
8480b57cec5SDimitry Andric         "Cannot schedule terminators or labels!");
8490b57cec5SDimitry Andric 
8500b57cec5SDimitry Andric     // Add register-based dependencies (data, anti, and output).
8510b57cec5SDimitry Andric     // For some instructions (calls, returns, inline-asm, etc.) there can
8520b57cec5SDimitry Andric     // be explicit uses and implicit defs, in which case the use will appear
8530b57cec5SDimitry Andric     // on the operand list before the def. Do two passes over the operand
8540b57cec5SDimitry Andric     // list to make sure that defs are processed before any uses.
8550b57cec5SDimitry Andric     bool HasVRegDef = false;
8560b57cec5SDimitry Andric     for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
8570b57cec5SDimitry Andric       const MachineOperand &MO = MI.getOperand(j);
8580b57cec5SDimitry Andric       if (!MO.isReg() || !MO.isDef())
8590b57cec5SDimitry Andric         continue;
8608bcb0991SDimitry Andric       Register Reg = MO.getReg();
861bdd1243dSDimitry Andric       if (Reg.isPhysical()) {
8620b57cec5SDimitry Andric         addPhysRegDeps(SU, j);
863bdd1243dSDimitry Andric       } else if (Reg.isVirtual()) {
8640b57cec5SDimitry Andric         HasVRegDef = true;
8650b57cec5SDimitry Andric         addVRegDefDeps(SU, j);
8660b57cec5SDimitry Andric       }
8670b57cec5SDimitry Andric     }
8680b57cec5SDimitry Andric     // Now process all uses.
8690b57cec5SDimitry Andric     for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
8700b57cec5SDimitry Andric       const MachineOperand &MO = MI.getOperand(j);
8710b57cec5SDimitry Andric       // Only look at use operands.
8720b57cec5SDimitry Andric       // We do not need to check for MO.readsReg() here because subsequent
8730b57cec5SDimitry Andric       // subregister defs will get output dependence edges and need no
8740b57cec5SDimitry Andric       // additional use dependencies.
8750b57cec5SDimitry Andric       if (!MO.isReg() || !MO.isUse())
8760b57cec5SDimitry Andric         continue;
8778bcb0991SDimitry Andric       Register Reg = MO.getReg();
878bdd1243dSDimitry Andric       if (Reg.isPhysical()) {
8790b57cec5SDimitry Andric         addPhysRegDeps(SU, j);
880bdd1243dSDimitry Andric       } else if (Reg.isVirtual() && MO.readsReg()) {
8810b57cec5SDimitry Andric         addVRegUseDeps(SU, j);
8820b57cec5SDimitry Andric       }
8830b57cec5SDimitry Andric     }
8840b57cec5SDimitry Andric 
8850b57cec5SDimitry Andric     // If we haven't seen any uses in this scheduling region, create a
8860b57cec5SDimitry Andric     // dependence edge to ExitSU to model the live-out latency. This is required
8870b57cec5SDimitry Andric     // for vreg defs with no in-region use, and prefetches with no vreg def.
8880b57cec5SDimitry Andric     //
8890b57cec5SDimitry Andric     // FIXME: NumDataSuccs would be more precise than NumSuccs here. This
8900b57cec5SDimitry Andric     // check currently relies on being called before adding chain deps.
8910b57cec5SDimitry Andric     if (SU->NumSuccs == 0 && SU->Latency > 1 && (HasVRegDef || MI.mayLoad())) {
8920b57cec5SDimitry Andric       SDep Dep(SU, SDep::Artificial);
8930b57cec5SDimitry Andric       Dep.setLatency(SU->Latency - 1);
8940b57cec5SDimitry Andric       ExitSU.addPred(Dep);
8950b57cec5SDimitry Andric     }
8960b57cec5SDimitry Andric 
8970b57cec5SDimitry Andric     // Add memory dependencies (Note: isStoreToStackSlot and
8980b57cec5SDimitry Andric     // isLoadFromStackSLot are not usable after stack slots are lowered to
8990b57cec5SDimitry Andric     // actual addresses).
9000b57cec5SDimitry Andric 
9010b57cec5SDimitry Andric     // This is a barrier event that acts as a pivotal node in the DAG.
902fcaf7f86SDimitry Andric     if (isGlobalMemoryObject(&MI)) {
9030b57cec5SDimitry Andric 
9040b57cec5SDimitry Andric       // Become the barrier chain.
9050b57cec5SDimitry Andric       if (BarrierChain)
9060b57cec5SDimitry Andric         BarrierChain->addPredBarrier(SU);
9070b57cec5SDimitry Andric       BarrierChain = SU;
9080b57cec5SDimitry Andric 
9090b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "Global memory object and new barrier chain: SU("
9100b57cec5SDimitry Andric                         << BarrierChain->NodeNum << ").\n";);
9110b57cec5SDimitry Andric 
9120b57cec5SDimitry Andric       // Add dependencies against everything below it and clear maps.
9130b57cec5SDimitry Andric       addBarrierChain(Stores);
9140b57cec5SDimitry Andric       addBarrierChain(Loads);
9150b57cec5SDimitry Andric       addBarrierChain(NonAliasStores);
9160b57cec5SDimitry Andric       addBarrierChain(NonAliasLoads);
9170b57cec5SDimitry Andric       addBarrierChain(FPExceptions);
9180b57cec5SDimitry Andric 
9190b57cec5SDimitry Andric       continue;
9200b57cec5SDimitry Andric     }
9210b57cec5SDimitry Andric 
9220b57cec5SDimitry Andric     // Instructions that may raise FP exceptions may not be moved
9230b57cec5SDimitry Andric     // across any global barriers.
9240b57cec5SDimitry Andric     if (MI.mayRaiseFPException()) {
9250b57cec5SDimitry Andric       if (BarrierChain)
9260b57cec5SDimitry Andric         BarrierChain->addPredBarrier(SU);
9270b57cec5SDimitry Andric 
9280b57cec5SDimitry Andric       FPExceptions.insert(SU, UnknownValue);
9290b57cec5SDimitry Andric 
9300b57cec5SDimitry Andric       if (FPExceptions.size() >= HugeRegion) {
9310b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "Reducing FPExceptions map.\n";);
9320b57cec5SDimitry Andric         Value2SUsMap empty;
9330b57cec5SDimitry Andric         reduceHugeMemNodeMaps(FPExceptions, empty, getReductionSize());
9340b57cec5SDimitry Andric       }
9350b57cec5SDimitry Andric     }
9360b57cec5SDimitry Andric 
9370b57cec5SDimitry Andric     // If it's not a store or a variant load, we're done.
9380b57cec5SDimitry Andric     if (!MI.mayStore() &&
939fcaf7f86SDimitry Andric         !(MI.mayLoad() && !MI.isDereferenceableInvariantLoad()))
9400b57cec5SDimitry Andric       continue;
9410b57cec5SDimitry Andric 
9420b57cec5SDimitry Andric     // Always add dependecy edge to BarrierChain if present.
9430b57cec5SDimitry Andric     if (BarrierChain)
9440b57cec5SDimitry Andric       BarrierChain->addPredBarrier(SU);
9450b57cec5SDimitry Andric 
9460b57cec5SDimitry Andric     // Find the underlying objects for MI. The Objs vector is either
9470b57cec5SDimitry Andric     // empty, or filled with the Values of memory locations which this
9480b57cec5SDimitry Andric     // SU depends on.
9490b57cec5SDimitry Andric     UnderlyingObjectsVector Objs;
9500b57cec5SDimitry Andric     bool ObjsFound = getUnderlyingObjectsForInstr(&MI, MFI, Objs,
9510b57cec5SDimitry Andric                                                   MF.getDataLayout());
9520b57cec5SDimitry Andric 
9530b57cec5SDimitry Andric     if (MI.mayStore()) {
9540b57cec5SDimitry Andric       if (!ObjsFound) {
9550b57cec5SDimitry Andric         // An unknown store depends on all stores and loads.
9560b57cec5SDimitry Andric         addChainDependencies(SU, Stores);
9570b57cec5SDimitry Andric         addChainDependencies(SU, NonAliasStores);
9580b57cec5SDimitry Andric         addChainDependencies(SU, Loads);
9590b57cec5SDimitry Andric         addChainDependencies(SU, NonAliasLoads);
9600b57cec5SDimitry Andric 
9610b57cec5SDimitry Andric         // Map this store to 'UnknownValue'.
9620b57cec5SDimitry Andric         Stores.insert(SU, UnknownValue);
9630b57cec5SDimitry Andric       } else {
9640b57cec5SDimitry Andric         // Add precise dependencies against all previously seen memory
9650b57cec5SDimitry Andric         // accesses mapped to the same Value(s).
9660b57cec5SDimitry Andric         for (const UnderlyingObject &UnderlObj : Objs) {
9670b57cec5SDimitry Andric           ValueType V = UnderlObj.getValue();
9680b57cec5SDimitry Andric           bool ThisMayAlias = UnderlObj.mayAlias();
9690b57cec5SDimitry Andric 
9700b57cec5SDimitry Andric           // Add dependencies to previous stores and loads mapped to V.
9710b57cec5SDimitry Andric           addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
9720b57cec5SDimitry Andric           addChainDependencies(SU, (ThisMayAlias ? Loads : NonAliasLoads), V);
9730b57cec5SDimitry Andric         }
9740b57cec5SDimitry Andric         // Update the store map after all chains have been added to avoid adding
9750b57cec5SDimitry Andric         // self-loop edge if multiple underlying objects are present.
9760b57cec5SDimitry Andric         for (const UnderlyingObject &UnderlObj : Objs) {
9770b57cec5SDimitry Andric           ValueType V = UnderlObj.getValue();
9780b57cec5SDimitry Andric           bool ThisMayAlias = UnderlObj.mayAlias();
9790b57cec5SDimitry Andric 
9800b57cec5SDimitry Andric           // Map this store to V.
9810b57cec5SDimitry Andric           (ThisMayAlias ? Stores : NonAliasStores).insert(SU, V);
9820b57cec5SDimitry Andric         }
9830b57cec5SDimitry Andric         // The store may have dependencies to unanalyzable loads and
9840b57cec5SDimitry Andric         // stores.
9850b57cec5SDimitry Andric         addChainDependencies(SU, Loads, UnknownValue);
9860b57cec5SDimitry Andric         addChainDependencies(SU, Stores, UnknownValue);
9870b57cec5SDimitry Andric       }
9880b57cec5SDimitry Andric     } else { // SU is a load.
9890b57cec5SDimitry Andric       if (!ObjsFound) {
9900b57cec5SDimitry Andric         // An unknown load depends on all stores.
9910b57cec5SDimitry Andric         addChainDependencies(SU, Stores);
9920b57cec5SDimitry Andric         addChainDependencies(SU, NonAliasStores);
9930b57cec5SDimitry Andric 
9940b57cec5SDimitry Andric         Loads.insert(SU, UnknownValue);
9950b57cec5SDimitry Andric       } else {
9960b57cec5SDimitry Andric         for (const UnderlyingObject &UnderlObj : Objs) {
9970b57cec5SDimitry Andric           ValueType V = UnderlObj.getValue();
9980b57cec5SDimitry Andric           bool ThisMayAlias = UnderlObj.mayAlias();
9990b57cec5SDimitry Andric 
10000b57cec5SDimitry Andric           // Add precise dependencies against all previously seen stores
10010b57cec5SDimitry Andric           // mapping to the same Value(s).
10020b57cec5SDimitry Andric           addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
10030b57cec5SDimitry Andric 
10040b57cec5SDimitry Andric           // Map this load to V.
10050b57cec5SDimitry Andric           (ThisMayAlias ? Loads : NonAliasLoads).insert(SU, V);
10060b57cec5SDimitry Andric         }
10070b57cec5SDimitry Andric         // The load may have dependencies to unanalyzable stores.
10080b57cec5SDimitry Andric         addChainDependencies(SU, Stores, UnknownValue);
10090b57cec5SDimitry Andric       }
10100b57cec5SDimitry Andric     }
10110b57cec5SDimitry Andric 
10120b57cec5SDimitry Andric     // Reduce maps if they grow huge.
10130b57cec5SDimitry Andric     if (Stores.size() + Loads.size() >= HugeRegion) {
10140b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "Reducing Stores and Loads maps.\n";);
10150b57cec5SDimitry Andric       reduceHugeMemNodeMaps(Stores, Loads, getReductionSize());
10160b57cec5SDimitry Andric     }
10170b57cec5SDimitry Andric     if (NonAliasStores.size() + NonAliasLoads.size() >= HugeRegion) {
10180b57cec5SDimitry Andric       LLVM_DEBUG(
10190b57cec5SDimitry Andric           dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n";);
10200b57cec5SDimitry Andric       reduceHugeMemNodeMaps(NonAliasStores, NonAliasLoads, getReductionSize());
10210b57cec5SDimitry Andric     }
10220b57cec5SDimitry Andric   }
10230b57cec5SDimitry Andric 
10240b57cec5SDimitry Andric   if (DbgMI)
10250b57cec5SDimitry Andric     FirstDbgValue = DbgMI;
10260b57cec5SDimitry Andric 
10270b57cec5SDimitry Andric   Defs.clear();
10280b57cec5SDimitry Andric   Uses.clear();
10290b57cec5SDimitry Andric   CurrentVRegDefs.clear();
10300b57cec5SDimitry Andric   CurrentVRegUses.clear();
10310b57cec5SDimitry Andric 
10320b57cec5SDimitry Andric   Topo.MarkDirty();
10330b57cec5SDimitry Andric }
10340b57cec5SDimitry Andric 
10350b57cec5SDimitry Andric raw_ostream &llvm::operator<<(raw_ostream &OS, const PseudoSourceValue* PSV) {
10360b57cec5SDimitry Andric   PSV->printCustom(OS);
10370b57cec5SDimitry Andric   return OS;
10380b57cec5SDimitry Andric }
10390b57cec5SDimitry Andric 
10400b57cec5SDimitry Andric void ScheduleDAGInstrs::Value2SUsMap::dump() {
1041bdd1243dSDimitry Andric   for (const auto &[ValType, SUs] : *this) {
104206c3fb27SDimitry Andric     if (isa<const Value *>(ValType)) {
104306c3fb27SDimitry Andric       const Value *V = cast<const Value *>(ValType);
10440b57cec5SDimitry Andric       if (isa<UndefValue>(V))
10450b57cec5SDimitry Andric         dbgs() << "Unknown";
10460b57cec5SDimitry Andric       else
10470b57cec5SDimitry Andric         V->printAsOperand(dbgs());
104806c3fb27SDimitry Andric     } else if (isa<const PseudoSourceValue *>(ValType))
104906c3fb27SDimitry Andric       dbgs() << cast<const PseudoSourceValue *>(ValType);
10500b57cec5SDimitry Andric     else
10510b57cec5SDimitry Andric       llvm_unreachable("Unknown Value type.");
10520b57cec5SDimitry Andric 
10530b57cec5SDimitry Andric     dbgs() << " : ";
1054bdd1243dSDimitry Andric     dumpSUList(SUs);
10550b57cec5SDimitry Andric   }
10560b57cec5SDimitry Andric }
10570b57cec5SDimitry Andric 
10580b57cec5SDimitry Andric void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
10590b57cec5SDimitry Andric                                               Value2SUsMap &loads, unsigned N) {
10600b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n"; stores.dump();
10610b57cec5SDimitry Andric              dbgs() << "Loading SUnits:\n"; loads.dump());
10620b57cec5SDimitry Andric 
10630b57cec5SDimitry Andric   // Insert all SU's NodeNums into a vector and sort it.
10640b57cec5SDimitry Andric   std::vector<unsigned> NodeNums;
10650b57cec5SDimitry Andric   NodeNums.reserve(stores.size() + loads.size());
1066bdd1243dSDimitry Andric   for (const auto &[V, SUs] : stores) {
1067bdd1243dSDimitry Andric     (void)V;
1068bdd1243dSDimitry Andric     for (const auto *SU : SUs)
10690b57cec5SDimitry Andric       NodeNums.push_back(SU->NodeNum);
1070bdd1243dSDimitry Andric   }
1071bdd1243dSDimitry Andric   for (const auto &[V, SUs] : loads) {
1072bdd1243dSDimitry Andric     (void)V;
1073bdd1243dSDimitry Andric     for (const auto *SU : SUs)
10740b57cec5SDimitry Andric       NodeNums.push_back(SU->NodeNum);
1075bdd1243dSDimitry Andric   }
10760b57cec5SDimitry Andric   llvm::sort(NodeNums);
10770b57cec5SDimitry Andric 
10780b57cec5SDimitry Andric   // The N last elements in NodeNums will be removed, and the SU with
10790b57cec5SDimitry Andric   // the lowest NodeNum of them will become the new BarrierChain to
10800b57cec5SDimitry Andric   // let the not yet seen SUs have a dependency to the removed SUs.
10810b57cec5SDimitry Andric   assert(N <= NodeNums.size());
10820b57cec5SDimitry Andric   SUnit *newBarrierChain = &SUnits[*(NodeNums.end() - N)];
10830b57cec5SDimitry Andric   if (BarrierChain) {
10840b57cec5SDimitry Andric     // The aliasing and non-aliasing maps reduce independently of each
10850b57cec5SDimitry Andric     // other, but share a common BarrierChain. Check if the
10860b57cec5SDimitry Andric     // newBarrierChain is above the former one. If it is not, it may
10870b57cec5SDimitry Andric     // introduce a loop to use newBarrierChain, so keep the old one.
10880b57cec5SDimitry Andric     if (newBarrierChain->NodeNum < BarrierChain->NodeNum) {
10890b57cec5SDimitry Andric       BarrierChain->addPredBarrier(newBarrierChain);
10900b57cec5SDimitry Andric       BarrierChain = newBarrierChain;
10910b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "Inserting new barrier chain: SU("
10920b57cec5SDimitry Andric                         << BarrierChain->NodeNum << ").\n";);
10930b57cec5SDimitry Andric     }
10940b57cec5SDimitry Andric     else
10950b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "Keeping old barrier chain: SU("
10960b57cec5SDimitry Andric                         << BarrierChain->NodeNum << ").\n";);
10970b57cec5SDimitry Andric   }
10980b57cec5SDimitry Andric   else
10990b57cec5SDimitry Andric     BarrierChain = newBarrierChain;
11000b57cec5SDimitry Andric 
11010b57cec5SDimitry Andric   insertBarrierChain(stores);
11020b57cec5SDimitry Andric   insertBarrierChain(loads);
11030b57cec5SDimitry Andric 
11040b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "After reduction:\nStoring SUnits:\n"; stores.dump();
11050b57cec5SDimitry Andric              dbgs() << "Loading SUnits:\n"; loads.dump());
11060b57cec5SDimitry Andric }
11070b57cec5SDimitry Andric 
1108*0fca6ea1SDimitry Andric static void toggleKills(const MachineRegisterInfo &MRI, LiveRegUnits &LiveRegs,
11090b57cec5SDimitry Andric                         MachineInstr &MI, bool addToLiveRegs) {
11100b57cec5SDimitry Andric   for (MachineOperand &MO : MI.operands()) {
11110b57cec5SDimitry Andric     if (!MO.isReg() || !MO.readsReg())
11120b57cec5SDimitry Andric       continue;
11138bcb0991SDimitry Andric     Register Reg = MO.getReg();
11140b57cec5SDimitry Andric     if (!Reg)
11150b57cec5SDimitry Andric       continue;
11160b57cec5SDimitry Andric 
11170b57cec5SDimitry Andric     // Things that are available after the instruction are killed by it.
1118*0fca6ea1SDimitry Andric     bool IsKill = LiveRegs.available(Reg);
1119*0fca6ea1SDimitry Andric 
1120*0fca6ea1SDimitry Andric     // Exception: Do not kill reserved registers
1121*0fca6ea1SDimitry Andric     MO.setIsKill(IsKill && !MRI.isReserved(Reg));
11220b57cec5SDimitry Andric     if (addToLiveRegs)
11230b57cec5SDimitry Andric       LiveRegs.addReg(Reg);
11240b57cec5SDimitry Andric   }
11250b57cec5SDimitry Andric }
11260b57cec5SDimitry Andric 
11270b57cec5SDimitry Andric void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
11280b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Fixup kills for " << printMBBReference(MBB) << '\n');
11290b57cec5SDimitry Andric 
11300b57cec5SDimitry Andric   LiveRegs.init(*TRI);
11310b57cec5SDimitry Andric   LiveRegs.addLiveOuts(MBB);
11320b57cec5SDimitry Andric 
11330b57cec5SDimitry Andric   // Examine block from end to start...
1134349cc55cSDimitry Andric   for (MachineInstr &MI : llvm::reverse(MBB)) {
1135fe6060f1SDimitry Andric     if (MI.isDebugOrPseudoInstr())
11360b57cec5SDimitry Andric       continue;
11370b57cec5SDimitry Andric 
11380b57cec5SDimitry Andric     // Update liveness.  Registers that are defed but not used in this
11390b57cec5SDimitry Andric     // instruction are now dead. Mark register and all subregs as they
11400b57cec5SDimitry Andric     // are completely defined.
11410b57cec5SDimitry Andric     for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
11420b57cec5SDimitry Andric       const MachineOperand &MO = *O;
11430b57cec5SDimitry Andric       if (MO.isReg()) {
11440b57cec5SDimitry Andric         if (!MO.isDef())
11450b57cec5SDimitry Andric           continue;
11468bcb0991SDimitry Andric         Register Reg = MO.getReg();
11470b57cec5SDimitry Andric         if (!Reg)
11480b57cec5SDimitry Andric           continue;
11490b57cec5SDimitry Andric         LiveRegs.removeReg(Reg);
11500b57cec5SDimitry Andric       } else if (MO.isRegMask()) {
1151*0fca6ea1SDimitry Andric         LiveRegs.removeRegsNotPreserved(MO.getRegMask());
11520b57cec5SDimitry Andric       }
11530b57cec5SDimitry Andric     }
11540b57cec5SDimitry Andric 
11550b57cec5SDimitry Andric     // If there is a bundle header fix it up first.
11560b57cec5SDimitry Andric     if (!MI.isBundled()) {
11570b57cec5SDimitry Andric       toggleKills(MRI, LiveRegs, MI, true);
11580b57cec5SDimitry Andric     } else {
11590b57cec5SDimitry Andric       MachineBasicBlock::instr_iterator Bundle = MI.getIterator();
11600b57cec5SDimitry Andric       if (MI.isBundle())
11610b57cec5SDimitry Andric         toggleKills(MRI, LiveRegs, MI, false);
11620b57cec5SDimitry Andric 
11630b57cec5SDimitry Andric       // Some targets make the (questionable) assumtion that the instructions
11640b57cec5SDimitry Andric       // inside the bundle are ordered and consequently only the last use of
11650b57cec5SDimitry Andric       // a register inside the bundle can kill it.
11660b57cec5SDimitry Andric       MachineBasicBlock::instr_iterator I = std::next(Bundle);
11670b57cec5SDimitry Andric       while (I->isBundledWithSucc())
11680b57cec5SDimitry Andric         ++I;
11690b57cec5SDimitry Andric       do {
1170fe6060f1SDimitry Andric         if (!I->isDebugOrPseudoInstr())
11710b57cec5SDimitry Andric           toggleKills(MRI, LiveRegs, *I, true);
11720b57cec5SDimitry Andric         --I;
11730b57cec5SDimitry Andric       } while (I != Bundle);
11740b57cec5SDimitry Andric     }
11750b57cec5SDimitry Andric   }
11760b57cec5SDimitry Andric }
11770b57cec5SDimitry Andric 
11780b57cec5SDimitry Andric void ScheduleDAGInstrs::dumpNode(const SUnit &SU) const {
11790b57cec5SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
11800b57cec5SDimitry Andric   dumpNodeName(SU);
1181bdd1243dSDimitry Andric   if (SchedPrintCycles)
1182bdd1243dSDimitry Andric     dbgs() << " [TopReadyCycle = " << SU.TopReadyCycle
1183bdd1243dSDimitry Andric            << ", BottomReadyCycle = " << SU.BotReadyCycle << "]";
11840b57cec5SDimitry Andric   dbgs() << ": ";
11850b57cec5SDimitry Andric   SU.getInstr()->dump();
11860b57cec5SDimitry Andric #endif
11870b57cec5SDimitry Andric }
11880b57cec5SDimitry Andric 
11890b57cec5SDimitry Andric void ScheduleDAGInstrs::dump() const {
11900b57cec5SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
11910b57cec5SDimitry Andric   if (EntrySU.getInstr() != nullptr)
11920b57cec5SDimitry Andric     dumpNodeAll(EntrySU);
11930b57cec5SDimitry Andric   for (const SUnit &SU : SUnits)
11940b57cec5SDimitry Andric     dumpNodeAll(SU);
11950b57cec5SDimitry Andric   if (ExitSU.getInstr() != nullptr)
11960b57cec5SDimitry Andric     dumpNodeAll(ExitSU);
11970b57cec5SDimitry Andric #endif
11980b57cec5SDimitry Andric }
11990b57cec5SDimitry Andric 
12000b57cec5SDimitry Andric std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
12010b57cec5SDimitry Andric   std::string s;
12020b57cec5SDimitry Andric   raw_string_ostream oss(s);
12030b57cec5SDimitry Andric   if (SU == &EntrySU)
12040b57cec5SDimitry Andric     oss << "<entry>";
12050b57cec5SDimitry Andric   else if (SU == &ExitSU)
12060b57cec5SDimitry Andric     oss << "<exit>";
12070b57cec5SDimitry Andric   else
1208e8d8bef9SDimitry Andric     SU->getInstr()->print(oss, /*IsStandalone=*/true);
1209*0fca6ea1SDimitry Andric   return s;
12100b57cec5SDimitry Andric }
12110b57cec5SDimitry Andric 
12120b57cec5SDimitry Andric /// Return the basic block label. It is not necessarilly unique because a block
12130b57cec5SDimitry Andric /// contains multiple scheduling regions. But it is fine for visualization.
12140b57cec5SDimitry Andric std::string ScheduleDAGInstrs::getDAGName() const {
12150b57cec5SDimitry Andric   return "dag." + BB->getFullName();
12160b57cec5SDimitry Andric }
12170b57cec5SDimitry Andric 
12180b57cec5SDimitry Andric bool ScheduleDAGInstrs::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
12190b57cec5SDimitry Andric   return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
12200b57cec5SDimitry Andric }
12210b57cec5SDimitry Andric 
12220b57cec5SDimitry Andric bool ScheduleDAGInstrs::addEdge(SUnit *SuccSU, const SDep &PredDep) {
12230b57cec5SDimitry Andric   if (SuccSU != &ExitSU) {
12240b57cec5SDimitry Andric     // Do not use WillCreateCycle, it assumes SD scheduling.
12250b57cec5SDimitry Andric     // If Pred is reachable from Succ, then the edge creates a cycle.
12260b57cec5SDimitry Andric     if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
12270b57cec5SDimitry Andric       return false;
12280b57cec5SDimitry Andric     Topo.AddPredQueued(SuccSU, PredDep.getSUnit());
12290b57cec5SDimitry Andric   }
12300b57cec5SDimitry Andric   SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
12310b57cec5SDimitry Andric   // Return true regardless of whether a new edge needed to be inserted.
12320b57cec5SDimitry Andric   return true;
12330b57cec5SDimitry Andric }
12340b57cec5SDimitry Andric 
12350b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
12360b57cec5SDimitry Andric // SchedDFSResult Implementation
12370b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
12380b57cec5SDimitry Andric 
12390b57cec5SDimitry Andric namespace llvm {
12400b57cec5SDimitry Andric 
12410b57cec5SDimitry Andric /// Internal state used to compute SchedDFSResult.
12420b57cec5SDimitry Andric class SchedDFSImpl {
12430b57cec5SDimitry Andric   SchedDFSResult &R;
12440b57cec5SDimitry Andric 
12450b57cec5SDimitry Andric   /// Join DAG nodes into equivalence classes by their subtree.
12460b57cec5SDimitry Andric   IntEqClasses SubtreeClasses;
12470b57cec5SDimitry Andric   /// List PredSU, SuccSU pairs that represent data edges between subtrees.
12480b57cec5SDimitry Andric   std::vector<std::pair<const SUnit *, const SUnit*>> ConnectionPairs;
12490b57cec5SDimitry Andric 
12500b57cec5SDimitry Andric   struct RootData {
12510b57cec5SDimitry Andric     unsigned NodeID;
12520b57cec5SDimitry Andric     unsigned ParentNodeID;  ///< Parent node (member of the parent subtree).
12530b57cec5SDimitry Andric     unsigned SubInstrCount = 0; ///< Instr count in this tree only, not
12540b57cec5SDimitry Andric                                 /// children.
12550b57cec5SDimitry Andric 
12560b57cec5SDimitry Andric     RootData(unsigned id): NodeID(id),
12570b57cec5SDimitry Andric                            ParentNodeID(SchedDFSResult::InvalidSubtreeID) {}
12580b57cec5SDimitry Andric 
12590b57cec5SDimitry Andric     unsigned getSparseSetIndex() const { return NodeID; }
12600b57cec5SDimitry Andric   };
12610b57cec5SDimitry Andric 
12620b57cec5SDimitry Andric   SparseSet<RootData> RootSet;
12630b57cec5SDimitry Andric 
12640b57cec5SDimitry Andric public:
12650b57cec5SDimitry Andric   SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) {
12660b57cec5SDimitry Andric     RootSet.setUniverse(R.DFSNodeData.size());
12670b57cec5SDimitry Andric   }
12680b57cec5SDimitry Andric 
12690b57cec5SDimitry Andric   /// Returns true if this node been visited by the DFS traversal.
12700b57cec5SDimitry Andric   ///
12710b57cec5SDimitry Andric   /// During visitPostorderNode the Node's SubtreeID is assigned to the Node
12720b57cec5SDimitry Andric   /// ID. Later, SubtreeID is updated but remains valid.
12730b57cec5SDimitry Andric   bool isVisited(const SUnit *SU) const {
12740b57cec5SDimitry Andric     return R.DFSNodeData[SU->NodeNum].SubtreeID
12750b57cec5SDimitry Andric       != SchedDFSResult::InvalidSubtreeID;
12760b57cec5SDimitry Andric   }
12770b57cec5SDimitry Andric 
12780b57cec5SDimitry Andric   /// Initializes this node's instruction count. We don't need to flag the node
12790b57cec5SDimitry Andric   /// visited until visitPostorder because the DAG cannot have cycles.
12800b57cec5SDimitry Andric   void visitPreorder(const SUnit *SU) {
12810b57cec5SDimitry Andric     R.DFSNodeData[SU->NodeNum].InstrCount =
12820b57cec5SDimitry Andric       SU->getInstr()->isTransient() ? 0 : 1;
12830b57cec5SDimitry Andric   }
12840b57cec5SDimitry Andric 
12850b57cec5SDimitry Andric   /// Called once for each node after all predecessors are visited. Revisit this
12860b57cec5SDimitry Andric   /// node's predecessors and potentially join them now that we know the ILP of
12870b57cec5SDimitry Andric   /// the other predecessors.
12880b57cec5SDimitry Andric   void visitPostorderNode(const SUnit *SU) {
12890b57cec5SDimitry Andric     // Mark this node as the root of a subtree. It may be joined with its
12900b57cec5SDimitry Andric     // successors later.
12910b57cec5SDimitry Andric     R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum;
12920b57cec5SDimitry Andric     RootData RData(SU->NodeNum);
12930b57cec5SDimitry Andric     RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1;
12940b57cec5SDimitry Andric 
12950b57cec5SDimitry Andric     // If any predecessors are still in their own subtree, they either cannot be
12960b57cec5SDimitry Andric     // joined or are large enough to remain separate. If this parent node's
12970b57cec5SDimitry Andric     // total instruction count is not greater than a child subtree by at least
12980b57cec5SDimitry Andric     // the subtree limit, then try to join it now since splitting subtrees is
12990b57cec5SDimitry Andric     // only useful if multiple high-pressure paths are possible.
13000b57cec5SDimitry Andric     unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount;
13010b57cec5SDimitry Andric     for (const SDep &PredDep : SU->Preds) {
13020b57cec5SDimitry Andric       if (PredDep.getKind() != SDep::Data)
13030b57cec5SDimitry Andric         continue;
13040b57cec5SDimitry Andric       unsigned PredNum = PredDep.getSUnit()->NodeNum;
13050b57cec5SDimitry Andric       if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit)
13060b57cec5SDimitry Andric         joinPredSubtree(PredDep, SU, /*CheckLimit=*/false);
13070b57cec5SDimitry Andric 
13080b57cec5SDimitry Andric       // Either link or merge the TreeData entry from the child to the parent.
13090b57cec5SDimitry Andric       if (R.DFSNodeData[PredNum].SubtreeID == PredNum) {
13100b57cec5SDimitry Andric         // If the predecessor's parent is invalid, this is a tree edge and the
13110b57cec5SDimitry Andric         // current node is the parent.
13120b57cec5SDimitry Andric         if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID)
13130b57cec5SDimitry Andric           RootSet[PredNum].ParentNodeID = SU->NodeNum;
13140b57cec5SDimitry Andric       }
13150b57cec5SDimitry Andric       else if (RootSet.count(PredNum)) {
13160b57cec5SDimitry Andric         // The predecessor is not a root, but is still in the root set. This
13170b57cec5SDimitry Andric         // must be the new parent that it was just joined to. Note that
13180b57cec5SDimitry Andric         // RootSet[PredNum].ParentNodeID may either be invalid or may still be
13190b57cec5SDimitry Andric         // set to the original parent.
13200b57cec5SDimitry Andric         RData.SubInstrCount += RootSet[PredNum].SubInstrCount;
13210b57cec5SDimitry Andric         RootSet.erase(PredNum);
13220b57cec5SDimitry Andric       }
13230b57cec5SDimitry Andric     }
13240b57cec5SDimitry Andric     RootSet[SU->NodeNum] = RData;
13250b57cec5SDimitry Andric   }
13260b57cec5SDimitry Andric 
13270b57cec5SDimitry Andric   /// Called once for each tree edge after calling visitPostOrderNode on
13280b57cec5SDimitry Andric   /// the predecessor. Increment the parent node's instruction count and
13290b57cec5SDimitry Andric   /// preemptively join this subtree to its parent's if it is small enough.
13300b57cec5SDimitry Andric   void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) {
13310b57cec5SDimitry Andric     R.DFSNodeData[Succ->NodeNum].InstrCount
13320b57cec5SDimitry Andric       += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount;
13330b57cec5SDimitry Andric     joinPredSubtree(PredDep, Succ);
13340b57cec5SDimitry Andric   }
13350b57cec5SDimitry Andric 
13360b57cec5SDimitry Andric   /// Adds a connection for cross edges.
13370b57cec5SDimitry Andric   void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) {
1338bdd1243dSDimitry Andric     ConnectionPairs.emplace_back(PredDep.getSUnit(), Succ);
13390b57cec5SDimitry Andric   }
13400b57cec5SDimitry Andric 
13410b57cec5SDimitry Andric   /// Sets each node's subtree ID to the representative ID and record
13420b57cec5SDimitry Andric   /// connections between trees.
13430b57cec5SDimitry Andric   void finalize() {
13440b57cec5SDimitry Andric     SubtreeClasses.compress();
13450b57cec5SDimitry Andric     R.DFSTreeData.resize(SubtreeClasses.getNumClasses());
13460b57cec5SDimitry Andric     assert(SubtreeClasses.getNumClasses() == RootSet.size()
13470b57cec5SDimitry Andric            && "number of roots should match trees");
13480b57cec5SDimitry Andric     for (const RootData &Root : RootSet) {
13490b57cec5SDimitry Andric       unsigned TreeID = SubtreeClasses[Root.NodeID];
13500b57cec5SDimitry Andric       if (Root.ParentNodeID != SchedDFSResult::InvalidSubtreeID)
13510b57cec5SDimitry Andric         R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[Root.ParentNodeID];
13520b57cec5SDimitry Andric       R.DFSTreeData[TreeID].SubInstrCount = Root.SubInstrCount;
13530b57cec5SDimitry Andric       // Note that SubInstrCount may be greater than InstrCount if we joined
13540b57cec5SDimitry Andric       // subtrees across a cross edge. InstrCount will be attributed to the
13550b57cec5SDimitry Andric       // original parent, while SubInstrCount will be attributed to the joined
13560b57cec5SDimitry Andric       // parent.
13570b57cec5SDimitry Andric     }
13580b57cec5SDimitry Andric     R.SubtreeConnections.resize(SubtreeClasses.getNumClasses());
13590b57cec5SDimitry Andric     R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses());
13600b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n");
13610b57cec5SDimitry Andric     for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) {
13620b57cec5SDimitry Andric       R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx];
13630b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "  SU(" << Idx << ") in tree "
13640b57cec5SDimitry Andric                         << R.DFSNodeData[Idx].SubtreeID << '\n');
13650b57cec5SDimitry Andric     }
1366bdd1243dSDimitry Andric     for (const auto &[Pred, Succ] : ConnectionPairs) {
1367bdd1243dSDimitry Andric       unsigned PredTree = SubtreeClasses[Pred->NodeNum];
1368bdd1243dSDimitry Andric       unsigned SuccTree = SubtreeClasses[Succ->NodeNum];
13690b57cec5SDimitry Andric       if (PredTree == SuccTree)
13700b57cec5SDimitry Andric         continue;
1371bdd1243dSDimitry Andric       unsigned Depth = Pred->getDepth();
13720b57cec5SDimitry Andric       addConnection(PredTree, SuccTree, Depth);
13730b57cec5SDimitry Andric       addConnection(SuccTree, PredTree, Depth);
13740b57cec5SDimitry Andric     }
13750b57cec5SDimitry Andric   }
13760b57cec5SDimitry Andric 
13770b57cec5SDimitry Andric protected:
13780b57cec5SDimitry Andric   /// Joins the predecessor subtree with the successor that is its DFS parent.
13790b57cec5SDimitry Andric   /// Applies some heuristics before joining.
13800b57cec5SDimitry Andric   bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ,
13810b57cec5SDimitry Andric                        bool CheckLimit = true) {
13820b57cec5SDimitry Andric     assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges");
13830b57cec5SDimitry Andric 
13840b57cec5SDimitry Andric     // Check if the predecessor is already joined.
13850b57cec5SDimitry Andric     const SUnit *PredSU = PredDep.getSUnit();
13860b57cec5SDimitry Andric     unsigned PredNum = PredSU->NodeNum;
13870b57cec5SDimitry Andric     if (R.DFSNodeData[PredNum].SubtreeID != PredNum)
13880b57cec5SDimitry Andric       return false;
13890b57cec5SDimitry Andric 
13900b57cec5SDimitry Andric     // Four is the magic number of successors before a node is considered a
13910b57cec5SDimitry Andric     // pinch point.
13920b57cec5SDimitry Andric     unsigned NumDataSucs = 0;
13930b57cec5SDimitry Andric     for (const SDep &SuccDep : PredSU->Succs) {
13940b57cec5SDimitry Andric       if (SuccDep.getKind() == SDep::Data) {
13950b57cec5SDimitry Andric         if (++NumDataSucs >= 4)
13960b57cec5SDimitry Andric           return false;
13970b57cec5SDimitry Andric       }
13980b57cec5SDimitry Andric     }
13990b57cec5SDimitry Andric     if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit)
14000b57cec5SDimitry Andric       return false;
14010b57cec5SDimitry Andric     R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum;
14020b57cec5SDimitry Andric     SubtreeClasses.join(Succ->NodeNum, PredNum);
14030b57cec5SDimitry Andric     return true;
14040b57cec5SDimitry Andric   }
14050b57cec5SDimitry Andric 
14060b57cec5SDimitry Andric   /// Called by finalize() to record a connection between trees.
14070b57cec5SDimitry Andric   void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) {
14080b57cec5SDimitry Andric     if (!Depth)
14090b57cec5SDimitry Andric       return;
14100b57cec5SDimitry Andric 
14110b57cec5SDimitry Andric     do {
14120b57cec5SDimitry Andric       SmallVectorImpl<SchedDFSResult::Connection> &Connections =
14130b57cec5SDimitry Andric         R.SubtreeConnections[FromTree];
14140b57cec5SDimitry Andric       for (SchedDFSResult::Connection &C : Connections) {
14150b57cec5SDimitry Andric         if (C.TreeID == ToTree) {
14160b57cec5SDimitry Andric           C.Level = std::max(C.Level, Depth);
14170b57cec5SDimitry Andric           return;
14180b57cec5SDimitry Andric         }
14190b57cec5SDimitry Andric       }
14200b57cec5SDimitry Andric       Connections.push_back(SchedDFSResult::Connection(ToTree, Depth));
14210b57cec5SDimitry Andric       FromTree = R.DFSTreeData[FromTree].ParentTreeID;
14220b57cec5SDimitry Andric     } while (FromTree != SchedDFSResult::InvalidSubtreeID);
14230b57cec5SDimitry Andric   }
14240b57cec5SDimitry Andric };
14250b57cec5SDimitry Andric 
14260b57cec5SDimitry Andric } // end namespace llvm
14270b57cec5SDimitry Andric 
14280b57cec5SDimitry Andric namespace {
14290b57cec5SDimitry Andric 
14300b57cec5SDimitry Andric /// Manage the stack used by a reverse depth-first search over the DAG.
14310b57cec5SDimitry Andric class SchedDAGReverseDFS {
14320b57cec5SDimitry Andric   std::vector<std::pair<const SUnit *, SUnit::const_pred_iterator>> DFSStack;
14330b57cec5SDimitry Andric 
14340b57cec5SDimitry Andric public:
14350b57cec5SDimitry Andric   bool isComplete() const { return DFSStack.empty(); }
14360b57cec5SDimitry Andric 
14370b57cec5SDimitry Andric   void follow(const SUnit *SU) {
1438bdd1243dSDimitry Andric     DFSStack.emplace_back(SU, SU->Preds.begin());
14390b57cec5SDimitry Andric   }
14400b57cec5SDimitry Andric   void advance() { ++DFSStack.back().second; }
14410b57cec5SDimitry Andric 
14420b57cec5SDimitry Andric   const SDep *backtrack() {
14430b57cec5SDimitry Andric     DFSStack.pop_back();
14440b57cec5SDimitry Andric     return DFSStack.empty() ? nullptr : std::prev(DFSStack.back().second);
14450b57cec5SDimitry Andric   }
14460b57cec5SDimitry Andric 
14470b57cec5SDimitry Andric   const SUnit *getCurr() const { return DFSStack.back().first; }
14480b57cec5SDimitry Andric 
14490b57cec5SDimitry Andric   SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; }
14500b57cec5SDimitry Andric 
14510b57cec5SDimitry Andric   SUnit::const_pred_iterator getPredEnd() const {
14520b57cec5SDimitry Andric     return getCurr()->Preds.end();
14530b57cec5SDimitry Andric   }
14540b57cec5SDimitry Andric };
14550b57cec5SDimitry Andric 
14560b57cec5SDimitry Andric } // end anonymous namespace
14570b57cec5SDimitry Andric 
14580b57cec5SDimitry Andric static bool hasDataSucc(const SUnit *SU) {
14590b57cec5SDimitry Andric   for (const SDep &SuccDep : SU->Succs) {
14600b57cec5SDimitry Andric     if (SuccDep.getKind() == SDep::Data &&
14610b57cec5SDimitry Andric         !SuccDep.getSUnit()->isBoundaryNode())
14620b57cec5SDimitry Andric       return true;
14630b57cec5SDimitry Andric   }
14640b57cec5SDimitry Andric   return false;
14650b57cec5SDimitry Andric }
14660b57cec5SDimitry Andric 
14670b57cec5SDimitry Andric /// Computes an ILP metric for all nodes in the subDAG reachable via depth-first
14680b57cec5SDimitry Andric /// search from this root.
14690b57cec5SDimitry Andric void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
14700b57cec5SDimitry Andric   if (!IsBottomUp)
14710b57cec5SDimitry Andric     llvm_unreachable("Top-down ILP metric is unimplemented");
14720b57cec5SDimitry Andric 
14730b57cec5SDimitry Andric   SchedDFSImpl Impl(*this);
14740b57cec5SDimitry Andric   for (const SUnit &SU : SUnits) {
14750b57cec5SDimitry Andric     if (Impl.isVisited(&SU) || hasDataSucc(&SU))
14760b57cec5SDimitry Andric       continue;
14770b57cec5SDimitry Andric 
14780b57cec5SDimitry Andric     SchedDAGReverseDFS DFS;
14790b57cec5SDimitry Andric     Impl.visitPreorder(&SU);
14800b57cec5SDimitry Andric     DFS.follow(&SU);
14810b57cec5SDimitry Andric     while (true) {
14820b57cec5SDimitry Andric       // Traverse the leftmost path as far as possible.
14830b57cec5SDimitry Andric       while (DFS.getPred() != DFS.getPredEnd()) {
14840b57cec5SDimitry Andric         const SDep &PredDep = *DFS.getPred();
14850b57cec5SDimitry Andric         DFS.advance();
14860b57cec5SDimitry Andric         // Ignore non-data edges.
14870b57cec5SDimitry Andric         if (PredDep.getKind() != SDep::Data
14880b57cec5SDimitry Andric             || PredDep.getSUnit()->isBoundaryNode()) {
14890b57cec5SDimitry Andric           continue;
14900b57cec5SDimitry Andric         }
14910b57cec5SDimitry Andric         // An already visited edge is a cross edge, assuming an acyclic DAG.
14920b57cec5SDimitry Andric         if (Impl.isVisited(PredDep.getSUnit())) {
14930b57cec5SDimitry Andric           Impl.visitCrossEdge(PredDep, DFS.getCurr());
14940b57cec5SDimitry Andric           continue;
14950b57cec5SDimitry Andric         }
14960b57cec5SDimitry Andric         Impl.visitPreorder(PredDep.getSUnit());
14970b57cec5SDimitry Andric         DFS.follow(PredDep.getSUnit());
14980b57cec5SDimitry Andric       }
14990b57cec5SDimitry Andric       // Visit the top of the stack in postorder and backtrack.
15000b57cec5SDimitry Andric       const SUnit *Child = DFS.getCurr();
15010b57cec5SDimitry Andric       const SDep *PredDep = DFS.backtrack();
15020b57cec5SDimitry Andric       Impl.visitPostorderNode(Child);
15030b57cec5SDimitry Andric       if (PredDep)
15040b57cec5SDimitry Andric         Impl.visitPostorderEdge(*PredDep, DFS.getCurr());
15050b57cec5SDimitry Andric       if (DFS.isComplete())
15060b57cec5SDimitry Andric         break;
15070b57cec5SDimitry Andric     }
15080b57cec5SDimitry Andric   }
15090b57cec5SDimitry Andric   Impl.finalize();
15100b57cec5SDimitry Andric }
15110b57cec5SDimitry Andric 
15120b57cec5SDimitry Andric /// The root of the given SubtreeID was just scheduled. For all subtrees
15130b57cec5SDimitry Andric /// connected to this tree, record the depth of the connection so that the
15140b57cec5SDimitry Andric /// nearest connected subtrees can be prioritized.
15150b57cec5SDimitry Andric void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
15160b57cec5SDimitry Andric   for (const Connection &C : SubtreeConnections[SubtreeID]) {
15170b57cec5SDimitry Andric     SubtreeConnectLevels[C.TreeID] =
15180b57cec5SDimitry Andric       std::max(SubtreeConnectLevels[C.TreeID], C.Level);
15190b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "  Tree: " << C.TreeID << " @"
15200b57cec5SDimitry Andric                       << SubtreeConnectLevels[C.TreeID] << '\n');
15210b57cec5SDimitry Andric   }
15220b57cec5SDimitry Andric }
15230b57cec5SDimitry Andric 
15240b57cec5SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
15250b57cec5SDimitry Andric LLVM_DUMP_METHOD void ILPValue::print(raw_ostream &OS) const {
15260b57cec5SDimitry Andric   OS << InstrCount << " / " << Length << " = ";
15270b57cec5SDimitry Andric   if (!Length)
15280b57cec5SDimitry Andric     OS << "BADILP";
15290b57cec5SDimitry Andric   else
15300b57cec5SDimitry Andric     OS << format("%g", ((double)InstrCount / Length));
15310b57cec5SDimitry Andric }
15320b57cec5SDimitry Andric 
15330b57cec5SDimitry Andric LLVM_DUMP_METHOD void ILPValue::dump() const {
15340b57cec5SDimitry Andric   dbgs() << *this << '\n';
15350b57cec5SDimitry Andric }
15360b57cec5SDimitry Andric 
15370b57cec5SDimitry Andric namespace llvm {
15380b57cec5SDimitry Andric 
153906c3fb27SDimitry Andric LLVM_ATTRIBUTE_UNUSED
15400b57cec5SDimitry Andric raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
15410b57cec5SDimitry Andric   Val.print(OS);
15420b57cec5SDimitry Andric   return OS;
15430b57cec5SDimitry Andric }
15440b57cec5SDimitry Andric 
15450b57cec5SDimitry Andric } // end namespace llvm
15460b57cec5SDimitry Andric 
15470b57cec5SDimitry Andric #endif
1548