//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This implements the ScheduleDAGInstrs class, which implements
/// re-scheduling of MachineInstrs.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/ScheduleDAGInstrs.h"

#include "llvm/ADT/IntEqClasses.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "machine-scheduler"

static cl::opt<bool>
    EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
                    cl::desc("Enable use of AA during MI DAG construction"));

static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
    cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));

// Note: the two options below might be used in tuning compile time vs
// output quality. Setting HugeRegion so large that it will never be
// reached means best-effort, but may be slow.

// When Stores and Loads maps (or NonAliasStores and NonAliasLoads)
// together hold this many SUs, a reduction of maps will be done.
static cl::opt<unsigned> HugeRegion("dag-maps-huge-region", cl::Hidden,
    cl::init(1000), cl::desc("The limit to use while constructing the DAG "
                             "prior to scheduling, at which point a trade-off "
                             "is made to avoid excessive compile time."));

static cl::opt<unsigned> ReductionSize(
    "dag-maps-reduction-size", cl::Hidden,
    cl::desc("A huge scheduling region will have maps reduced by this many "
             "nodes at a time. Defaults to HugeRegion / 2."));

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
static cl::opt<bool> SchedPrintCycles(
    "sched-print-cycles", cl::Hidden, cl::init(false),
    cl::desc("Report top/bottom cycles when dumping SUnit instances"));
#endif

/// Returns how many SUs to drop from the Stores/Loads maps when a huge
/// region is reduced: half of HugeRegion unless the user overrode it.
static unsigned getReductionSize() {
  // Always reduce a huge region with half of the elements, except
  // when user sets this number explicitly.
  if (ReductionSize.getNumOccurrences() == 0)
    return HugeRegion / 2;
  return ReductionSize;
}

/// Debug helper: print a list of SUnits as "{ SU(a), SU(b), ... }".
static void dumpSUList(const ScheduleDAGInstrs::SUList &L) {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  dbgs() << "{ ";
  for (const SUnit *SU : L) {
    dbgs() << "SU(" << SU->NodeNum << ")";
    if (SU != L.back())
      dbgs() << ", ";
  }
  dbgs() << "}\n";
#endif
}

ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                     const MachineLoopInfo *mli,
                                     bool RemoveKillFlags)
    : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
      RemoveKillFlags(RemoveKillFlags),
      UnknownValue(UndefValue::get(
          Type::getVoidTy(mf.getFunction().getContext()))),
      Topo(SUnits, &ExitSU) {
  DbgValues.clear();
1210b57cec5SDimitry Andric 1220b57cec5SDimitry Andric const TargetSubtargetInfo &ST = mf.getSubtarget(); 1230b57cec5SDimitry Andric SchedModel.init(&ST); 1240b57cec5SDimitry Andric } 1250b57cec5SDimitry Andric 1260b57cec5SDimitry Andric /// If this machine instr has memory reference information and it can be 1270b57cec5SDimitry Andric /// tracked to a normal reference to a known object, return the Value 1280b57cec5SDimitry Andric /// for that object. This function returns false the memory location is 1290b57cec5SDimitry Andric /// unknown or may alias anything. 1300b57cec5SDimitry Andric static bool getUnderlyingObjectsForInstr(const MachineInstr *MI, 1310b57cec5SDimitry Andric const MachineFrameInfo &MFI, 1320b57cec5SDimitry Andric UnderlyingObjectsVector &Objects, 1330b57cec5SDimitry Andric const DataLayout &DL) { 134bdd1243dSDimitry Andric auto AllMMOsOkay = [&]() { 1350b57cec5SDimitry Andric for (const MachineMemOperand *MMO : MI->memoperands()) { 1360b57cec5SDimitry Andric // TODO: Figure out whether isAtomic is really necessary (see D57601). 1370b57cec5SDimitry Andric if (MMO->isVolatile() || MMO->isAtomic()) 1380b57cec5SDimitry Andric return false; 1390b57cec5SDimitry Andric 1400b57cec5SDimitry Andric if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) { 1410b57cec5SDimitry Andric // Function that contain tail calls don't have unique PseudoSourceValue 1420b57cec5SDimitry Andric // objects. Two PseudoSourceValues might refer to the same or 1430b57cec5SDimitry Andric // overlapping locations. The client code calling this function assumes 1440b57cec5SDimitry Andric // this is not the case. So return a conservative answer of no known 1450b57cec5SDimitry Andric // object. 
1460b57cec5SDimitry Andric if (MFI.hasTailCall()) 1470b57cec5SDimitry Andric return false; 1480b57cec5SDimitry Andric 1490b57cec5SDimitry Andric // For now, ignore PseudoSourceValues which may alias LLVM IR values 1500b57cec5SDimitry Andric // because the code that uses this function has no way to cope with 1510b57cec5SDimitry Andric // such aliases. 1520b57cec5SDimitry Andric if (PSV->isAliased(&MFI)) 1530b57cec5SDimitry Andric return false; 1540b57cec5SDimitry Andric 1550b57cec5SDimitry Andric bool MayAlias = PSV->mayAlias(&MFI); 156bdd1243dSDimitry Andric Objects.emplace_back(PSV, MayAlias); 1570b57cec5SDimitry Andric } else if (const Value *V = MMO->getValue()) { 1580b57cec5SDimitry Andric SmallVector<Value *, 4> Objs; 159e8d8bef9SDimitry Andric if (!getUnderlyingObjectsForCodeGen(V, Objs)) 1600b57cec5SDimitry Andric return false; 1610b57cec5SDimitry Andric 1620b57cec5SDimitry Andric for (Value *V : Objs) { 1630b57cec5SDimitry Andric assert(isIdentifiedObject(V)); 164bdd1243dSDimitry Andric Objects.emplace_back(V, true); 1650b57cec5SDimitry Andric } 1660b57cec5SDimitry Andric } else 1670b57cec5SDimitry Andric return false; 1680b57cec5SDimitry Andric } 1690b57cec5SDimitry Andric return true; 1700b57cec5SDimitry Andric }; 1710b57cec5SDimitry Andric 172bdd1243dSDimitry Andric if (!AllMMOsOkay()) { 1730b57cec5SDimitry Andric Objects.clear(); 1740b57cec5SDimitry Andric return false; 1750b57cec5SDimitry Andric } 1760b57cec5SDimitry Andric 1770b57cec5SDimitry Andric return true; 1780b57cec5SDimitry Andric } 1790b57cec5SDimitry Andric 1800b57cec5SDimitry Andric void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) { 1810b57cec5SDimitry Andric BB = bb; 1820b57cec5SDimitry Andric } 1830b57cec5SDimitry Andric 1840b57cec5SDimitry Andric void ScheduleDAGInstrs::finishBlock() { 1850b57cec5SDimitry Andric // Subclasses should no longer refer to the old block. 
1860b57cec5SDimitry Andric BB = nullptr; 1870b57cec5SDimitry Andric } 1880b57cec5SDimitry Andric 1890b57cec5SDimitry Andric void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb, 1900b57cec5SDimitry Andric MachineBasicBlock::iterator begin, 1910b57cec5SDimitry Andric MachineBasicBlock::iterator end, 1920b57cec5SDimitry Andric unsigned regioninstrs) { 1930b57cec5SDimitry Andric assert(bb == BB && "startBlock should set BB"); 1940b57cec5SDimitry Andric RegionBegin = begin; 1950b57cec5SDimitry Andric RegionEnd = end; 1960b57cec5SDimitry Andric NumRegionInstrs = regioninstrs; 1970b57cec5SDimitry Andric } 1980b57cec5SDimitry Andric 1990b57cec5SDimitry Andric void ScheduleDAGInstrs::exitRegion() { 2000b57cec5SDimitry Andric // Nothing to do. 2010b57cec5SDimitry Andric } 2020b57cec5SDimitry Andric 2030b57cec5SDimitry Andric void ScheduleDAGInstrs::addSchedBarrierDeps() { 204e8d8bef9SDimitry Andric MachineInstr *ExitMI = 205e8d8bef9SDimitry Andric RegionEnd != BB->end() 206e8d8bef9SDimitry Andric ? &*skipDebugInstructionsBackward(RegionEnd, RegionBegin) 207e8d8bef9SDimitry Andric : nullptr; 2080b57cec5SDimitry Andric ExitSU.setInstr(ExitMI); 2090b57cec5SDimitry Andric // Add dependencies on the defs and uses of the instruction. 2100b57cec5SDimitry Andric if (ExitMI) { 21106c3fb27SDimitry Andric for (const MachineOperand &MO : ExitMI->all_uses()) { 2128bcb0991SDimitry Andric Register Reg = MO.getReg(); 213bdd1243dSDimitry Andric if (Reg.isPhysical()) { 2145f757f3fSDimitry Andric for (MCRegUnit Unit : TRI->regunits(Reg)) 2155f757f3fSDimitry Andric Uses.insert(PhysRegSUOper(&ExitSU, -1, Unit)); 216bdd1243dSDimitry Andric } else if (Reg.isVirtual() && MO.readsReg()) { 21706c3fb27SDimitry Andric addVRegUseDeps(&ExitSU, MO.getOperandNo()); 2180b57cec5SDimitry Andric } 2190b57cec5SDimitry Andric } 2200b57cec5SDimitry Andric } 2210b57cec5SDimitry Andric if (!ExitMI || (!ExitMI->isCall() && !ExitMI->isBarrier())) { 2220b57cec5SDimitry Andric // For others, e.g. 
fallthrough, conditional branch, assume the exit 2230b57cec5SDimitry Andric // uses all the registers that are livein to the successor blocks. 2240b57cec5SDimitry Andric for (const MachineBasicBlock *Succ : BB->successors()) { 2250b57cec5SDimitry Andric for (const auto &LI : Succ->liveins()) { 2265f757f3fSDimitry Andric for (MCRegUnitMaskIterator U(LI.PhysReg, TRI); U.isValid(); ++U) { 2275f757f3fSDimitry Andric auto [Unit, Mask] = *U; 2285f757f3fSDimitry Andric if ((Mask & LI.LaneMask).any() && !Uses.contains(Unit)) 2295f757f3fSDimitry Andric Uses.insert(PhysRegSUOper(&ExitSU, -1, Unit)); 2305f757f3fSDimitry Andric } 2310b57cec5SDimitry Andric } 2320b57cec5SDimitry Andric } 2330b57cec5SDimitry Andric } 2340b57cec5SDimitry Andric } 2350b57cec5SDimitry Andric 2360b57cec5SDimitry Andric /// MO is an operand of SU's instruction that defines a physical register. Adds 2370b57cec5SDimitry Andric /// data dependencies from SU to any uses of the physical register. 2380b57cec5SDimitry Andric void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { 2390b57cec5SDimitry Andric const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx); 2400b57cec5SDimitry Andric assert(MO.isDef() && "expect physreg def"); 2415f757f3fSDimitry Andric Register Reg = MO.getReg(); 2420b57cec5SDimitry Andric 2430b57cec5SDimitry Andric // Ask the target if address-backscheduling is desirable, and if so how much. 2440b57cec5SDimitry Andric const TargetSubtargetInfo &ST = MF.getSubtarget(); 2450b57cec5SDimitry Andric 2460b57cec5SDimitry Andric // Only use any non-zero latency for real defs/uses, in contrast to 2470b57cec5SDimitry Andric // "fake" operands added by regalloc. 
2485f757f3fSDimitry Andric const MCInstrDesc &DefMIDesc = SU->getInstr()->getDesc(); 2495f757f3fSDimitry Andric bool ImplicitPseudoDef = (OperIdx >= DefMIDesc.getNumOperands() && 2505f757f3fSDimitry Andric !DefMIDesc.hasImplicitDefOfPhysReg(Reg)); 2515f757f3fSDimitry Andric for (MCRegUnit Unit : TRI->regunits(Reg)) { 2525f757f3fSDimitry Andric for (RegUnit2SUnitsMap::iterator I = Uses.find(Unit); I != Uses.end(); 2535f757f3fSDimitry Andric ++I) { 2540b57cec5SDimitry Andric SUnit *UseSU = I->SU; 2550b57cec5SDimitry Andric if (UseSU == SU) 2560b57cec5SDimitry Andric continue; 2570b57cec5SDimitry Andric 2580b57cec5SDimitry Andric // Adjust the dependence latency using operand def/use information, 2590b57cec5SDimitry Andric // then allow the target to perform its own adjustments. 2605f757f3fSDimitry Andric MachineInstr *UseInstr = nullptr; 2615f757f3fSDimitry Andric int UseOpIdx = I->OpIdx; 2625f757f3fSDimitry Andric bool ImplicitPseudoUse = false; 2630b57cec5SDimitry Andric SDep Dep; 2645f757f3fSDimitry Andric if (UseOpIdx < 0) { 2650b57cec5SDimitry Andric Dep = SDep(SU, SDep::Artificial); 2665f757f3fSDimitry Andric } else { 2670b57cec5SDimitry Andric // Set the hasPhysRegDefs only for physreg defs that have a use within 2680b57cec5SDimitry Andric // the scheduling region. 
2690b57cec5SDimitry Andric SU->hasPhysRegDefs = true; 2705f757f3fSDimitry Andric 2715f757f3fSDimitry Andric UseInstr = UseSU->getInstr(); 2725f757f3fSDimitry Andric Register UseReg = UseInstr->getOperand(UseOpIdx).getReg(); 2735f757f3fSDimitry Andric const MCInstrDesc &UseMIDesc = UseInstr->getDesc(); 2745f757f3fSDimitry Andric ImplicitPseudoUse = UseOpIdx >= ((int)UseMIDesc.getNumOperands()) && 2755f757f3fSDimitry Andric !UseMIDesc.hasImplicitUseOfPhysReg(UseReg); 2765f757f3fSDimitry Andric 2775f757f3fSDimitry Andric Dep = SDep(SU, SDep::Data, UseReg); 2780b57cec5SDimitry Andric } 2790b57cec5SDimitry Andric if (!ImplicitPseudoDef && !ImplicitPseudoUse) { 2800b57cec5SDimitry Andric Dep.setLatency(SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, 2815f757f3fSDimitry Andric UseInstr, UseOpIdx)); 282480093f4SDimitry Andric } else { 2830b57cec5SDimitry Andric Dep.setLatency(0); 284480093f4SDimitry Andric } 285*0fca6ea1SDimitry Andric ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOpIdx, Dep, &SchedModel); 2860b57cec5SDimitry Andric UseSU->addPred(Dep); 2870b57cec5SDimitry Andric } 2880b57cec5SDimitry Andric } 2890b57cec5SDimitry Andric } 2900b57cec5SDimitry Andric 2910b57cec5SDimitry Andric /// Adds register dependencies (data, anti, and output) from this SUnit 2920b57cec5SDimitry Andric /// to following instructions in the same scheduling region that depend the 2930b57cec5SDimitry Andric /// physical register referenced at OperIdx. 2940b57cec5SDimitry Andric void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { 2950b57cec5SDimitry Andric MachineInstr *MI = SU->getInstr(); 2960b57cec5SDimitry Andric MachineOperand &MO = MI->getOperand(OperIdx); 2978bcb0991SDimitry Andric Register Reg = MO.getReg(); 2980b57cec5SDimitry Andric // We do not need to track any dependencies for constant registers. 
2990b57cec5SDimitry Andric if (MRI.isConstantPhysReg(Reg)) 3000b57cec5SDimitry Andric return; 3010b57cec5SDimitry Andric 3025ffd83dbSDimitry Andric const TargetSubtargetInfo &ST = MF.getSubtarget(); 3035ffd83dbSDimitry Andric 3040b57cec5SDimitry Andric // Optionally add output and anti dependencies. For anti 3050b57cec5SDimitry Andric // dependencies we use a latency of 0 because for a multi-issue 3060b57cec5SDimitry Andric // target we want to allow the defining instruction to issue 3070b57cec5SDimitry Andric // in the same cycle as the using instruction. 3080b57cec5SDimitry Andric // TODO: Using a latency of 1 here for output dependencies assumes 3090b57cec5SDimitry Andric // there's no cost for reusing registers. 3100b57cec5SDimitry Andric SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output; 3115f757f3fSDimitry Andric for (MCRegUnit Unit : TRI->regunits(Reg)) { 3125f757f3fSDimitry Andric for (RegUnit2SUnitsMap::iterator I = Defs.find(Unit); I != Defs.end(); 3135f757f3fSDimitry Andric ++I) { 3140b57cec5SDimitry Andric SUnit *DefSU = I->SU; 3150b57cec5SDimitry Andric if (DefSU == &ExitSU) 3160b57cec5SDimitry Andric continue; 3175f757f3fSDimitry Andric MachineInstr *DefInstr = DefSU->getInstr(); 3185f757f3fSDimitry Andric MachineOperand &DefMO = DefInstr->getOperand(I->OpIdx); 3190b57cec5SDimitry Andric if (DefSU != SU && 3205f757f3fSDimitry Andric (Kind != SDep::Output || !MO.isDead() || !DefMO.isDead())) { 3215f757f3fSDimitry Andric SDep Dep(SU, Kind, DefMO.getReg()); 3225f757f3fSDimitry Andric if (Kind != SDep::Anti) { 3230b57cec5SDimitry Andric Dep.setLatency( 3245f757f3fSDimitry Andric SchedModel.computeOutputLatency(MI, OperIdx, DefInstr)); 3255f757f3fSDimitry Andric } 326*0fca6ea1SDimitry Andric ST.adjustSchedDependency(SU, OperIdx, DefSU, I->OpIdx, Dep, 327*0fca6ea1SDimitry Andric &SchedModel); 3280b57cec5SDimitry Andric DefSU->addPred(Dep); 3290b57cec5SDimitry Andric } 3300b57cec5SDimitry Andric } 3310b57cec5SDimitry Andric } 3320b57cec5SDimitry 
Andric 3335f757f3fSDimitry Andric if (MO.isUse()) { 3340b57cec5SDimitry Andric SU->hasPhysRegUses = true; 3350b57cec5SDimitry Andric // Either insert a new Reg2SUnits entry with an empty SUnits list, or 3360b57cec5SDimitry Andric // retrieve the existing SUnits list for this register's uses. 3370b57cec5SDimitry Andric // Push this SUnit on the use list. 3385f757f3fSDimitry Andric for (MCRegUnit Unit : TRI->regunits(Reg)) 3395f757f3fSDimitry Andric Uses.insert(PhysRegSUOper(SU, OperIdx, Unit)); 3400b57cec5SDimitry Andric if (RemoveKillFlags) 3410b57cec5SDimitry Andric MO.setIsKill(false); 3420b57cec5SDimitry Andric } else { 3430b57cec5SDimitry Andric addPhysRegDataDeps(SU, OperIdx); 3440b57cec5SDimitry Andric 3455f757f3fSDimitry Andric // Clear previous uses and defs of this register and its subregisters. 3465f757f3fSDimitry Andric for (MCRegUnit Unit : TRI->regunits(Reg)) { 3475f757f3fSDimitry Andric Uses.eraseAll(Unit); 3480b57cec5SDimitry Andric if (!MO.isDead()) 3495f757f3fSDimitry Andric Defs.eraseAll(Unit); 3500b57cec5SDimitry Andric } 3515f757f3fSDimitry Andric 3520b57cec5SDimitry Andric if (MO.isDead() && SU->isCall) { 3530b57cec5SDimitry Andric // Calls will not be reordered because of chain dependencies (see 3540b57cec5SDimitry Andric // below). Since call operands are dead, calls may continue to be added 3550b57cec5SDimitry Andric // to the DefList making dependence checking quadratic in the size of 3560b57cec5SDimitry Andric // the block. Instead, we leave only one call at the back of the 3570b57cec5SDimitry Andric // DefList. 
3585f757f3fSDimitry Andric for (MCRegUnit Unit : TRI->regunits(Reg)) { 3595f757f3fSDimitry Andric RegUnit2SUnitsMap::RangePair P = Defs.equal_range(Unit); 3605f757f3fSDimitry Andric RegUnit2SUnitsMap::iterator B = P.first; 3615f757f3fSDimitry Andric RegUnit2SUnitsMap::iterator I = P.second; 3620b57cec5SDimitry Andric for (bool isBegin = I == B; !isBegin; /* empty */) { 3630b57cec5SDimitry Andric isBegin = (--I) == B; 3640b57cec5SDimitry Andric if (!I->SU->isCall) 3650b57cec5SDimitry Andric break; 3660b57cec5SDimitry Andric I = Defs.erase(I); 3670b57cec5SDimitry Andric } 3680b57cec5SDimitry Andric } 3695f757f3fSDimitry Andric } 3700b57cec5SDimitry Andric 3710b57cec5SDimitry Andric // Defs are pushed in the order they are visited and never reordered. 3725f757f3fSDimitry Andric for (MCRegUnit Unit : TRI->regunits(Reg)) 3735f757f3fSDimitry Andric Defs.insert(PhysRegSUOper(SU, OperIdx, Unit)); 3740b57cec5SDimitry Andric } 3750b57cec5SDimitry Andric } 3760b57cec5SDimitry Andric 3770b57cec5SDimitry Andric LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const 3780b57cec5SDimitry Andric { 3798bcb0991SDimitry Andric Register Reg = MO.getReg(); 3800b57cec5SDimitry Andric // No point in tracking lanemasks if we don't have interesting subregisters. 
3810b57cec5SDimitry Andric const TargetRegisterClass &RC = *MRI.getRegClass(Reg); 3820b57cec5SDimitry Andric if (!RC.HasDisjunctSubRegs) 3830b57cec5SDimitry Andric return LaneBitmask::getAll(); 3840b57cec5SDimitry Andric 3850b57cec5SDimitry Andric unsigned SubReg = MO.getSubReg(); 3860b57cec5SDimitry Andric if (SubReg == 0) 3870b57cec5SDimitry Andric return RC.getLaneMask(); 3880b57cec5SDimitry Andric return TRI->getSubRegIndexLaneMask(SubReg); 3890b57cec5SDimitry Andric } 3900b57cec5SDimitry Andric 3918bcb0991SDimitry Andric bool ScheduleDAGInstrs::deadDefHasNoUse(const MachineOperand &MO) { 3928bcb0991SDimitry Andric auto RegUse = CurrentVRegUses.find(MO.getReg()); 3938bcb0991SDimitry Andric if (RegUse == CurrentVRegUses.end()) 3948bcb0991SDimitry Andric return true; 3958bcb0991SDimitry Andric return (RegUse->LaneMask & getLaneMaskForMO(MO)).none(); 3968bcb0991SDimitry Andric } 3978bcb0991SDimitry Andric 3980b57cec5SDimitry Andric /// Adds register output and data dependencies from this SUnit to instructions 3990b57cec5SDimitry Andric /// that occur later in the same scheduling region if they read from or write to 4000b57cec5SDimitry Andric /// the virtual register defined at OperIdx. 4010b57cec5SDimitry Andric /// 4020b57cec5SDimitry Andric /// TODO: Hoist loop induction variable increments. This has to be 4030b57cec5SDimitry Andric /// reevaluated. Generally, IV scheduling should be done before coalescing. 
4040b57cec5SDimitry Andric void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { 4050b57cec5SDimitry Andric MachineInstr *MI = SU->getInstr(); 4060b57cec5SDimitry Andric MachineOperand &MO = MI->getOperand(OperIdx); 4078bcb0991SDimitry Andric Register Reg = MO.getReg(); 4080b57cec5SDimitry Andric 4090b57cec5SDimitry Andric LaneBitmask DefLaneMask; 4100b57cec5SDimitry Andric LaneBitmask KillLaneMask; 4110b57cec5SDimitry Andric if (TrackLaneMasks) { 4120b57cec5SDimitry Andric bool IsKill = MO.getSubReg() == 0 || MO.isUndef(); 4130b57cec5SDimitry Andric DefLaneMask = getLaneMaskForMO(MO); 4140b57cec5SDimitry Andric // If we have a <read-undef> flag, none of the lane values comes from an 4150b57cec5SDimitry Andric // earlier instruction. 4160b57cec5SDimitry Andric KillLaneMask = IsKill ? LaneBitmask::getAll() : DefLaneMask; 4170b57cec5SDimitry Andric 4188bcb0991SDimitry Andric if (MO.getSubReg() != 0 && MO.isUndef()) { 4198bcb0991SDimitry Andric // There may be other subregister defs on the same instruction of the same 4208bcb0991SDimitry Andric // register in later operands. The lanes of other defs will now be live 4218bcb0991SDimitry Andric // after this instruction, so these should not be treated as killed by the 4228bcb0991SDimitry Andric // instruction even though they appear to be killed in this one operand. 4234824e7fdSDimitry Andric for (const MachineOperand &OtherMO : 4244824e7fdSDimitry Andric llvm::drop_begin(MI->operands(), OperIdx + 1)) 4258bcb0991SDimitry Andric if (OtherMO.isReg() && OtherMO.isDef() && OtherMO.getReg() == Reg) 4268bcb0991SDimitry Andric KillLaneMask &= ~getLaneMaskForMO(OtherMO); 4278bcb0991SDimitry Andric } 4288bcb0991SDimitry Andric 4290b57cec5SDimitry Andric // Clear undef flag, we'll re-add it later once we know which subregister 4300b57cec5SDimitry Andric // Def is first. 
4310b57cec5SDimitry Andric MO.setIsUndef(false); 4320b57cec5SDimitry Andric } else { 4330b57cec5SDimitry Andric DefLaneMask = LaneBitmask::getAll(); 4340b57cec5SDimitry Andric KillLaneMask = LaneBitmask::getAll(); 4350b57cec5SDimitry Andric } 4360b57cec5SDimitry Andric 4370b57cec5SDimitry Andric if (MO.isDead()) { 4388bcb0991SDimitry Andric assert(deadDefHasNoUse(MO) && "Dead defs should have no uses"); 4390b57cec5SDimitry Andric } else { 4400b57cec5SDimitry Andric // Add data dependence to all uses we found so far. 4410b57cec5SDimitry Andric const TargetSubtargetInfo &ST = MF.getSubtarget(); 4420b57cec5SDimitry Andric for (VReg2SUnitOperIdxMultiMap::iterator I = CurrentVRegUses.find(Reg), 4430b57cec5SDimitry Andric E = CurrentVRegUses.end(); I != E; /*empty*/) { 4440b57cec5SDimitry Andric LaneBitmask LaneMask = I->LaneMask; 4450b57cec5SDimitry Andric // Ignore uses of other lanes. 4460b57cec5SDimitry Andric if ((LaneMask & KillLaneMask).none()) { 4470b57cec5SDimitry Andric ++I; 4480b57cec5SDimitry Andric continue; 4490b57cec5SDimitry Andric } 4500b57cec5SDimitry Andric 4510b57cec5SDimitry Andric if ((LaneMask & DefLaneMask).any()) { 4520b57cec5SDimitry Andric SUnit *UseSU = I->SU; 4530b57cec5SDimitry Andric MachineInstr *Use = UseSU->getInstr(); 4540b57cec5SDimitry Andric SDep Dep(SU, SDep::Data, Reg); 4550b57cec5SDimitry Andric Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use, 4560b57cec5SDimitry Andric I->OperandIndex)); 457*0fca6ea1SDimitry Andric ST.adjustSchedDependency(SU, OperIdx, UseSU, I->OperandIndex, Dep, 458*0fca6ea1SDimitry Andric &SchedModel); 4590b57cec5SDimitry Andric UseSU->addPred(Dep); 4600b57cec5SDimitry Andric } 4610b57cec5SDimitry Andric 4620b57cec5SDimitry Andric LaneMask &= ~KillLaneMask; 4630b57cec5SDimitry Andric // If we found a Def for all lanes of this use, remove it from the list. 
4640b57cec5SDimitry Andric if (LaneMask.any()) { 4650b57cec5SDimitry Andric I->LaneMask = LaneMask; 4660b57cec5SDimitry Andric ++I; 4670b57cec5SDimitry Andric } else 4680b57cec5SDimitry Andric I = CurrentVRegUses.erase(I); 4690b57cec5SDimitry Andric } 4700b57cec5SDimitry Andric } 4710b57cec5SDimitry Andric 4720b57cec5SDimitry Andric // Shortcut: Singly defined vregs do not have output/anti dependencies. 4730b57cec5SDimitry Andric if (MRI.hasOneDef(Reg)) 4740b57cec5SDimitry Andric return; 4750b57cec5SDimitry Andric 4760b57cec5SDimitry Andric // Add output dependence to the next nearest defs of this vreg. 4770b57cec5SDimitry Andric // 4780b57cec5SDimitry Andric // Unless this definition is dead, the output dependence should be 4790b57cec5SDimitry Andric // transitively redundant with antidependencies from this definition's 4800b57cec5SDimitry Andric // uses. We're conservative for now until we have a way to guarantee the uses 4810b57cec5SDimitry Andric // are not eliminated sometime during scheduling. The output dependence edge 4820b57cec5SDimitry Andric // is also useful if output latency exceeds def-use latency. 4830b57cec5SDimitry Andric LaneBitmask LaneMask = DefLaneMask; 4840b57cec5SDimitry Andric for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg), 4850b57cec5SDimitry Andric CurrentVRegDefs.end())) { 4860b57cec5SDimitry Andric // Ignore defs for other lanes. 4870b57cec5SDimitry Andric if ((V2SU.LaneMask & LaneMask).none()) 4880b57cec5SDimitry Andric continue; 4890b57cec5SDimitry Andric // Add an output dependence. 4900b57cec5SDimitry Andric SUnit *DefSU = V2SU.SU; 4910b57cec5SDimitry Andric // Ignore additional defs of the same lanes in one instruction. This can 4920b57cec5SDimitry Andric // happen because lanemasks are shared for targets with too many 4930b57cec5SDimitry Andric // subregisters. 
We also use some representration tricks/hacks where we 4940b57cec5SDimitry Andric // add super-register defs/uses, to imply that although we only access parts 4950b57cec5SDimitry Andric // of the reg we care about the full one. 4960b57cec5SDimitry Andric if (DefSU == SU) 4970b57cec5SDimitry Andric continue; 4980b57cec5SDimitry Andric SDep Dep(SU, SDep::Output, Reg); 4990b57cec5SDimitry Andric Dep.setLatency( 5000b57cec5SDimitry Andric SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr())); 5010b57cec5SDimitry Andric DefSU->addPred(Dep); 5020b57cec5SDimitry Andric 5030b57cec5SDimitry Andric // Update current definition. This can get tricky if the def was about a 5040b57cec5SDimitry Andric // bigger lanemask before. We then have to shrink it and create a new 5050b57cec5SDimitry Andric // VReg2SUnit for the non-overlapping part. 5060b57cec5SDimitry Andric LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask; 5070b57cec5SDimitry Andric LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask; 5080b57cec5SDimitry Andric V2SU.SU = SU; 5090b57cec5SDimitry Andric V2SU.LaneMask = OverlapMask; 5100b57cec5SDimitry Andric if (NonOverlapMask.any()) 5110b57cec5SDimitry Andric CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, DefSU)); 5120b57cec5SDimitry Andric } 5130b57cec5SDimitry Andric // If there was no CurrentVRegDefs entry for some lanes yet, create one. 5140b57cec5SDimitry Andric if (LaneMask.any()) 5150b57cec5SDimitry Andric CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU)); 5160b57cec5SDimitry Andric } 5170b57cec5SDimitry Andric 5180b57cec5SDimitry Andric /// Adds a register data dependency if the instruction that defines the 5190b57cec5SDimitry Andric /// virtual register used at OperIdx is mapped to an SUnit. Add a register 5200b57cec5SDimitry Andric /// antidependency from this SUnit to instructions that occur later in the same 5210b57cec5SDimitry Andric /// scheduling region if they write the virtual register. 
5220b57cec5SDimitry Andric /// 5230b57cec5SDimitry Andric /// TODO: Handle ExitSU "uses" properly. 5240b57cec5SDimitry Andric void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { 5250b57cec5SDimitry Andric const MachineInstr *MI = SU->getInstr(); 526fe6060f1SDimitry Andric assert(!MI->isDebugOrPseudoInstr()); 527e8d8bef9SDimitry Andric 5280b57cec5SDimitry Andric const MachineOperand &MO = MI->getOperand(OperIdx); 5298bcb0991SDimitry Andric Register Reg = MO.getReg(); 5300b57cec5SDimitry Andric 5310b57cec5SDimitry Andric // Remember the use. Data dependencies will be added when we find the def. 5320b57cec5SDimitry Andric LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO) 5330b57cec5SDimitry Andric : LaneBitmask::getAll(); 5340b57cec5SDimitry Andric CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU)); 5350b57cec5SDimitry Andric 5360b57cec5SDimitry Andric // Add antidependences to the following defs of the vreg. 5370b57cec5SDimitry Andric for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg), 5380b57cec5SDimitry Andric CurrentVRegDefs.end())) { 5390b57cec5SDimitry Andric // Ignore defs for unrelated lanes. 5400b57cec5SDimitry Andric LaneBitmask PrevDefLaneMask = V2SU.LaneMask; 5410b57cec5SDimitry Andric if ((PrevDefLaneMask & LaneMask).none()) 5420b57cec5SDimitry Andric continue; 5430b57cec5SDimitry Andric if (V2SU.SU == SU) 5440b57cec5SDimitry Andric continue; 5450b57cec5SDimitry Andric 5460b57cec5SDimitry Andric V2SU.SU->addPred(SDep(SU, SDep::Anti, Reg)); 5470b57cec5SDimitry Andric } 5480b57cec5SDimitry Andric } 5490b57cec5SDimitry Andric 5500b57cec5SDimitry Andric /// Returns true if MI is an instruction we are unable to reason about 5510b57cec5SDimitry Andric /// (like a call or something with unmodeled side effects). 
552fcaf7f86SDimitry Andric static inline bool isGlobalMemoryObject(MachineInstr *MI) { 5530b57cec5SDimitry Andric return MI->isCall() || MI->hasUnmodeledSideEffects() || 554fcaf7f86SDimitry Andric (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad()); 5550b57cec5SDimitry Andric } 5560b57cec5SDimitry Andric 5570b57cec5SDimitry Andric void ScheduleDAGInstrs::addChainDependency (SUnit *SUa, SUnit *SUb, 5580b57cec5SDimitry Andric unsigned Latency) { 5590b57cec5SDimitry Andric if (SUa->getInstr()->mayAlias(AAForDep, *SUb->getInstr(), UseTBAA)) { 5600b57cec5SDimitry Andric SDep Dep(SUa, SDep::MayAliasMem); 5610b57cec5SDimitry Andric Dep.setLatency(Latency); 5620b57cec5SDimitry Andric SUb->addPred(Dep); 5630b57cec5SDimitry Andric } 5640b57cec5SDimitry Andric } 5650b57cec5SDimitry Andric 5660b57cec5SDimitry Andric /// Creates an SUnit for each real instruction, numbered in top-down 5670b57cec5SDimitry Andric /// topological order. The instruction order A < B, implies that no edge exists 5680b57cec5SDimitry Andric /// from B to A. 5690b57cec5SDimitry Andric /// 5700b57cec5SDimitry Andric /// Map each real instruction to its SUnit. 5710b57cec5SDimitry Andric /// 5720b57cec5SDimitry Andric /// After initSUnits, the SUnits vector cannot be resized and the scheduler may 5730b57cec5SDimitry Andric /// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs 5740b57cec5SDimitry Andric /// instead of pointers. 5750b57cec5SDimitry Andric /// 5760b57cec5SDimitry Andric /// MachineScheduler relies on initSUnits numbering the nodes by their order in 5770b57cec5SDimitry Andric /// the original instruction list. 5780b57cec5SDimitry Andric void ScheduleDAGInstrs::initSUnits() { 5790b57cec5SDimitry Andric // We'll be allocating one SUnit for each real instruction in the region, 5800b57cec5SDimitry Andric // which is contained within a basic block. 
5810b57cec5SDimitry Andric SUnits.reserve(NumRegionInstrs); 5820b57cec5SDimitry Andric 5830b57cec5SDimitry Andric for (MachineInstr &MI : make_range(RegionBegin, RegionEnd)) { 584fe6060f1SDimitry Andric if (MI.isDebugOrPseudoInstr()) 5850b57cec5SDimitry Andric continue; 5860b57cec5SDimitry Andric 5870b57cec5SDimitry Andric SUnit *SU = newSUnit(&MI); 5880b57cec5SDimitry Andric MISUnitMap[&MI] = SU; 5890b57cec5SDimitry Andric 5900b57cec5SDimitry Andric SU->isCall = MI.isCall(); 5910b57cec5SDimitry Andric SU->isCommutable = MI.isCommutable(); 5920b57cec5SDimitry Andric 5930b57cec5SDimitry Andric // Assign the Latency field of SU using target-provided information. 5940b57cec5SDimitry Andric SU->Latency = SchedModel.computeInstrLatency(SU->getInstr()); 5950b57cec5SDimitry Andric 5960b57cec5SDimitry Andric // If this SUnit uses a reserved or unbuffered resource, mark it as such. 5970b57cec5SDimitry Andric // 5980b57cec5SDimitry Andric // Reserved resources block an instruction from issuing and stall the 5990b57cec5SDimitry Andric // entire pipeline. These are identified by BufferSize=0. 6000b57cec5SDimitry Andric // 6010b57cec5SDimitry Andric // Unbuffered resources prevent execution of subsequent instructions that 6020b57cec5SDimitry Andric // require the same resources. This is used for in-order execution pipelines 6030b57cec5SDimitry Andric // within an out-of-order core. These are identified by BufferSize=1. 
6040b57cec5SDimitry Andric if (SchedModel.hasInstrSchedModel()) { 6050b57cec5SDimitry Andric const MCSchedClassDesc *SC = getSchedClass(SU); 6060b57cec5SDimitry Andric for (const MCWriteProcResEntry &PRE : 6070b57cec5SDimitry Andric make_range(SchedModel.getWriteProcResBegin(SC), 6080b57cec5SDimitry Andric SchedModel.getWriteProcResEnd(SC))) { 6090b57cec5SDimitry Andric switch (SchedModel.getProcResource(PRE.ProcResourceIdx)->BufferSize) { 6100b57cec5SDimitry Andric case 0: 6110b57cec5SDimitry Andric SU->hasReservedResource = true; 6120b57cec5SDimitry Andric break; 6130b57cec5SDimitry Andric case 1: 6140b57cec5SDimitry Andric SU->isUnbuffered = true; 6150b57cec5SDimitry Andric break; 6160b57cec5SDimitry Andric default: 6170b57cec5SDimitry Andric break; 6180b57cec5SDimitry Andric } 6190b57cec5SDimitry Andric } 6200b57cec5SDimitry Andric } 6210b57cec5SDimitry Andric } 6220b57cec5SDimitry Andric } 6230b57cec5SDimitry Andric 6240b57cec5SDimitry Andric class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> { 6250b57cec5SDimitry Andric /// Current total number of SUs in map. 6260b57cec5SDimitry Andric unsigned NumNodes = 0; 6270b57cec5SDimitry Andric 6280b57cec5SDimitry Andric /// 1 for loads, 0 for stores. (see comment in SUList) 6290b57cec5SDimitry Andric unsigned TrueMemOrderLatency; 6300b57cec5SDimitry Andric 6310b57cec5SDimitry Andric public: 6320b57cec5SDimitry Andric Value2SUsMap(unsigned lat = 0) : TrueMemOrderLatency(lat) {} 6330b57cec5SDimitry Andric 6340b57cec5SDimitry Andric /// To keep NumNodes up to date, insert() is used instead of 6350b57cec5SDimitry Andric /// this operator w/ push_back(). 6360b57cec5SDimitry Andric ValueType &operator[](const SUList &Key) { 6370b57cec5SDimitry Andric llvm_unreachable("Don't use. Use insert() instead."); }; 6380b57cec5SDimitry Andric 6390b57cec5SDimitry Andric /// Adds SU to the SUList of V. If Map grows huge, reduce its size by calling 6400b57cec5SDimitry Andric /// reduce(). 
6410b57cec5SDimitry Andric void inline insert(SUnit *SU, ValueType V) { 6420b57cec5SDimitry Andric MapVector::operator[](V).push_back(SU); 6430b57cec5SDimitry Andric NumNodes++; 6440b57cec5SDimitry Andric } 6450b57cec5SDimitry Andric 6460b57cec5SDimitry Andric /// Clears the list of SUs mapped to V. 6470b57cec5SDimitry Andric void inline clearList(ValueType V) { 6480b57cec5SDimitry Andric iterator Itr = find(V); 6490b57cec5SDimitry Andric if (Itr != end()) { 6500b57cec5SDimitry Andric assert(NumNodes >= Itr->second.size()); 6510b57cec5SDimitry Andric NumNodes -= Itr->second.size(); 6520b57cec5SDimitry Andric 6530b57cec5SDimitry Andric Itr->second.clear(); 6540b57cec5SDimitry Andric } 6550b57cec5SDimitry Andric } 6560b57cec5SDimitry Andric 6570b57cec5SDimitry Andric /// Clears map from all contents. 6580b57cec5SDimitry Andric void clear() { 6590b57cec5SDimitry Andric MapVector<ValueType, SUList>::clear(); 6600b57cec5SDimitry Andric NumNodes = 0; 6610b57cec5SDimitry Andric } 6620b57cec5SDimitry Andric 6630b57cec5SDimitry Andric unsigned inline size() const { return NumNodes; } 6640b57cec5SDimitry Andric 6650b57cec5SDimitry Andric /// Counts the number of SUs in this map after a reduction. 
6660b57cec5SDimitry Andric void reComputeSize() { 6670b57cec5SDimitry Andric NumNodes = 0; 6680b57cec5SDimitry Andric for (auto &I : *this) 6690b57cec5SDimitry Andric NumNodes += I.second.size(); 6700b57cec5SDimitry Andric } 6710b57cec5SDimitry Andric 6720b57cec5SDimitry Andric unsigned inline getTrueMemOrderLatency() const { 6730b57cec5SDimitry Andric return TrueMemOrderLatency; 6740b57cec5SDimitry Andric } 6750b57cec5SDimitry Andric 6760b57cec5SDimitry Andric void dump(); 6770b57cec5SDimitry Andric }; 6780b57cec5SDimitry Andric 6790b57cec5SDimitry Andric void ScheduleDAGInstrs::addChainDependencies(SUnit *SU, 6800b57cec5SDimitry Andric Value2SUsMap &Val2SUsMap) { 6810b57cec5SDimitry Andric for (auto &I : Val2SUsMap) 6820b57cec5SDimitry Andric addChainDependencies(SU, I.second, 6830b57cec5SDimitry Andric Val2SUsMap.getTrueMemOrderLatency()); 6840b57cec5SDimitry Andric } 6850b57cec5SDimitry Andric 6860b57cec5SDimitry Andric void ScheduleDAGInstrs::addChainDependencies(SUnit *SU, 6870b57cec5SDimitry Andric Value2SUsMap &Val2SUsMap, 6880b57cec5SDimitry Andric ValueType V) { 6890b57cec5SDimitry Andric Value2SUsMap::iterator Itr = Val2SUsMap.find(V); 6900b57cec5SDimitry Andric if (Itr != Val2SUsMap.end()) 6910b57cec5SDimitry Andric addChainDependencies(SU, Itr->second, 6920b57cec5SDimitry Andric Val2SUsMap.getTrueMemOrderLatency()); 6930b57cec5SDimitry Andric } 6940b57cec5SDimitry Andric 6950b57cec5SDimitry Andric void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) { 6960b57cec5SDimitry Andric assert(BarrierChain != nullptr); 6970b57cec5SDimitry Andric 698bdd1243dSDimitry Andric for (auto &[V, SUs] : map) { 699bdd1243dSDimitry Andric (void)V; 700bdd1243dSDimitry Andric for (auto *SU : SUs) 7010b57cec5SDimitry Andric SU->addPredBarrier(BarrierChain); 7020b57cec5SDimitry Andric } 7030b57cec5SDimitry Andric map.clear(); 7040b57cec5SDimitry Andric } 7050b57cec5SDimitry Andric 7060b57cec5SDimitry Andric void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) { 
7070b57cec5SDimitry Andric assert(BarrierChain != nullptr); 7080b57cec5SDimitry Andric 7090b57cec5SDimitry Andric // Go through all lists of SUs. 7100b57cec5SDimitry Andric for (Value2SUsMap::iterator I = map.begin(), EE = map.end(); I != EE;) { 7110b57cec5SDimitry Andric Value2SUsMap::iterator CurrItr = I++; 7120b57cec5SDimitry Andric SUList &sus = CurrItr->second; 7130b57cec5SDimitry Andric SUList::iterator SUItr = sus.begin(), SUEE = sus.end(); 7140b57cec5SDimitry Andric for (; SUItr != SUEE; ++SUItr) { 7150b57cec5SDimitry Andric // Stop on BarrierChain or any instruction above it. 7160b57cec5SDimitry Andric if ((*SUItr)->NodeNum <= BarrierChain->NodeNum) 7170b57cec5SDimitry Andric break; 7180b57cec5SDimitry Andric 7190b57cec5SDimitry Andric (*SUItr)->addPredBarrier(BarrierChain); 7200b57cec5SDimitry Andric } 7210b57cec5SDimitry Andric 7220b57cec5SDimitry Andric // Remove also the BarrierChain from list if present. 7230b57cec5SDimitry Andric if (SUItr != SUEE && *SUItr == BarrierChain) 7240b57cec5SDimitry Andric SUItr++; 7250b57cec5SDimitry Andric 7260b57cec5SDimitry Andric // Remove all SUs that are now successors of BarrierChain. 7270b57cec5SDimitry Andric if (SUItr != sus.begin()) 7280b57cec5SDimitry Andric sus.erase(sus.begin(), SUItr); 7290b57cec5SDimitry Andric } 7300b57cec5SDimitry Andric 7310b57cec5SDimitry Andric // Remove all entries with empty su lists. 7320b57cec5SDimitry Andric map.remove_if([&](std::pair<ValueType, SUList> &mapEntry) { 7330b57cec5SDimitry Andric return (mapEntry.second.empty()); }); 7340b57cec5SDimitry Andric 7350b57cec5SDimitry Andric // Recompute the size of the map (NumNodes). 
7360b57cec5SDimitry Andric map.reComputeSize(); 7370b57cec5SDimitry Andric } 7380b57cec5SDimitry Andric 7398bcb0991SDimitry Andric void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA, 7400b57cec5SDimitry Andric RegPressureTracker *RPTracker, 7410b57cec5SDimitry Andric PressureDiffs *PDiffs, 7420b57cec5SDimitry Andric LiveIntervals *LIS, 7430b57cec5SDimitry Andric bool TrackLaneMasks) { 7440b57cec5SDimitry Andric const TargetSubtargetInfo &ST = MF.getSubtarget(); 7450b57cec5SDimitry Andric bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI 7460b57cec5SDimitry Andric : ST.useAA(); 7470b57cec5SDimitry Andric AAForDep = UseAA ? AA : nullptr; 7480b57cec5SDimitry Andric 7490b57cec5SDimitry Andric BarrierChain = nullptr; 7500b57cec5SDimitry Andric 7510b57cec5SDimitry Andric this->TrackLaneMasks = TrackLaneMasks; 7520b57cec5SDimitry Andric MISUnitMap.clear(); 7530b57cec5SDimitry Andric ScheduleDAG::clearDAG(); 7540b57cec5SDimitry Andric 7550b57cec5SDimitry Andric // Create an SUnit for each real instruction. 7560b57cec5SDimitry Andric initSUnits(); 7570b57cec5SDimitry Andric 7580b57cec5SDimitry Andric if (PDiffs) 7590b57cec5SDimitry Andric PDiffs->init(SUnits.size()); 7600b57cec5SDimitry Andric 7610b57cec5SDimitry Andric // We build scheduling units by walking a block's instruction list 7620b57cec5SDimitry Andric // from bottom to top. 7630b57cec5SDimitry Andric 7640b57cec5SDimitry Andric // Each MIs' memory operand(s) is analyzed to a list of underlying 7650b57cec5SDimitry Andric // objects. The SU is then inserted in the SUList(s) mapped from the 7660b57cec5SDimitry Andric // Value(s). Each Value thus gets mapped to lists of SUs depending 7670b57cec5SDimitry Andric // on it, stores and loads kept separately. Two SUs are trivially 7680b57cec5SDimitry Andric // non-aliasing if they both depend on only identified Values and do 7690b57cec5SDimitry Andric // not share any common Value. 
7700b57cec5SDimitry Andric Value2SUsMap Stores, Loads(1 /*TrueMemOrderLatency*/); 7710b57cec5SDimitry Andric 7720b57cec5SDimitry Andric // Certain memory accesses are known to not alias any SU in Stores 7730b57cec5SDimitry Andric // or Loads, and have therefore their own 'NonAlias' 7740b57cec5SDimitry Andric // domain. E.g. spill / reload instructions never alias LLVM I/R 7750b57cec5SDimitry Andric // Values. It would be nice to assume that this type of memory 7760b57cec5SDimitry Andric // accesses always have a proper memory operand modelling, and are 7770b57cec5SDimitry Andric // therefore never unanalyzable, but this is conservatively not 7780b57cec5SDimitry Andric // done. 7790b57cec5SDimitry Andric Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/); 7800b57cec5SDimitry Andric 7810b57cec5SDimitry Andric // Track all instructions that may raise floating-point exceptions. 7820b57cec5SDimitry Andric // These do not depend on one other (or normal loads or stores), but 7830b57cec5SDimitry Andric // must not be rescheduled across global barriers. Note that we don't 7840b57cec5SDimitry Andric // really need a "map" here since we don't track those MIs by value; 7850b57cec5SDimitry Andric // using the same Value2SUsMap data type here is simply a matter of 7860b57cec5SDimitry Andric // convenience. 7870b57cec5SDimitry Andric Value2SUsMap FPExceptions; 7880b57cec5SDimitry Andric 7890b57cec5SDimitry Andric // Remove any stale debug info; sometimes BuildSchedGraph is called again 7900b57cec5SDimitry Andric // without emitting the info from the previous call. 
7910b57cec5SDimitry Andric DbgValues.clear(); 7920b57cec5SDimitry Andric FirstDbgValue = nullptr; 7930b57cec5SDimitry Andric 7940b57cec5SDimitry Andric assert(Defs.empty() && Uses.empty() && 7950b57cec5SDimitry Andric "Only BuildGraph should update Defs/Uses"); 7960b57cec5SDimitry Andric Defs.setUniverse(TRI->getNumRegs()); 7970b57cec5SDimitry Andric Uses.setUniverse(TRI->getNumRegs()); 7980b57cec5SDimitry Andric 7990b57cec5SDimitry Andric assert(CurrentVRegDefs.empty() && "nobody else should use CurrentVRegDefs"); 8000b57cec5SDimitry Andric assert(CurrentVRegUses.empty() && "nobody else should use CurrentVRegUses"); 8010b57cec5SDimitry Andric unsigned NumVirtRegs = MRI.getNumVirtRegs(); 8020b57cec5SDimitry Andric CurrentVRegDefs.setUniverse(NumVirtRegs); 8030b57cec5SDimitry Andric CurrentVRegUses.setUniverse(NumVirtRegs); 8040b57cec5SDimitry Andric 8050b57cec5SDimitry Andric // Model data dependencies between instructions being scheduled and the 8060b57cec5SDimitry Andric // ExitSU. 8070b57cec5SDimitry Andric addSchedBarrierDeps(); 8080b57cec5SDimitry Andric 8090b57cec5SDimitry Andric // Walk the list of instructions, from bottom moving up. 
8100b57cec5SDimitry Andric MachineInstr *DbgMI = nullptr; 8110b57cec5SDimitry Andric for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin; 8120b57cec5SDimitry Andric MII != MIE; --MII) { 8130b57cec5SDimitry Andric MachineInstr &MI = *std::prev(MII); 8140b57cec5SDimitry Andric if (DbgMI) { 815bdd1243dSDimitry Andric DbgValues.emplace_back(DbgMI, &MI); 8160b57cec5SDimitry Andric DbgMI = nullptr; 8170b57cec5SDimitry Andric } 8180b57cec5SDimitry Andric 819fe6060f1SDimitry Andric if (MI.isDebugValue() || MI.isDebugPHI()) { 8200b57cec5SDimitry Andric DbgMI = &MI; 8210b57cec5SDimitry Andric continue; 8220b57cec5SDimitry Andric } 823fe6060f1SDimitry Andric 824fe6060f1SDimitry Andric if (MI.isDebugLabel() || MI.isDebugRef() || MI.isPseudoProbe()) 8250b57cec5SDimitry Andric continue; 8260b57cec5SDimitry Andric 8270b57cec5SDimitry Andric SUnit *SU = MISUnitMap[&MI]; 8280b57cec5SDimitry Andric assert(SU && "No SUnit mapped to this MI"); 8290b57cec5SDimitry Andric 8300b57cec5SDimitry Andric if (RPTracker) { 8310b57cec5SDimitry Andric RegisterOperands RegOpers; 8320b57cec5SDimitry Andric RegOpers.collect(MI, *TRI, MRI, TrackLaneMasks, false); 8330b57cec5SDimitry Andric if (TrackLaneMasks) { 8340b57cec5SDimitry Andric SlotIndex SlotIdx = LIS->getInstructionIndex(MI); 8350b57cec5SDimitry Andric RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx); 8360b57cec5SDimitry Andric } 8370b57cec5SDimitry Andric if (PDiffs != nullptr) 8380b57cec5SDimitry Andric PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI); 8390b57cec5SDimitry Andric 8400b57cec5SDimitry Andric if (RPTracker->getPos() == RegionEnd || &*RPTracker->getPos() != &MI) 8410b57cec5SDimitry Andric RPTracker->recedeSkipDebugValues(); 8420b57cec5SDimitry Andric assert(&*RPTracker->getPos() == &MI && "RPTracker in sync"); 8430b57cec5SDimitry Andric RPTracker->recede(RegOpers); 8440b57cec5SDimitry Andric } 8450b57cec5SDimitry Andric 8460b57cec5SDimitry Andric assert( 8470b57cec5SDimitry Andric (CanHandleTerminators || 
(!MI.isTerminator() && !MI.isPosition())) && 8480b57cec5SDimitry Andric "Cannot schedule terminators or labels!"); 8490b57cec5SDimitry Andric 8500b57cec5SDimitry Andric // Add register-based dependencies (data, anti, and output). 8510b57cec5SDimitry Andric // For some instructions (calls, returns, inline-asm, etc.) there can 8520b57cec5SDimitry Andric // be explicit uses and implicit defs, in which case the use will appear 8530b57cec5SDimitry Andric // on the operand list before the def. Do two passes over the operand 8540b57cec5SDimitry Andric // list to make sure that defs are processed before any uses. 8550b57cec5SDimitry Andric bool HasVRegDef = false; 8560b57cec5SDimitry Andric for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) { 8570b57cec5SDimitry Andric const MachineOperand &MO = MI.getOperand(j); 8580b57cec5SDimitry Andric if (!MO.isReg() || !MO.isDef()) 8590b57cec5SDimitry Andric continue; 8608bcb0991SDimitry Andric Register Reg = MO.getReg(); 861bdd1243dSDimitry Andric if (Reg.isPhysical()) { 8620b57cec5SDimitry Andric addPhysRegDeps(SU, j); 863bdd1243dSDimitry Andric } else if (Reg.isVirtual()) { 8640b57cec5SDimitry Andric HasVRegDef = true; 8650b57cec5SDimitry Andric addVRegDefDeps(SU, j); 8660b57cec5SDimitry Andric } 8670b57cec5SDimitry Andric } 8680b57cec5SDimitry Andric // Now process all uses. 8690b57cec5SDimitry Andric for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) { 8700b57cec5SDimitry Andric const MachineOperand &MO = MI.getOperand(j); 8710b57cec5SDimitry Andric // Only look at use operands. 8720b57cec5SDimitry Andric // We do not need to check for MO.readsReg() here because subsequent 8730b57cec5SDimitry Andric // subregister defs will get output dependence edges and need no 8740b57cec5SDimitry Andric // additional use dependencies. 
8750b57cec5SDimitry Andric if (!MO.isReg() || !MO.isUse()) 8760b57cec5SDimitry Andric continue; 8778bcb0991SDimitry Andric Register Reg = MO.getReg(); 878bdd1243dSDimitry Andric if (Reg.isPhysical()) { 8790b57cec5SDimitry Andric addPhysRegDeps(SU, j); 880bdd1243dSDimitry Andric } else if (Reg.isVirtual() && MO.readsReg()) { 8810b57cec5SDimitry Andric addVRegUseDeps(SU, j); 8820b57cec5SDimitry Andric } 8830b57cec5SDimitry Andric } 8840b57cec5SDimitry Andric 8850b57cec5SDimitry Andric // If we haven't seen any uses in this scheduling region, create a 8860b57cec5SDimitry Andric // dependence edge to ExitSU to model the live-out latency. This is required 8870b57cec5SDimitry Andric // for vreg defs with no in-region use, and prefetches with no vreg def. 8880b57cec5SDimitry Andric // 8890b57cec5SDimitry Andric // FIXME: NumDataSuccs would be more precise than NumSuccs here. This 8900b57cec5SDimitry Andric // check currently relies on being called before adding chain deps. 8910b57cec5SDimitry Andric if (SU->NumSuccs == 0 && SU->Latency > 1 && (HasVRegDef || MI.mayLoad())) { 8920b57cec5SDimitry Andric SDep Dep(SU, SDep::Artificial); 8930b57cec5SDimitry Andric Dep.setLatency(SU->Latency - 1); 8940b57cec5SDimitry Andric ExitSU.addPred(Dep); 8950b57cec5SDimitry Andric } 8960b57cec5SDimitry Andric 8970b57cec5SDimitry Andric // Add memory dependencies (Note: isStoreToStackSlot and 8980b57cec5SDimitry Andric // isLoadFromStackSLot are not usable after stack slots are lowered to 8990b57cec5SDimitry Andric // actual addresses). 9000b57cec5SDimitry Andric 9010b57cec5SDimitry Andric // This is a barrier event that acts as a pivotal node in the DAG. 902fcaf7f86SDimitry Andric if (isGlobalMemoryObject(&MI)) { 9030b57cec5SDimitry Andric 9040b57cec5SDimitry Andric // Become the barrier chain. 
9050b57cec5SDimitry Andric if (BarrierChain) 9060b57cec5SDimitry Andric BarrierChain->addPredBarrier(SU); 9070b57cec5SDimitry Andric BarrierChain = SU; 9080b57cec5SDimitry Andric 9090b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Global memory object and new barrier chain: SU(" 9100b57cec5SDimitry Andric << BarrierChain->NodeNum << ").\n";); 9110b57cec5SDimitry Andric 9120b57cec5SDimitry Andric // Add dependencies against everything below it and clear maps. 9130b57cec5SDimitry Andric addBarrierChain(Stores); 9140b57cec5SDimitry Andric addBarrierChain(Loads); 9150b57cec5SDimitry Andric addBarrierChain(NonAliasStores); 9160b57cec5SDimitry Andric addBarrierChain(NonAliasLoads); 9170b57cec5SDimitry Andric addBarrierChain(FPExceptions); 9180b57cec5SDimitry Andric 9190b57cec5SDimitry Andric continue; 9200b57cec5SDimitry Andric } 9210b57cec5SDimitry Andric 9220b57cec5SDimitry Andric // Instructions that may raise FP exceptions may not be moved 9230b57cec5SDimitry Andric // across any global barriers. 9240b57cec5SDimitry Andric if (MI.mayRaiseFPException()) { 9250b57cec5SDimitry Andric if (BarrierChain) 9260b57cec5SDimitry Andric BarrierChain->addPredBarrier(SU); 9270b57cec5SDimitry Andric 9280b57cec5SDimitry Andric FPExceptions.insert(SU, UnknownValue); 9290b57cec5SDimitry Andric 9300b57cec5SDimitry Andric if (FPExceptions.size() >= HugeRegion) { 9310b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Reducing FPExceptions map.\n";); 9320b57cec5SDimitry Andric Value2SUsMap empty; 9330b57cec5SDimitry Andric reduceHugeMemNodeMaps(FPExceptions, empty, getReductionSize()); 9340b57cec5SDimitry Andric } 9350b57cec5SDimitry Andric } 9360b57cec5SDimitry Andric 9370b57cec5SDimitry Andric // If it's not a store or a variant load, we're done. 
9380b57cec5SDimitry Andric if (!MI.mayStore() && 939fcaf7f86SDimitry Andric !(MI.mayLoad() && !MI.isDereferenceableInvariantLoad())) 9400b57cec5SDimitry Andric continue; 9410b57cec5SDimitry Andric 9420b57cec5SDimitry Andric // Always add dependecy edge to BarrierChain if present. 9430b57cec5SDimitry Andric if (BarrierChain) 9440b57cec5SDimitry Andric BarrierChain->addPredBarrier(SU); 9450b57cec5SDimitry Andric 9460b57cec5SDimitry Andric // Find the underlying objects for MI. The Objs vector is either 9470b57cec5SDimitry Andric // empty, or filled with the Values of memory locations which this 9480b57cec5SDimitry Andric // SU depends on. 9490b57cec5SDimitry Andric UnderlyingObjectsVector Objs; 9500b57cec5SDimitry Andric bool ObjsFound = getUnderlyingObjectsForInstr(&MI, MFI, Objs, 9510b57cec5SDimitry Andric MF.getDataLayout()); 9520b57cec5SDimitry Andric 9530b57cec5SDimitry Andric if (MI.mayStore()) { 9540b57cec5SDimitry Andric if (!ObjsFound) { 9550b57cec5SDimitry Andric // An unknown store depends on all stores and loads. 9560b57cec5SDimitry Andric addChainDependencies(SU, Stores); 9570b57cec5SDimitry Andric addChainDependencies(SU, NonAliasStores); 9580b57cec5SDimitry Andric addChainDependencies(SU, Loads); 9590b57cec5SDimitry Andric addChainDependencies(SU, NonAliasLoads); 9600b57cec5SDimitry Andric 9610b57cec5SDimitry Andric // Map this store to 'UnknownValue'. 9620b57cec5SDimitry Andric Stores.insert(SU, UnknownValue); 9630b57cec5SDimitry Andric } else { 9640b57cec5SDimitry Andric // Add precise dependencies against all previously seen memory 9650b57cec5SDimitry Andric // accesses mapped to the same Value(s). 9660b57cec5SDimitry Andric for (const UnderlyingObject &UnderlObj : Objs) { 9670b57cec5SDimitry Andric ValueType V = UnderlObj.getValue(); 9680b57cec5SDimitry Andric bool ThisMayAlias = UnderlObj.mayAlias(); 9690b57cec5SDimitry Andric 9700b57cec5SDimitry Andric // Add dependencies to previous stores and loads mapped to V. 
9710b57cec5SDimitry Andric addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V); 9720b57cec5SDimitry Andric addChainDependencies(SU, (ThisMayAlias ? Loads : NonAliasLoads), V); 9730b57cec5SDimitry Andric } 9740b57cec5SDimitry Andric // Update the store map after all chains have been added to avoid adding 9750b57cec5SDimitry Andric // self-loop edge if multiple underlying objects are present. 9760b57cec5SDimitry Andric for (const UnderlyingObject &UnderlObj : Objs) { 9770b57cec5SDimitry Andric ValueType V = UnderlObj.getValue(); 9780b57cec5SDimitry Andric bool ThisMayAlias = UnderlObj.mayAlias(); 9790b57cec5SDimitry Andric 9800b57cec5SDimitry Andric // Map this store to V. 9810b57cec5SDimitry Andric (ThisMayAlias ? Stores : NonAliasStores).insert(SU, V); 9820b57cec5SDimitry Andric } 9830b57cec5SDimitry Andric // The store may have dependencies to unanalyzable loads and 9840b57cec5SDimitry Andric // stores. 9850b57cec5SDimitry Andric addChainDependencies(SU, Loads, UnknownValue); 9860b57cec5SDimitry Andric addChainDependencies(SU, Stores, UnknownValue); 9870b57cec5SDimitry Andric } 9880b57cec5SDimitry Andric } else { // SU is a load. 9890b57cec5SDimitry Andric if (!ObjsFound) { 9900b57cec5SDimitry Andric // An unknown load depends on all stores. 9910b57cec5SDimitry Andric addChainDependencies(SU, Stores); 9920b57cec5SDimitry Andric addChainDependencies(SU, NonAliasStores); 9930b57cec5SDimitry Andric 9940b57cec5SDimitry Andric Loads.insert(SU, UnknownValue); 9950b57cec5SDimitry Andric } else { 9960b57cec5SDimitry Andric for (const UnderlyingObject &UnderlObj : Objs) { 9970b57cec5SDimitry Andric ValueType V = UnderlObj.getValue(); 9980b57cec5SDimitry Andric bool ThisMayAlias = UnderlObj.mayAlias(); 9990b57cec5SDimitry Andric 10000b57cec5SDimitry Andric // Add precise dependencies against all previously seen stores 10010b57cec5SDimitry Andric // mapping to the same Value(s). 10020b57cec5SDimitry Andric addChainDependencies(SU, (ThisMayAlias ? 
Stores : NonAliasStores), V); 10030b57cec5SDimitry Andric 10040b57cec5SDimitry Andric // Map this load to V. 10050b57cec5SDimitry Andric (ThisMayAlias ? Loads : NonAliasLoads).insert(SU, V); 10060b57cec5SDimitry Andric } 10070b57cec5SDimitry Andric // The load may have dependencies to unanalyzable stores. 10080b57cec5SDimitry Andric addChainDependencies(SU, Stores, UnknownValue); 10090b57cec5SDimitry Andric } 10100b57cec5SDimitry Andric } 10110b57cec5SDimitry Andric 10120b57cec5SDimitry Andric // Reduce maps if they grow huge. 10130b57cec5SDimitry Andric if (Stores.size() + Loads.size() >= HugeRegion) { 10140b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Reducing Stores and Loads maps.\n";); 10150b57cec5SDimitry Andric reduceHugeMemNodeMaps(Stores, Loads, getReductionSize()); 10160b57cec5SDimitry Andric } 10170b57cec5SDimitry Andric if (NonAliasStores.size() + NonAliasLoads.size() >= HugeRegion) { 10180b57cec5SDimitry Andric LLVM_DEBUG( 10190b57cec5SDimitry Andric dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n";); 10200b57cec5SDimitry Andric reduceHugeMemNodeMaps(NonAliasStores, NonAliasLoads, getReductionSize()); 10210b57cec5SDimitry Andric } 10220b57cec5SDimitry Andric } 10230b57cec5SDimitry Andric 10240b57cec5SDimitry Andric if (DbgMI) 10250b57cec5SDimitry Andric FirstDbgValue = DbgMI; 10260b57cec5SDimitry Andric 10270b57cec5SDimitry Andric Defs.clear(); 10280b57cec5SDimitry Andric Uses.clear(); 10290b57cec5SDimitry Andric CurrentVRegDefs.clear(); 10300b57cec5SDimitry Andric CurrentVRegUses.clear(); 10310b57cec5SDimitry Andric 10320b57cec5SDimitry Andric Topo.MarkDirty(); 10330b57cec5SDimitry Andric } 10340b57cec5SDimitry Andric 10350b57cec5SDimitry Andric raw_ostream &llvm::operator<<(raw_ostream &OS, const PseudoSourceValue* PSV) { 10360b57cec5SDimitry Andric PSV->printCustom(OS); 10370b57cec5SDimitry Andric return OS; 10380b57cec5SDimitry Andric } 10390b57cec5SDimitry Andric 10400b57cec5SDimitry Andric void ScheduleDAGInstrs::Value2SUsMap::dump() 
{ 1041bdd1243dSDimitry Andric for (const auto &[ValType, SUs] : *this) { 104206c3fb27SDimitry Andric if (isa<const Value *>(ValType)) { 104306c3fb27SDimitry Andric const Value *V = cast<const Value *>(ValType); 10440b57cec5SDimitry Andric if (isa<UndefValue>(V)) 10450b57cec5SDimitry Andric dbgs() << "Unknown"; 10460b57cec5SDimitry Andric else 10470b57cec5SDimitry Andric V->printAsOperand(dbgs()); 104806c3fb27SDimitry Andric } else if (isa<const PseudoSourceValue *>(ValType)) 104906c3fb27SDimitry Andric dbgs() << cast<const PseudoSourceValue *>(ValType); 10500b57cec5SDimitry Andric else 10510b57cec5SDimitry Andric llvm_unreachable("Unknown Value type."); 10520b57cec5SDimitry Andric 10530b57cec5SDimitry Andric dbgs() << " : "; 1054bdd1243dSDimitry Andric dumpSUList(SUs); 10550b57cec5SDimitry Andric } 10560b57cec5SDimitry Andric } 10570b57cec5SDimitry Andric 10580b57cec5SDimitry Andric void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores, 10590b57cec5SDimitry Andric Value2SUsMap &loads, unsigned N) { 10600b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n"; stores.dump(); 10610b57cec5SDimitry Andric dbgs() << "Loading SUnits:\n"; loads.dump()); 10620b57cec5SDimitry Andric 10630b57cec5SDimitry Andric // Insert all SU's NodeNums into a vector and sort it. 
10640b57cec5SDimitry Andric std::vector<unsigned> NodeNums; 10650b57cec5SDimitry Andric NodeNums.reserve(stores.size() + loads.size()); 1066bdd1243dSDimitry Andric for (const auto &[V, SUs] : stores) { 1067bdd1243dSDimitry Andric (void)V; 1068bdd1243dSDimitry Andric for (const auto *SU : SUs) 10690b57cec5SDimitry Andric NodeNums.push_back(SU->NodeNum); 1070bdd1243dSDimitry Andric } 1071bdd1243dSDimitry Andric for (const auto &[V, SUs] : loads) { 1072bdd1243dSDimitry Andric (void)V; 1073bdd1243dSDimitry Andric for (const auto *SU : SUs) 10740b57cec5SDimitry Andric NodeNums.push_back(SU->NodeNum); 1075bdd1243dSDimitry Andric } 10760b57cec5SDimitry Andric llvm::sort(NodeNums); 10770b57cec5SDimitry Andric 10780b57cec5SDimitry Andric // The N last elements in NodeNums will be removed, and the SU with 10790b57cec5SDimitry Andric // the lowest NodeNum of them will become the new BarrierChain to 10800b57cec5SDimitry Andric // let the not yet seen SUs have a dependency to the removed SUs. 10810b57cec5SDimitry Andric assert(N <= NodeNums.size()); 10820b57cec5SDimitry Andric SUnit *newBarrierChain = &SUnits[*(NodeNums.end() - N)]; 10830b57cec5SDimitry Andric if (BarrierChain) { 10840b57cec5SDimitry Andric // The aliasing and non-aliasing maps reduce independently of each 10850b57cec5SDimitry Andric // other, but share a common BarrierChain. Check if the 10860b57cec5SDimitry Andric // newBarrierChain is above the former one. If it is not, it may 10870b57cec5SDimitry Andric // introduce a loop to use newBarrierChain, so keep the old one. 
10880b57cec5SDimitry Andric if (newBarrierChain->NodeNum < BarrierChain->NodeNum) { 10890b57cec5SDimitry Andric BarrierChain->addPredBarrier(newBarrierChain); 10900b57cec5SDimitry Andric BarrierChain = newBarrierChain; 10910b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Inserting new barrier chain: SU(" 10920b57cec5SDimitry Andric << BarrierChain->NodeNum << ").\n";); 10930b57cec5SDimitry Andric } 10940b57cec5SDimitry Andric else 10950b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Keeping old barrier chain: SU(" 10960b57cec5SDimitry Andric << BarrierChain->NodeNum << ").\n";); 10970b57cec5SDimitry Andric } 10980b57cec5SDimitry Andric else 10990b57cec5SDimitry Andric BarrierChain = newBarrierChain; 11000b57cec5SDimitry Andric 11010b57cec5SDimitry Andric insertBarrierChain(stores); 11020b57cec5SDimitry Andric insertBarrierChain(loads); 11030b57cec5SDimitry Andric 11040b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "After reduction:\nStoring SUnits:\n"; stores.dump(); 11050b57cec5SDimitry Andric dbgs() << "Loading SUnits:\n"; loads.dump()); 11060b57cec5SDimitry Andric } 11070b57cec5SDimitry Andric 1108*0fca6ea1SDimitry Andric static void toggleKills(const MachineRegisterInfo &MRI, LiveRegUnits &LiveRegs, 11090b57cec5SDimitry Andric MachineInstr &MI, bool addToLiveRegs) { 11100b57cec5SDimitry Andric for (MachineOperand &MO : MI.operands()) { 11110b57cec5SDimitry Andric if (!MO.isReg() || !MO.readsReg()) 11120b57cec5SDimitry Andric continue; 11138bcb0991SDimitry Andric Register Reg = MO.getReg(); 11140b57cec5SDimitry Andric if (!Reg) 11150b57cec5SDimitry Andric continue; 11160b57cec5SDimitry Andric 11170b57cec5SDimitry Andric // Things that are available after the instruction are killed by it. 
    bool IsKill = LiveRegs.available(Reg);

    // Exception: Do not kill reserved registers
    MO.setIsKill(IsKill && !MRI.isReserved(Reg));
    if (addToLiveRegs)
      LiveRegs.addReg(Reg);
  }
}

/// Recompute register kill flags for all instructions in \p MBB by walking
/// the block bottom-up, tracking liveness in LiveRegs starting from the
/// block's live-outs.
void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
  LLVM_DEBUG(dbgs() << "Fixup kills for " << printMBBReference(MBB) << '\n');

  LiveRegs.init(*TRI);
  LiveRegs.addLiveOuts(MBB);

  // Examine block from end to start...
  for (MachineInstr &MI : llvm::reverse(MBB)) {
    // Debug and pseudo instructions must not affect liveness tracking.
    if (MI.isDebugOrPseudoInstr())
      continue;

    // Update liveness. Registers that are def'ed but not used in this
    // instruction are now dead. Mark register and all subregs as they
    // are completely defined.
    for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
      const MachineOperand &MO = *O;
      if (MO.isReg()) {
        if (!MO.isDef())
          continue;
        Register Reg = MO.getReg();
        if (!Reg)
          continue;
        LiveRegs.removeReg(Reg);
      } else if (MO.isRegMask()) {
        // A regmask clobbers every register it does not preserve.
        LiveRegs.removeRegsNotPreserved(MO.getRegMask());
      }
    }

    // If there is a bundle header fix it up first.
    if (!MI.isBundled()) {
      toggleKills(MRI, LiveRegs, MI, true);
    } else {
      MachineBasicBlock::instr_iterator Bundle = MI.getIterator();
      if (MI.isBundle())
        toggleKills(MRI, LiveRegs, MI, false);

      // Some targets make the (questionable) assumption that the instructions
      // inside the bundle are ordered and consequently only the last use of
      // a register inside the bundle can kill it.
      // Walk to the last instruction of the bundle, then process the bundled
      // instructions in reverse back to (and including) the header.
      MachineBasicBlock::instr_iterator I = std::next(Bundle);
      while (I->isBundledWithSucc())
        ++I;
      do {
        if (!I->isDebugOrPseudoInstr())
          toggleKills(MRI, LiveRegs, *I, true);
        --I;
      } while (I != Bundle);
    }
  }
}

/// Print a single scheduling unit: its name, optionally its ready cycles,
/// and the machine instruction it wraps. Debug builds only.
void ScheduleDAGInstrs::dumpNode(const SUnit &SU) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  dumpNodeName(SU);
  if (SchedPrintCycles)
    dbgs() << " [TopReadyCycle = " << SU.TopReadyCycle
           << ", BottomReadyCycle = " << SU.BotReadyCycle << "]";
  dbgs() << ": ";
  SU.getInstr()->dump();
#endif
}

/// Dump the entry node, all regular SUnits, and the exit node. Debug builds
/// only.
void ScheduleDAGInstrs::dump() const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  if (EntrySU.getInstr() != nullptr)
    dumpNodeAll(EntrySU);
  for (const SUnit &SU : SUnits)
    dumpNodeAll(SU);
  if (ExitSU.getInstr() != nullptr)
    dumpNodeAll(ExitSU);
#endif
}

/// Build the label used for \p SU in graph visualization: a sentinel for the
/// entry/exit nodes, otherwise the printed machine instruction.
std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
  std::string s;
  raw_string_ostream oss(s);
  if (SU == &EntrySU)
    oss << "<entry>";
  else if (SU == &ExitSU)
    oss << "<exit>";
  else
    SU->getInstr()->print(oss, /*IsStandalone=*/true);
  return s;
}

/// Return the basic block label. It is not necessarily unique because a block
/// contains multiple scheduling regions. But it is fine for visualization.
std::string ScheduleDAGInstrs::getDAGName() const {
  return "dag." + BB->getFullName();
}

/// Returns true if an edge PredSU -> SuccSU can be added without creating a
/// cycle; edges into the exit node are always acceptable.
bool ScheduleDAGInstrs::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
  return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
}

/// Add the dependency \p PredDep to \p SuccSU, keeping the topological order
/// up to date. Returns false iff the edge would create a cycle.
bool ScheduleDAGInstrs::addEdge(SUnit *SuccSU, const SDep &PredDep) {
  if (SuccSU != &ExitSU) {
    // Do not use WillCreateCycle, it assumes SD scheduling.
    // If Pred is reachable from Succ, then the edge creates a cycle.
    if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
      return false;
    Topo.AddPredQueued(SuccSU, PredDep.getSUnit());
  }
  SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
  // Return true regardless of whether a new edge needed to be inserted.
  return true;
}

//===----------------------------------------------------------------------===//
// SchedDFSResult Implementation
//===----------------------------------------------------------------------===//

namespace llvm {

/// Internal state used to compute SchedDFSResult.
class SchedDFSImpl {
  SchedDFSResult &R;

  /// Join DAG nodes into equivalence classes by their subtree.
  IntEqClasses SubtreeClasses;
  /// List PredSU, SuccSU pairs that represent data edges between subtrees.
  std::vector<std::pair<const SUnit *, const SUnit*>> ConnectionPairs;

  /// Per-subtree-root bookkeeping, indexed by the root's NodeNum.
  struct RootData {
    unsigned NodeID;
    unsigned ParentNodeID;  ///< Parent node (member of the parent subtree).
    unsigned SubInstrCount = 0;  ///< Instr count in this tree only, not
                                 /// children.

    RootData(unsigned id): NodeID(id),
                           ParentNodeID(SchedDFSResult::InvalidSubtreeID) {}

    unsigned getSparseSetIndex() const { return NodeID; }
  };

  /// The current set of subtree roots.
  SparseSet<RootData> RootSet;

public:
  SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) {
    RootSet.setUniverse(R.DFSNodeData.size());
  }

  /// Returns true if this node has been visited by the DFS traversal.
  ///
  /// During visitPostorderNode the Node's SubtreeID is assigned to the Node
  /// ID. Later, SubtreeID is updated but remains valid.
  bool isVisited(const SUnit *SU) const {
    return R.DFSNodeData[SU->NodeNum].SubtreeID
      != SchedDFSResult::InvalidSubtreeID;
  }

  /// Initializes this node's instruction count. We don't need to flag the node
  /// visited until visitPostorder because the DAG cannot have cycles.
  void visitPreorder(const SUnit *SU) {
    // Transient instructions contribute nothing to the instruction count.
    R.DFSNodeData[SU->NodeNum].InstrCount =
      SU->getInstr()->isTransient() ? 0 : 1;
  }

  /// Called once for each node after all predecessors are visited. Revisit this
  /// node's predecessors and potentially join them now that we know the ILP of
  /// the other predecessors.
  void visitPostorderNode(const SUnit *SU) {
    // Mark this node as the root of a subtree. It may be joined with its
    // successors later.
    R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum;
    RootData RData(SU->NodeNum);
    RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1;

    // If any predecessors are still in their own subtree, they either cannot be
    // joined or are large enough to remain separate. If this parent node's
    // total instruction count is not greater than a child subtree by at least
    // the subtree limit, then try to join it now since splitting subtrees is
    // only useful if multiple high-pressure paths are possible.
    unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount;
    for (const SDep &PredDep : SU->Preds) {
      // Subtrees are formed over data edges only.
      if (PredDep.getKind() != SDep::Data)
        continue;
      unsigned PredNum = PredDep.getSUnit()->NodeNum;
      if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit)
        joinPredSubtree(PredDep, SU, /*CheckLimit=*/false);

      // Either link or merge the TreeData entry from the child to the parent.
      if (R.DFSNodeData[PredNum].SubtreeID == PredNum) {
        // If the predecessor's parent is invalid, this is a tree edge and the
        // current node is the parent.
        if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID)
          RootSet[PredNum].ParentNodeID = SU->NodeNum;
      }
      else if (RootSet.count(PredNum)) {
        // The predecessor is not a root, but is still in the root set. This
        // must be the new parent that it was just joined to. Note that
        // RootSet[PredNum].ParentNodeID may either be invalid or may still be
        // set to the original parent.
        RData.SubInstrCount += RootSet[PredNum].SubInstrCount;
        RootSet.erase(PredNum);
      }
    }
    RootSet[SU->NodeNum] = RData;
  }

  /// Called once for each tree edge after calling visitPostOrderNode on
  /// the predecessor. Increment the parent node's instruction count and
  /// preemptively join this subtree to its parent's if it is small enough.
  void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) {
    R.DFSNodeData[Succ->NodeNum].InstrCount
      += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount;
    joinPredSubtree(PredDep, Succ);
  }

  /// Adds a connection for cross edges.
  void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) {
    // Record the pair now; it is resolved to tree IDs in finalize().
    ConnectionPairs.emplace_back(PredDep.getSUnit(), Succ);
  }

  /// Sets each node's subtree ID to the representative ID and records
  /// connections between trees.
  void finalize() {
    SubtreeClasses.compress();
    R.DFSTreeData.resize(SubtreeClasses.getNumClasses());
    assert(SubtreeClasses.getNumClasses() == RootSet.size()
           && "number of roots should match trees");
    for (const RootData &Root : RootSet) {
      unsigned TreeID = SubtreeClasses[Root.NodeID];
      if (Root.ParentNodeID != SchedDFSResult::InvalidSubtreeID)
        R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[Root.ParentNodeID];
      R.DFSTreeData[TreeID].SubInstrCount = Root.SubInstrCount;
      // Note that SubInstrCount may be greater than InstrCount if we joined
      // subtrees across a cross edge. InstrCount will be attributed to the
      // original parent, while SubInstrCount will be attributed to the joined
      // parent.
    }
    R.SubtreeConnections.resize(SubtreeClasses.getNumClasses());
    R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses());
    LLVM_DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n");
    for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) {
      R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx];
      LLVM_DEBUG(dbgs() << "  SU(" << Idx << ") in tree "
                        << R.DFSNodeData[Idx].SubtreeID << '\n');
    }
    // Resolve the recorded cross edges to subtree IDs and register the
    // connections in both directions.
    for (const auto &[Pred, Succ] : ConnectionPairs) {
      unsigned PredTree = SubtreeClasses[Pred->NodeNum];
      unsigned SuccTree = SubtreeClasses[Succ->NodeNum];
      if (PredTree == SuccTree)
        continue;
      unsigned Depth = Pred->getDepth();
      addConnection(PredTree, SuccTree, Depth);
      addConnection(SuccTree, PredTree, Depth);
    }
  }

protected:
  /// Joins the predecessor subtree with the successor that is its DFS parent.
  /// Applies some heuristics before joining.
  bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ,
                       bool CheckLimit = true) {
    assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges");

    // Check if the predecessor is already joined.
    const SUnit *PredSU = PredDep.getSUnit();
    unsigned PredNum = PredSU->NodeNum;
    if (R.DFSNodeData[PredNum].SubtreeID != PredNum)
      return false;

    // Four is the magic number of successors before a node is considered a
    // pinch point.
    unsigned NumDataSucs = 0;
    for (const SDep &SuccDep : PredSU->Succs) {
      if (SuccDep.getKind() == SDep::Data) {
        if (++NumDataSucs >= 4)
          return false;
      }
    }
    // Refuse to join subtrees that already reached the size limit, unless the
    // caller explicitly disabled the check.
    if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit)
      return false;
    R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum;
    SubtreeClasses.join(Succ->NodeNum, PredNum);
    return true;
  }

  /// Called by finalize() to record a connection between trees.
  void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) {
    if (!Depth)
      return;

    // Propagate the connection up the chain of parent trees, keeping the
    // maximum depth seen for each (FromTree, ToTree) pair.
    do {
      SmallVectorImpl<SchedDFSResult::Connection> &Connections =
        R.SubtreeConnections[FromTree];
      for (SchedDFSResult::Connection &C : Connections) {
        if (C.TreeID == ToTree) {
          C.Level = std::max(C.Level, Depth);
          return;
        }
      }
      Connections.push_back(SchedDFSResult::Connection(ToTree, Depth));
      FromTree = R.DFSTreeData[FromTree].ParentTreeID;
    } while (FromTree != SchedDFSResult::InvalidSubtreeID);
  }
};

} // end namespace llvm

namespace {

/// Manage the stack used by a reverse depth-first search over the DAG.
14310b57cec5SDimitry Andric class SchedDAGReverseDFS { 14320b57cec5SDimitry Andric std::vector<std::pair<const SUnit *, SUnit::const_pred_iterator>> DFSStack; 14330b57cec5SDimitry Andric 14340b57cec5SDimitry Andric public: 14350b57cec5SDimitry Andric bool isComplete() const { return DFSStack.empty(); } 14360b57cec5SDimitry Andric 14370b57cec5SDimitry Andric void follow(const SUnit *SU) { 1438bdd1243dSDimitry Andric DFSStack.emplace_back(SU, SU->Preds.begin()); 14390b57cec5SDimitry Andric } 14400b57cec5SDimitry Andric void advance() { ++DFSStack.back().second; } 14410b57cec5SDimitry Andric 14420b57cec5SDimitry Andric const SDep *backtrack() { 14430b57cec5SDimitry Andric DFSStack.pop_back(); 14440b57cec5SDimitry Andric return DFSStack.empty() ? nullptr : std::prev(DFSStack.back().second); 14450b57cec5SDimitry Andric } 14460b57cec5SDimitry Andric 14470b57cec5SDimitry Andric const SUnit *getCurr() const { return DFSStack.back().first; } 14480b57cec5SDimitry Andric 14490b57cec5SDimitry Andric SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; } 14500b57cec5SDimitry Andric 14510b57cec5SDimitry Andric SUnit::const_pred_iterator getPredEnd() const { 14520b57cec5SDimitry Andric return getCurr()->Preds.end(); 14530b57cec5SDimitry Andric } 14540b57cec5SDimitry Andric }; 14550b57cec5SDimitry Andric 14560b57cec5SDimitry Andric } // end anonymous namespace 14570b57cec5SDimitry Andric 14580b57cec5SDimitry Andric static bool hasDataSucc(const SUnit *SU) { 14590b57cec5SDimitry Andric for (const SDep &SuccDep : SU->Succs) { 14600b57cec5SDimitry Andric if (SuccDep.getKind() == SDep::Data && 14610b57cec5SDimitry Andric !SuccDep.getSUnit()->isBoundaryNode()) 14620b57cec5SDimitry Andric return true; 14630b57cec5SDimitry Andric } 14640b57cec5SDimitry Andric return false; 14650b57cec5SDimitry Andric } 14660b57cec5SDimitry Andric 14670b57cec5SDimitry Andric /// Computes an ILP metric for all nodes in the subDAG reachable via depth-first 14680b57cec5SDimitry Andric 
/// search from this root.
void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
  if (!IsBottomUp)
    llvm_unreachable("Top-down ILP metric is unimplemented");

  SchedDFSImpl Impl(*this);
  for (const SUnit &SU : SUnits) {
    // Only start a DFS from nodes that terminate a data-dependence chain and
    // have not been reached from an earlier root.
    if (Impl.isVisited(&SU) || hasDataSucc(&SU))
      continue;

    SchedDAGReverseDFS DFS;
    Impl.visitPreorder(&SU);
    DFS.follow(&SU);
    while (true) {
      // Traverse the leftmost path as far as possible.
      while (DFS.getPred() != DFS.getPredEnd()) {
        const SDep &PredDep = *DFS.getPred();
        DFS.advance();
        // Ignore non-data edges.
        if (PredDep.getKind() != SDep::Data
            || PredDep.getSUnit()->isBoundaryNode()) {
          continue;
        }
        // An already visited edge is a cross edge, assuming an acyclic DAG.
        if (Impl.isVisited(PredDep.getSUnit())) {
          Impl.visitCrossEdge(PredDep, DFS.getCurr());
          continue;
        }
        Impl.visitPreorder(PredDep.getSUnit());
        DFS.follow(PredDep.getSUnit());
      }
      // Visit the top of the stack in postorder and backtrack.
      const SUnit *Child = DFS.getCurr();
      const SDep *PredDep = DFS.backtrack();
      Impl.visitPostorderNode(Child);
      if (PredDep)
        Impl.visitPostorderEdge(*PredDep, DFS.getCurr());
      if (DFS.isComplete())
        break;
    }
  }
  Impl.finalize();
}

/// The root of the given SubtreeID was just scheduled. For all subtrees
/// connected to this tree, record the depth of the connection so that the
/// nearest connected subtrees can be prioritized.
void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
  for (const Connection &C : SubtreeConnections[SubtreeID]) {
    SubtreeConnectLevels[C.TreeID] =
      std::max(SubtreeConnectLevels[C.TreeID], C.Level);
    LLVM_DEBUG(dbgs() << "  Tree: " << C.TreeID << " @"
                      << SubtreeConnectLevels[C.TreeID] << '\n');
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the ILP value as "InstrCount / Length = ratio", or BADILP when the
/// length is zero.
LLVM_DUMP_METHOD void ILPValue::print(raw_ostream &OS) const {
  OS << InstrCount << " / " << Length << " = ";
  if (!Length)
    OS << "BADILP";
  else
    OS << format("%g", ((double)InstrCount / Length));
}

LLVM_DUMP_METHOD void ILPValue::dump() const {
  dbgs() << *this << '\n';
}

namespace llvm {

LLVM_ATTRIBUTE_UNUSED
raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
  Val.print(OS);
  return OS;
}

} // end namespace llvm

#endif