10b57cec5SDimitry Andric //===--------------------- BottleneckAnalysis.cpp ---------------*- C++ -*-===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric /// \file 90b57cec5SDimitry Andric /// 100b57cec5SDimitry Andric /// This file implements the functionalities used by the BottleneckAnalysis 110b57cec5SDimitry Andric /// to report bottleneck info. 120b57cec5SDimitry Andric /// 130b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 140b57cec5SDimitry Andric 150b57cec5SDimitry Andric #include "Views/BottleneckAnalysis.h" 160b57cec5SDimitry Andric #include "llvm/MC/MCInst.h" 170b57cec5SDimitry Andric #include "llvm/MCA/Support.h" 180b57cec5SDimitry Andric #include "llvm/Support/Format.h" 190b57cec5SDimitry Andric 200b57cec5SDimitry Andric namespace llvm { 210b57cec5SDimitry Andric namespace mca { 220b57cec5SDimitry Andric 230b57cec5SDimitry Andric #define DEBUG_TYPE "llvm-mca" 240b57cec5SDimitry Andric 250b57cec5SDimitry Andric PressureTracker::PressureTracker(const MCSchedModel &Model) 260b57cec5SDimitry Andric : SM(Model), 270b57cec5SDimitry Andric ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0), 280b57cec5SDimitry Andric ProcResID2Mask(Model.getNumProcResourceKinds(), 0), 290b57cec5SDimitry Andric ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0), 300b57cec5SDimitry Andric ProcResID2ResourceUsersIndex(Model.getNumProcResourceKinds(), 0) { 310b57cec5SDimitry Andric computeProcResourceMasks(SM, ProcResID2Mask); 320b57cec5SDimitry Andric 330b57cec5SDimitry Andric // Ignore the invalid resource at index zero. 340b57cec5SDimitry Andric unsigned NextResourceUsersIdx = 0; 350b57cec5SDimitry Andric for (unsigned I = 1, E = Model.getNumProcResourceKinds(); I < E; ++I) { 360b57cec5SDimitry Andric const MCProcResourceDesc &ProcResource = *SM.getProcResource(I); 370b57cec5SDimitry Andric ProcResID2ResourceUsersIndex[I] = NextResourceUsersIdx; 380b57cec5SDimitry Andric NextResourceUsersIdx += ProcResource.NumUnits; 390b57cec5SDimitry Andric uint64_t ResourceMask = ProcResID2Mask[I]; 400b57cec5SDimitry Andric ResIdx2ProcResID[getResourceStateIndex(ResourceMask)] = I; 410b57cec5SDimitry Andric } 420b57cec5SDimitry Andric 430b57cec5SDimitry Andric ResourceUsers.resize(NextResourceUsersIdx); 440b57cec5SDimitry Andric std::fill(ResourceUsers.begin(), ResourceUsers.end(), 450b57cec5SDimitry Andric std::make_pair<unsigned, unsigned>(~0U, 0U)); 460b57cec5SDimitry Andric } 470b57cec5SDimitry Andric 480b57cec5SDimitry Andric void PressureTracker::getResourceUsers(uint64_t ResourceMask, 490b57cec5SDimitry Andric SmallVectorImpl<User> &Users) const { 500b57cec5SDimitry Andric unsigned Index = getResourceStateIndex(ResourceMask); 510b57cec5SDimitry Andric unsigned ProcResID = ResIdx2ProcResID[Index]; 520b57cec5SDimitry Andric const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID); 530b57cec5SDimitry Andric for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) { 540b57cec5SDimitry Andric const User U = getResourceUser(ProcResID, I); 5506c3fb27SDimitry Andric if (U.second && IPI.contains(U.first)) 560b57cec5SDimitry Andric Users.emplace_back(U); 570b57cec5SDimitry Andric } 580b57cec5SDimitry Andric } 590b57cec5SDimitry Andric 600b57cec5SDimitry Andric void PressureTracker::onInstructionDispatched(unsigned IID) { 610b57cec5SDimitry Andric IPI.insert(std::make_pair(IID, InstructionPressureInfo())); 620b57cec5SDimitry Andric } 630b57cec5SDimitry Andric 640b57cec5SDimitry Andric void PressureTracker::onInstructionExecuted(unsigned IID) { IPI.erase(IID); } 650b57cec5SDimitry Andric 660b57cec5SDimitry Andric void PressureTracker::handleInstructionIssuedEvent( 670b57cec5SDimitry Andric const HWInstructionIssuedEvent &Event) { 680b57cec5SDimitry Andric unsigned IID = Event.IR.getSourceIndex(); 690b57cec5SDimitry Andric for (const ResourceUse &Use : Event.UsedResources) { 700b57cec5SDimitry Andric const ResourceRef &RR = Use.first; 710b57cec5SDimitry Andric unsigned Index = ProcResID2ResourceUsersIndex[RR.first]; 7206c3fb27SDimitry Andric Index += llvm::countr_zero(RR.second); 730b57cec5SDimitry Andric ResourceUsers[Index] = std::make_pair(IID, Use.second.getNumerator()); 740b57cec5SDimitry Andric } 750b57cec5SDimitry Andric } 760b57cec5SDimitry Andric 770b57cec5SDimitry Andric void PressureTracker::updateResourcePressureDistribution( 780b57cec5SDimitry Andric uint64_t CumulativeMask) { 790b57cec5SDimitry Andric while (CumulativeMask) { 800b57cec5SDimitry Andric uint64_t Current = CumulativeMask & (-CumulativeMask); 810b57cec5SDimitry Andric unsigned ResIdx = getResourceStateIndex(Current); 820b57cec5SDimitry Andric unsigned ProcResID = ResIdx2ProcResID[ResIdx]; 830b57cec5SDimitry Andric uint64_t Mask = ProcResID2Mask[ProcResID]; 840b57cec5SDimitry Andric 850b57cec5SDimitry Andric if (Mask == Current) { 860b57cec5SDimitry Andric ResourcePressureDistribution[ProcResID]++; 870b57cec5SDimitry Andric CumulativeMask ^= Current; 880b57cec5SDimitry Andric continue; 890b57cec5SDimitry Andric } 900b57cec5SDimitry Andric 910b57cec5SDimitry Andric Mask ^= Current; 920b57cec5SDimitry Andric while (Mask) { 930b57cec5SDimitry Andric uint64_t SubUnit = Mask & (-Mask); 940b57cec5SDimitry Andric ResIdx = getResourceStateIndex(SubUnit); 950b57cec5SDimitry Andric ProcResID = ResIdx2ProcResID[ResIdx]; 960b57cec5SDimitry Andric ResourcePressureDistribution[ProcResID]++; 970b57cec5SDimitry Andric Mask ^= SubUnit; 980b57cec5SDimitry Andric } 990b57cec5SDimitry Andric 1000b57cec5SDimitry Andric CumulativeMask ^= Current; 1010b57cec5SDimitry Andric } 1020b57cec5SDimitry Andric } 1030b57cec5SDimitry Andric 1040b57cec5SDimitry Andric void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) { 1050b57cec5SDimitry Andric assert(Event.Reason != HWPressureEvent::INVALID && 1060b57cec5SDimitry Andric "Unexpected invalid event!"); 1070b57cec5SDimitry Andric 1080b57cec5SDimitry Andric switch (Event.Reason) { 1090b57cec5SDimitry Andric default: 1100b57cec5SDimitry Andric break; 1110b57cec5SDimitry Andric 1120b57cec5SDimitry Andric case HWPressureEvent::RESOURCES: { 1130b57cec5SDimitry Andric const uint64_t ResourceMask = Event.ResourceMask; 1140b57cec5SDimitry Andric updateResourcePressureDistribution(Event.ResourceMask); 1150b57cec5SDimitry Andric 1160b57cec5SDimitry Andric for (const InstRef &IR : Event.AffectedInstructions) { 1170b57cec5SDimitry Andric const Instruction &IS = *IR.getInstruction(); 1180b57cec5SDimitry Andric unsigned BusyResources = IS.getCriticalResourceMask() & ResourceMask; 1190b57cec5SDimitry Andric if (!BusyResources) 1200b57cec5SDimitry Andric continue; 1210b57cec5SDimitry Andric 1220b57cec5SDimitry Andric unsigned IID = IR.getSourceIndex(); 1230b57cec5SDimitry Andric IPI[IID].ResourcePressureCycles++; 1240b57cec5SDimitry Andric } 1250b57cec5SDimitry Andric break; 1260b57cec5SDimitry Andric } 1270b57cec5SDimitry Andric 1280b57cec5SDimitry Andric case HWPressureEvent::REGISTER_DEPS: 1290b57cec5SDimitry Andric for (const InstRef &IR : Event.AffectedInstructions) { 1300b57cec5SDimitry Andric unsigned IID = IR.getSourceIndex(); 1310b57cec5SDimitry Andric IPI[IID].RegisterPressureCycles++; 1320b57cec5SDimitry Andric } 1330b57cec5SDimitry Andric break; 1340b57cec5SDimitry Andric 1350b57cec5SDimitry Andric case HWPressureEvent::MEMORY_DEPS: 1360b57cec5SDimitry Andric for (const InstRef &IR : Event.AffectedInstructions) { 1370b57cec5SDimitry Andric unsigned IID = IR.getSourceIndex(); 1380b57cec5SDimitry Andric IPI[IID].MemoryPressureCycles++; 1390b57cec5SDimitry Andric } 1400b57cec5SDimitry Andric } 1410b57cec5SDimitry Andric } 1420b57cec5SDimitry Andric 1430b57cec5SDimitry Andric #ifndef NDEBUG 1440b57cec5SDimitry Andric void DependencyGraph::dumpDependencyEdge(raw_ostream &OS, 1450b57cec5SDimitry Andric const DependencyEdge &DepEdge, 1460b57cec5SDimitry Andric MCInstPrinter &MCIP) const { 1470b57cec5SDimitry Andric unsigned FromIID = DepEdge.FromIID; 1480b57cec5SDimitry Andric unsigned ToIID = DepEdge.ToIID; 1490b57cec5SDimitry Andric assert(FromIID < ToIID && "Graph should be acyclic!"); 1500b57cec5SDimitry Andric 1510b57cec5SDimitry Andric const DependencyEdge::Dependency &DE = DepEdge.Dep; 1520b57cec5SDimitry Andric assert(DE.Type != DependencyEdge::DT_INVALID && "Unexpected invalid edge!"); 1530b57cec5SDimitry Andric 1540b57cec5SDimitry Andric OS << " FROM: " << FromIID << " TO: " << ToIID << " "; 1550b57cec5SDimitry Andric if (DE.Type == DependencyEdge::DT_REGISTER) { 1560b57cec5SDimitry Andric OS << " - REGISTER: "; 1570b57cec5SDimitry Andric MCIP.printRegName(OS, DE.ResourceOrRegID); 1580b57cec5SDimitry Andric } else if (DE.Type == DependencyEdge::DT_MEMORY) { 1590b57cec5SDimitry Andric OS << " - MEMORY"; 1600b57cec5SDimitry Andric } else { 1610b57cec5SDimitry Andric assert(DE.Type == DependencyEdge::DT_RESOURCE && 1620b57cec5SDimitry Andric "Unsupported dependency type!"); 1630b57cec5SDimitry Andric OS << " - RESOURCE MASK: " << DE.ResourceOrRegID; 1640b57cec5SDimitry Andric } 1658bcb0991SDimitry Andric OS << " - COST: " << DE.Cost << '\n'; 1660b57cec5SDimitry Andric } 1670b57cec5SDimitry Andric #endif // NDEBUG 1680b57cec5SDimitry Andric 1698bcb0991SDimitry Andric void DependencyGraph::pruneEdges(unsigned Iterations) { 1708bcb0991SDimitry Andric for (DGNode &N : Nodes) { 1718bcb0991SDimitry Andric unsigned NumPruned = 0; 1728bcb0991SDimitry Andric const unsigned Size = N.OutgoingEdges.size(); 1738bcb0991SDimitry Andric // Use a cut-off threshold to prune edges with a low frequency. 1748bcb0991SDimitry Andric for (unsigned I = 0, E = Size; I < E; ++I) { 1758bcb0991SDimitry Andric DependencyEdge &Edge = N.OutgoingEdges[I]; 1768bcb0991SDimitry Andric if (Edge.Frequency == Iterations) 1778bcb0991SDimitry Andric continue; 1788bcb0991SDimitry Andric double Factor = (double)Edge.Frequency / Iterations; 1798bcb0991SDimitry Andric if (0.10 < Factor) 1808bcb0991SDimitry Andric continue; 1818bcb0991SDimitry Andric Nodes[Edge.ToIID].NumPredecessors--; 1828bcb0991SDimitry Andric std::swap(Edge, N.OutgoingEdges[E - 1]); 1838bcb0991SDimitry Andric --E; 1848bcb0991SDimitry Andric ++NumPruned; 1858bcb0991SDimitry Andric } 1868bcb0991SDimitry Andric 1878bcb0991SDimitry Andric if (NumPruned) 1888bcb0991SDimitry Andric N.OutgoingEdges.resize(Size - NumPruned); 1898bcb0991SDimitry Andric } 1908bcb0991SDimitry Andric } 1918bcb0991SDimitry Andric 1920b57cec5SDimitry Andric void DependencyGraph::initializeRootSet( 1930b57cec5SDimitry Andric SmallVectorImpl<unsigned> &RootSet) const { 1940b57cec5SDimitry Andric for (unsigned I = 0, E = Nodes.size(); I < E; ++I) { 1950b57cec5SDimitry Andric const DGNode &N = Nodes[I]; 1960b57cec5SDimitry Andric if (N.NumPredecessors == 0 && !N.OutgoingEdges.empty()) 1970b57cec5SDimitry Andric RootSet.emplace_back(I); 1980b57cec5SDimitry Andric } 1990b57cec5SDimitry Andric } 2000b57cec5SDimitry Andric 201fe6060f1SDimitry Andric void DependencyGraph::propagateThroughEdges(SmallVectorImpl<unsigned> &RootSet, 202fe6060f1SDimitry Andric unsigned Iterations) { 2030b57cec5SDimitry Andric SmallVector<unsigned, 8> ToVisit; 2040b57cec5SDimitry Andric 2050b57cec5SDimitry Andric // A critical sequence is computed as the longest path from a node of the 2060b57cec5SDimitry Andric // RootSet to a leaf node (i.e. a node with no successors). The RootSet is 2070b57cec5SDimitry Andric // composed of nodes with at least one successor, and no predecessors. 2080b57cec5SDimitry Andric // 2090b57cec5SDimitry Andric // Each node of the graph starts with an initial default cost of zero. The 2100b57cec5SDimitry Andric // cost of a node is a measure of criticality: the higher the cost, the bigger 2110b57cec5SDimitry Andric // is the performance impact. 2128bcb0991SDimitry Andric // For register and memory dependencies, the cost is a function of the write 2138bcb0991SDimitry Andric // latency as well as the actual delay (in cycles) caused to users. 2148bcb0991SDimitry Andric // For processor resource dependencies, the cost is a function of the resource 2158bcb0991SDimitry Andric // pressure. Resource interferences with low frequency values are ignored. 2160b57cec5SDimitry Andric // 2170b57cec5SDimitry Andric // This algorithm is very similar to a (reverse) Dijkstra. Every iteration of 2180b57cec5SDimitry Andric // the inner loop selects (i.e. visits) a node N from a set of `unvisited 2190b57cec5SDimitry Andric // nodes`, and then propagates the cost of N to all its neighbors. 2200b57cec5SDimitry Andric // 2210b57cec5SDimitry Andric // The `unvisited nodes` set initially contains all the nodes from the 2220b57cec5SDimitry Andric // RootSet. A node N is added to the `unvisited nodes` if all its 2230b57cec5SDimitry Andric // predecessors have been visited already. 2240b57cec5SDimitry Andric // 2250b57cec5SDimitry Andric // For simplicity, every node tracks the number of unvisited incoming edges in 2260b57cec5SDimitry Andric // field `NumVisitedPredecessors`. When the value of that field drops to 2270b57cec5SDimitry Andric // zero, then the corresponding node is added to a `ToVisit` set. 2280b57cec5SDimitry Andric // 2290b57cec5SDimitry Andric // At the end of every iteration of the outer loop, set `ToVisit` becomes our 2300b57cec5SDimitry Andric // new `unvisited nodes` set. 2310b57cec5SDimitry Andric // 2320b57cec5SDimitry Andric // The algorithm terminates when the set of unvisited nodes (i.e. our RootSet) 2330b57cec5SDimitry Andric // is empty. This algorithm works under the assumption that the graph is 2340b57cec5SDimitry Andric // acyclic. 2350b57cec5SDimitry Andric do { 2360b57cec5SDimitry Andric for (unsigned IID : RootSet) { 2370b57cec5SDimitry Andric const DGNode &N = Nodes[IID]; 2380b57cec5SDimitry Andric for (const DependencyEdge &DepEdge : N.OutgoingEdges) { 2390b57cec5SDimitry Andric unsigned ToIID = DepEdge.ToIID; 2400b57cec5SDimitry Andric DGNode &To = Nodes[ToIID]; 2410b57cec5SDimitry Andric uint64_t Cost = N.Cost + DepEdge.Dep.Cost; 2420b57cec5SDimitry Andric // Check if this is the most expensive incoming edge seen so far. In 2430b57cec5SDimitry Andric // case, update the total cost of the destination node (ToIID), as well 2440b57cec5SDimitry Andric // its field `CriticalPredecessor`. 2450b57cec5SDimitry Andric if (Cost > To.Cost) { 2460b57cec5SDimitry Andric To.CriticalPredecessor = DepEdge; 2470b57cec5SDimitry Andric To.Cost = Cost; 2480b57cec5SDimitry Andric To.Depth = N.Depth + 1; 2490b57cec5SDimitry Andric } 2500b57cec5SDimitry Andric To.NumVisitedPredecessors++; 2510b57cec5SDimitry Andric if (To.NumVisitedPredecessors == To.NumPredecessors) 2520b57cec5SDimitry Andric ToVisit.emplace_back(ToIID); 2530b57cec5SDimitry Andric } 2540b57cec5SDimitry Andric } 2550b57cec5SDimitry Andric 2560b57cec5SDimitry Andric std::swap(RootSet, ToVisit); 2570b57cec5SDimitry Andric ToVisit.clear(); 2580b57cec5SDimitry Andric } while (!RootSet.empty()); 2590b57cec5SDimitry Andric } 2600b57cec5SDimitry Andric 2610b57cec5SDimitry Andric void DependencyGraph::getCriticalSequence( 2620b57cec5SDimitry Andric SmallVectorImpl<const DependencyEdge *> &Seq) const { 2630b57cec5SDimitry Andric // At this stage, nodes of the graph have been already visited, and costs have 2640b57cec5SDimitry Andric // been propagated through the edges (see method `propagateThroughEdges()`). 2650b57cec5SDimitry Andric 2660b57cec5SDimitry Andric // Identify the node N with the highest cost in the graph. By construction, 2670b57cec5SDimitry Andric // that node is the last instruction of our critical sequence. 2680b57cec5SDimitry Andric // Field N.Depth would tell us the total length of the sequence. 2690b57cec5SDimitry Andric // 270fe6060f1SDimitry Andric // To obtain the sequence of critical edges, we simply follow the chain of 271fe6060f1SDimitry Andric // critical predecessors starting from node N (field 272fe6060f1SDimitry Andric // DGNode::CriticalPredecessor). 273*0fca6ea1SDimitry Andric const auto It = 274*0fca6ea1SDimitry Andric llvm::max_element(Nodes, [](const DGNode &Lhs, const DGNode &Rhs) { 275*0fca6ea1SDimitry Andric return Lhs.Cost < Rhs.Cost; 276*0fca6ea1SDimitry Andric }); 2770b57cec5SDimitry Andric unsigned IID = std::distance(Nodes.begin(), It); 2780b57cec5SDimitry Andric Seq.resize(Nodes[IID].Depth); 2790eae32dcSDimitry Andric for (const DependencyEdge *&DE : llvm::reverse(Seq)) { 2800b57cec5SDimitry Andric const DGNode &N = Nodes[IID]; 2810eae32dcSDimitry Andric DE = &N.CriticalPredecessor; 2820b57cec5SDimitry Andric IID = N.CriticalPredecessor.FromIID; 2830b57cec5SDimitry Andric } 2840b57cec5SDimitry Andric } 2850b57cec5SDimitry Andric 286e8d8bef9SDimitry Andric void BottleneckAnalysis::printInstruction(formatted_raw_ostream &FOS, 2870b57cec5SDimitry Andric const MCInst &MCI, 288e8d8bef9SDimitry Andric bool UseDifferentColor) const { 2890b57cec5SDimitry Andric FOS.PadToColumn(14); 2900b57cec5SDimitry Andric if (UseDifferentColor) 2910b57cec5SDimitry Andric FOS.changeColor(raw_ostream::CYAN, true, false); 292e8d8bef9SDimitry Andric FOS << printInstructionString(MCI); 2930b57cec5SDimitry Andric if (UseDifferentColor) 2940b57cec5SDimitry Andric FOS.resetColor(); 2950b57cec5SDimitry Andric } 2960b57cec5SDimitry Andric 2970b57cec5SDimitry Andric void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const { 2988bcb0991SDimitry Andric // Early exit if no bottlenecks were found during the simulation. 2998bcb0991SDimitry Andric if (!SeenStallCycles || !BPI.PressureIncreaseCycles) 3008bcb0991SDimitry Andric return; 3018bcb0991SDimitry Andric 3020b57cec5SDimitry Andric SmallVector<const DependencyEdge *, 16> Seq; 3030b57cec5SDimitry Andric DG.getCriticalSequence(Seq); 3040b57cec5SDimitry Andric if (Seq.empty()) 3050b57cec5SDimitry Andric return; 3060b57cec5SDimitry Andric 3070b57cec5SDimitry Andric OS << "\nCritical sequence based on the simulation:\n\n"; 3080b57cec5SDimitry Andric 3090b57cec5SDimitry Andric const DependencyEdge &FirstEdge = *Seq[0]; 310e8d8bef9SDimitry Andric ArrayRef<llvm::MCInst> Source = getSource(); 3110b57cec5SDimitry Andric unsigned FromIID = FirstEdge.FromIID % Source.size(); 3120b57cec5SDimitry Andric unsigned ToIID = FirstEdge.ToIID % Source.size(); 3130b57cec5SDimitry Andric bool IsLoopCarried = FromIID >= ToIID; 3140b57cec5SDimitry Andric 3150b57cec5SDimitry Andric formatted_raw_ostream FOS(OS); 3160b57cec5SDimitry Andric FOS.PadToColumn(14); 3170b57cec5SDimitry Andric FOS << "Instruction"; 3180b57cec5SDimitry Andric FOS.PadToColumn(58); 3190b57cec5SDimitry Andric FOS << "Dependency Information"; 3200b57cec5SDimitry Andric 3210b57cec5SDimitry Andric bool HasColors = FOS.has_colors(); 3220b57cec5SDimitry Andric 3230b57cec5SDimitry Andric unsigned CurrentIID = 0; 3240b57cec5SDimitry Andric if (IsLoopCarried) { 3250b57cec5SDimitry Andric FOS << "\n +----< " << FromIID << "."; 326e8d8bef9SDimitry Andric printInstruction(FOS, Source[FromIID], HasColors); 3270b57cec5SDimitry Andric FOS << "\n |\n | < loop carried > \n |"; 3280b57cec5SDimitry Andric } else { 3290b57cec5SDimitry Andric while (CurrentIID < FromIID) { 3300b57cec5SDimitry Andric FOS << "\n " << CurrentIID << "."; 331e8d8bef9SDimitry Andric printInstruction(FOS, Source[CurrentIID]); 3320b57cec5SDimitry Andric CurrentIID++; 3330b57cec5SDimitry Andric } 3340b57cec5SDimitry Andric 3350b57cec5SDimitry Andric FOS << "\n +----< " << CurrentIID << "."; 336e8d8bef9SDimitry Andric printInstruction(FOS, Source[CurrentIID], HasColors); 3370b57cec5SDimitry Andric CurrentIID++; 3380b57cec5SDimitry Andric } 3390b57cec5SDimitry Andric 3400b57cec5SDimitry Andric for (const DependencyEdge *&DE : Seq) { 3410b57cec5SDimitry Andric ToIID = DE->ToIID % Source.size(); 3420b57cec5SDimitry Andric unsigned LastIID = CurrentIID > ToIID ? Source.size() : ToIID; 3430b57cec5SDimitry Andric 3440b57cec5SDimitry Andric while (CurrentIID < LastIID) { 3450b57cec5SDimitry Andric FOS << "\n | " << CurrentIID << "."; 346e8d8bef9SDimitry Andric printInstruction(FOS, Source[CurrentIID]); 3470b57cec5SDimitry Andric CurrentIID++; 3480b57cec5SDimitry Andric } 3490b57cec5SDimitry Andric 3500b57cec5SDimitry Andric if (CurrentIID == ToIID) { 3510b57cec5SDimitry Andric FOS << "\n +----> " << ToIID << "."; 352e8d8bef9SDimitry Andric printInstruction(FOS, Source[CurrentIID], HasColors); 3530b57cec5SDimitry Andric } else { 3540b57cec5SDimitry Andric FOS << "\n |\n | < loop carried > \n |" 3550b57cec5SDimitry Andric << "\n +----> " << ToIID << "."; 356e8d8bef9SDimitry Andric printInstruction(FOS, Source[ToIID], HasColors); 3570b57cec5SDimitry Andric } 3580b57cec5SDimitry Andric FOS.PadToColumn(58); 3590b57cec5SDimitry Andric 3600b57cec5SDimitry Andric const DependencyEdge::Dependency &Dep = DE->Dep; 3610b57cec5SDimitry Andric if (HasColors) 3620b57cec5SDimitry Andric FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false); 3630b57cec5SDimitry Andric 3640b57cec5SDimitry Andric if (Dep.Type == DependencyEdge::DT_REGISTER) { 3650b57cec5SDimitry Andric FOS << "## REGISTER dependency: "; 3660b57cec5SDimitry Andric if (HasColors) 3670b57cec5SDimitry Andric FOS.changeColor(raw_ostream::MAGENTA, true, false); 368e8d8bef9SDimitry Andric getInstPrinter().printRegName(FOS, Dep.ResourceOrRegID); 3690b57cec5SDimitry Andric } else if (Dep.Type == DependencyEdge::DT_MEMORY) { 3700b57cec5SDimitry Andric FOS << "## MEMORY dependency."; 3710b57cec5SDimitry Andric } else { 3720b57cec5SDimitry Andric assert(Dep.Type == DependencyEdge::DT_RESOURCE && 3730b57cec5SDimitry Andric "Unsupported dependency type!"); 3740b57cec5SDimitry Andric FOS << "## RESOURCE interference: "; 3750b57cec5SDimitry Andric if (HasColors) 3760b57cec5SDimitry Andric FOS.changeColor(raw_ostream::MAGENTA, true, false); 3770b57cec5SDimitry Andric FOS << Tracker.resolveResourceName(Dep.ResourceOrRegID); 3780b57cec5SDimitry Andric if (HasColors) { 3790b57cec5SDimitry Andric FOS.resetColor(); 3800b57cec5SDimitry Andric FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false); 3810b57cec5SDimitry Andric } 3820b57cec5SDimitry Andric FOS << " [ probability: " << ((DE->Frequency * 100) / Iterations) 3830b57cec5SDimitry Andric << "% ]"; 3840b57cec5SDimitry Andric } 3850b57cec5SDimitry Andric if (HasColors) 3860b57cec5SDimitry Andric FOS.resetColor(); 3870b57cec5SDimitry Andric ++CurrentIID; 3880b57cec5SDimitry Andric } 3890b57cec5SDimitry Andric 3900b57cec5SDimitry Andric while (CurrentIID < Source.size()) { 3910b57cec5SDimitry Andric FOS << "\n " << CurrentIID << "."; 392e8d8bef9SDimitry Andric printInstruction(FOS, Source[CurrentIID]); 3930b57cec5SDimitry Andric CurrentIID++; 3940b57cec5SDimitry Andric } 3950b57cec5SDimitry Andric 3960b57cec5SDimitry Andric FOS << '\n'; 3970b57cec5SDimitry Andric FOS.flush(); 3980b57cec5SDimitry Andric } 3990b57cec5SDimitry Andric 4000b57cec5SDimitry Andric #ifndef NDEBUG 4010b57cec5SDimitry Andric void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const { 4020b57cec5SDimitry Andric OS << "\nREG DEPS\n"; 4030b57cec5SDimitry Andric for (const DGNode &Node : Nodes) 4040b57cec5SDimitry Andric for (const DependencyEdge &DE : Node.OutgoingEdges) 4050b57cec5SDimitry Andric if (DE.Dep.Type == DependencyEdge::DT_REGISTER) 4060b57cec5SDimitry Andric dumpDependencyEdge(OS, DE, MCIP); 4070b57cec5SDimitry Andric 4080b57cec5SDimitry Andric OS << "\nMEM DEPS\n"; 4090b57cec5SDimitry Andric for (const DGNode &Node : Nodes) 4100b57cec5SDimitry Andric for (const DependencyEdge &DE : Node.OutgoingEdges) 4110b57cec5SDimitry Andric if (DE.Dep.Type == DependencyEdge::DT_MEMORY) 4120b57cec5SDimitry Andric dumpDependencyEdge(OS, DE, MCIP); 4130b57cec5SDimitry Andric 4140b57cec5SDimitry Andric OS << "\nRESOURCE DEPS\n"; 4150b57cec5SDimitry Andric for (const DGNode &Node : Nodes) 4160b57cec5SDimitry Andric for (const DependencyEdge &DE : Node.OutgoingEdges) 4170b57cec5SDimitry Andric if (DE.Dep.Type == DependencyEdge::DT_RESOURCE) 4180b57cec5SDimitry Andric dumpDependencyEdge(OS, DE, MCIP); 4190b57cec5SDimitry Andric } 4200b57cec5SDimitry Andric #endif // NDEBUG 4210b57cec5SDimitry Andric 4220b57cec5SDimitry Andric void DependencyGraph::addDependency(unsigned From, unsigned To, 4230b57cec5SDimitry Andric DependencyEdge::Dependency &&Dep) { 4240b57cec5SDimitry Andric DGNode &NodeFrom = Nodes[From]; 4250b57cec5SDimitry Andric DGNode &NodeTo = Nodes[To]; 4260b57cec5SDimitry Andric SmallVectorImpl<DependencyEdge> &Vec = NodeFrom.OutgoingEdges; 4270b57cec5SDimitry Andric 4280b57cec5SDimitry Andric auto It = find_if(Vec, [To, Dep](DependencyEdge &DE) { 4290b57cec5SDimitry Andric return DE.ToIID == To && DE.Dep.ResourceOrRegID == Dep.ResourceOrRegID; 4300b57cec5SDimitry Andric }); 4310b57cec5SDimitry Andric 4320b57cec5SDimitry Andric if (It != Vec.end()) { 4330b57cec5SDimitry Andric It->Dep.Cost += Dep.Cost; 4340b57cec5SDimitry Andric It->Frequency++; 4350b57cec5SDimitry Andric return; 4360b57cec5SDimitry Andric } 4370b57cec5SDimitry Andric 4380b57cec5SDimitry Andric DependencyEdge DE = {Dep, From, To, 1}; 4390b57cec5SDimitry Andric Vec.emplace_back(DE); 4400b57cec5SDimitry Andric NodeTo.NumPredecessors++; 4410b57cec5SDimitry Andric } 4420b57cec5SDimitry Andric 4430b57cec5SDimitry Andric BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti, 4440b57cec5SDimitry Andric MCInstPrinter &Printer, 4450b57cec5SDimitry Andric ArrayRef<MCInst> S, unsigned NumIter) 446e8d8bef9SDimitry Andric : InstructionView(sti, Printer, S), Tracker(sti.getSchedModel()), 447e8d8bef9SDimitry Andric DG(S.size() * 3), Iterations(NumIter), TotalCycles(0), 4480b57cec5SDimitry Andric PressureIncreasedBecauseOfResources(false), 4490b57cec5SDimitry Andric PressureIncreasedBecauseOfRegisterDependencies(false), 4500b57cec5SDimitry Andric PressureIncreasedBecauseOfMemoryDependencies(false), 4510b57cec5SDimitry Andric SeenStallCycles(false), BPI() {} 4520b57cec5SDimitry Andric 4530b57cec5SDimitry Andric void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To, 4540b57cec5SDimitry Andric unsigned RegID, unsigned Cost) { 4550b57cec5SDimitry Andric bool IsLoopCarried = From >= To; 456e8d8bef9SDimitry Andric unsigned SourceSize = getSource().size(); 4570b57cec5SDimitry Andric if (IsLoopCarried) { 4580b57cec5SDimitry Andric DG.addRegisterDep(From, To + SourceSize, RegID, Cost); 4590b57cec5SDimitry Andric DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost); 4600b57cec5SDimitry Andric return; 4610b57cec5SDimitry Andric } 4620b57cec5SDimitry Andric DG.addRegisterDep(From + SourceSize, To + SourceSize, RegID, Cost); 4630b57cec5SDimitry Andric } 4640b57cec5SDimitry Andric 4650b57cec5SDimitry Andric void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To, 4660b57cec5SDimitry Andric unsigned Cost) { 4670b57cec5SDimitry Andric bool IsLoopCarried = From >= To; 468e8d8bef9SDimitry Andric unsigned SourceSize = getSource().size(); 4690b57cec5SDimitry Andric if (IsLoopCarried) { 4700b57cec5SDimitry Andric DG.addMemoryDep(From, To + SourceSize, Cost); 4710b57cec5SDimitry Andric DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost); 4720b57cec5SDimitry Andric return; 4730b57cec5SDimitry Andric } 4740b57cec5SDimitry Andric DG.addMemoryDep(From + SourceSize, To + SourceSize, Cost); 4750b57cec5SDimitry Andric } 4760b57cec5SDimitry Andric 4770b57cec5SDimitry Andric void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To, 4780b57cec5SDimitry Andric uint64_t Mask, unsigned Cost) { 4790b57cec5SDimitry Andric bool IsLoopCarried = From >= To; 480e8d8bef9SDimitry Andric unsigned SourceSize = getSource().size(); 4810b57cec5SDimitry Andric if (IsLoopCarried) { 4820b57cec5SDimitry Andric DG.addResourceDep(From, To + SourceSize, Mask, Cost); 4830b57cec5SDimitry Andric DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost); 4840b57cec5SDimitry Andric return; 4850b57cec5SDimitry Andric } 4860b57cec5SDimitry Andric DG.addResourceDep(From + SourceSize, To + SourceSize, Mask, Cost); 4870b57cec5SDimitry Andric } 4880b57cec5SDimitry Andric 4890b57cec5SDimitry Andric void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) { 4900b57cec5SDimitry Andric const unsigned IID = Event.IR.getSourceIndex(); 4910b57cec5SDimitry Andric if (Event.Type == HWInstructionEvent::Dispatched) { 4920b57cec5SDimitry Andric Tracker.onInstructionDispatched(IID); 4930b57cec5SDimitry Andric return; 4940b57cec5SDimitry Andric } 4950b57cec5SDimitry Andric if (Event.Type == HWInstructionEvent::Executed) { 4960b57cec5SDimitry Andric Tracker.onInstructionExecuted(IID); 4970b57cec5SDimitry Andric return; 4980b57cec5SDimitry Andric } 4990b57cec5SDimitry Andric 5000b57cec5SDimitry Andric if (Event.Type != HWInstructionEvent::Issued) 5010b57cec5SDimitry Andric return; 5020b57cec5SDimitry Andric 503e8d8bef9SDimitry Andric ArrayRef<llvm::MCInst> Source = getSource(); 5040b57cec5SDimitry Andric const Instruction &IS = *Event.IR.getInstruction(); 5050b57cec5SDimitry Andric unsigned To = IID % Source.size(); 5060b57cec5SDimitry Andric 5070b57cec5SDimitry Andric unsigned Cycles = 2 * Tracker.getResourcePressureCycles(IID); 5080b57cec5SDimitry Andric uint64_t ResourceMask = IS.getCriticalResourceMask(); 5090b57cec5SDimitry Andric SmallVector<std::pair<unsigned, unsigned>, 4> Users; 5100b57cec5SDimitry Andric while (ResourceMask) { 5110b57cec5SDimitry Andric uint64_t Current = ResourceMask & (-ResourceMask); 5120b57cec5SDimitry Andric Tracker.getResourceUsers(Current, Users); 5130b57cec5SDimitry Andric for (const std::pair<unsigned, unsigned> &U : Users) 5140b57cec5SDimitry Andric addResourceDep(U.first % Source.size(), To, Current, U.second + Cycles); 5150b57cec5SDimitry Andric Users.clear(); 5160b57cec5SDimitry Andric ResourceMask ^= Current; 5170b57cec5SDimitry Andric } 5180b57cec5SDimitry Andric 5190b57cec5SDimitry Andric const CriticalDependency &RegDep = IS.getCriticalRegDep(); 5200b57cec5SDimitry Andric if (RegDep.Cycles) { 5210b57cec5SDimitry Andric Cycles = RegDep.Cycles + 2 * Tracker.getRegisterPressureCycles(IID); 5220b57cec5SDimitry Andric unsigned From = RegDep.IID % Source.size(); 5230b57cec5SDimitry Andric addRegisterDep(From, To, RegDep.RegID, Cycles); 5240b57cec5SDimitry Andric } 5250b57cec5SDimitry Andric 5260b57cec5SDimitry Andric const CriticalDependency &MemDep = IS.getCriticalMemDep(); 5270b57cec5SDimitry Andric if (MemDep.Cycles) { 5280b57cec5SDimitry Andric Cycles = MemDep.Cycles + 2 * Tracker.getMemoryPressureCycles(IID); 5290b57cec5SDimitry Andric unsigned From = MemDep.IID % Source.size(); 5300b57cec5SDimitry Andric addMemoryDep(From, To, Cycles); 5310b57cec5SDimitry Andric } 5320b57cec5SDimitry Andric 5330b57cec5SDimitry Andric Tracker.handleInstructionIssuedEvent( 5340b57cec5SDimitry Andric static_cast<const HWInstructionIssuedEvent &>(Event)); 5350b57cec5SDimitry Andric 5360b57cec5SDimitry Andric // Check if this is the last simulated instruction. 5370b57cec5SDimitry Andric if (IID == ((Iterations * Source.size()) - 1)) 5388bcb0991SDimitry Andric DG.finalizeGraph(Iterations); 5390b57cec5SDimitry Andric } 5400b57cec5SDimitry Andric 5410b57cec5SDimitry Andric void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) { 5420b57cec5SDimitry Andric assert(Event.Reason != HWPressureEvent::INVALID && 5430b57cec5SDimitry Andric "Unexpected invalid event!"); 5440b57cec5SDimitry Andric 5450b57cec5SDimitry Andric Tracker.handlePressureEvent(Event); 5460b57cec5SDimitry Andric 5470b57cec5SDimitry Andric switch (Event.Reason) { 5480b57cec5SDimitry Andric default: 5490b57cec5SDimitry Andric break; 5500b57cec5SDimitry Andric 5510b57cec5SDimitry Andric case HWPressureEvent::RESOURCES: 5520b57cec5SDimitry Andric PressureIncreasedBecauseOfResources = true; 5530b57cec5SDimitry Andric break; 5540b57cec5SDimitry Andric case HWPressureEvent::REGISTER_DEPS: 5550b57cec5SDimitry Andric PressureIncreasedBecauseOfRegisterDependencies = true; 5560b57cec5SDimitry Andric break; 5570b57cec5SDimitry Andric case HWPressureEvent::MEMORY_DEPS: 5580b57cec5SDimitry Andric PressureIncreasedBecauseOfMemoryDependencies = true; 5590b57cec5SDimitry Andric break; 5600b57cec5SDimitry Andric } 5610b57cec5SDimitry Andric } 5620b57cec5SDimitry Andric 5630b57cec5SDimitry Andric void BottleneckAnalysis::onCycleEnd() { 5640b57cec5SDimitry Andric ++TotalCycles; 5650b57cec5SDimitry Andric 5660b57cec5SDimitry Andric bool PressureIncreasedBecauseOfDataDependencies = 5670b57cec5SDimitry Andric PressureIncreasedBecauseOfRegisterDependencies || 5680b57cec5SDimitry Andric PressureIncreasedBecauseOfMemoryDependencies; 5690b57cec5SDimitry Andric if (!PressureIncreasedBecauseOfResources && 5700b57cec5SDimitry Andric !PressureIncreasedBecauseOfDataDependencies) 5710b57cec5SDimitry Andric return; 5720b57cec5SDimitry Andric 5730b57cec5SDimitry Andric ++BPI.PressureIncreaseCycles; 5740b57cec5SDimitry Andric if (PressureIncreasedBecauseOfRegisterDependencies) 5750b57cec5SDimitry Andric ++BPI.RegisterDependencyCycles; 5760b57cec5SDimitry Andric if (PressureIncreasedBecauseOfMemoryDependencies) 5770b57cec5SDimitry Andric ++BPI.MemoryDependencyCycles; 5780b57cec5SDimitry Andric if (PressureIncreasedBecauseOfDataDependencies) 5790b57cec5SDimitry Andric ++BPI.DataDependencyCycles; 5800b57cec5SDimitry Andric if (PressureIncreasedBecauseOfResources) 5810b57cec5SDimitry Andric ++BPI.ResourcePressureCycles; 5820b57cec5SDimitry Andric PressureIncreasedBecauseOfResources = false; 5830b57cec5SDimitry Andric PressureIncreasedBecauseOfRegisterDependencies = false; 5840b57cec5SDimitry Andric PressureIncreasedBecauseOfMemoryDependencies = false; 5850b57cec5SDimitry Andric } 5860b57cec5SDimitry Andric 5870b57cec5SDimitry Andric void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const { 5880b57cec5SDimitry Andric if (!SeenStallCycles || !BPI.PressureIncreaseCycles) { 5890b57cec5SDimitry Andric OS << "\n\nNo resource or data dependency bottlenecks discovered.\n"; 5900b57cec5SDimitry Andric return; 5910b57cec5SDimitry Andric } 5920b57cec5SDimitry Andric 5930b57cec5SDimitry Andric double PressurePerCycle = 5940b57cec5SDimitry Andric (double)BPI.PressureIncreaseCycles * 100 / TotalCycles; 5950b57cec5SDimitry Andric double ResourcePressurePerCycle = 5960b57cec5SDimitry Andric (double)BPI.ResourcePressureCycles * 100 / TotalCycles; 5970b57cec5SDimitry Andric double DDPerCycle = (double)BPI.DataDependencyCycles * 100 / TotalCycles; 5980b57cec5SDimitry Andric double RegDepPressurePerCycle = 5990b57cec5SDimitry Andric (double)BPI.RegisterDependencyCycles * 100 / TotalCycles; 6000b57cec5SDimitry Andric double MemDepPressurePerCycle = 6010b57cec5SDimitry Andric (double)BPI.MemoryDependencyCycles * 100 / TotalCycles; 6020b57cec5SDimitry Andric 6030b57cec5SDimitry Andric OS << "\n\nCycles with backend pressure increase [ " 6040b57cec5SDimitry Andric << format("%.2f", floor((PressurePerCycle * 100) + 0.5) / 100) << "% ]"; 6050b57cec5SDimitry Andric 6060b57cec5SDimitry Andric OS << "\nThroughput Bottlenecks: " 6070b57cec5SDimitry Andric << "\n Resource Pressure [ " 6080b57cec5SDimitry Andric << format("%.2f", floor((ResourcePressurePerCycle * 100) + 0.5) / 100) 6090b57cec5SDimitry Andric << "% ]"; 6100b57cec5SDimitry Andric 6110b57cec5SDimitry Andric if (BPI.PressureIncreaseCycles) { 6120b57cec5SDimitry Andric ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution(); 613e8d8bef9SDimitry Andric const MCSchedModel &SM = getSubTargetInfo().getSchedModel(); 6140b57cec5SDimitry Andric for (unsigned I = 0, E = Distribution.size(); I < E; ++I) { 6155f757f3fSDimitry Andric unsigned ReleaseAtCycles = Distribution[I]; 6165f757f3fSDimitry Andric if (ReleaseAtCycles) { 6175f757f3fSDimitry Andric double Frequency = (double)ReleaseAtCycles * 100 / TotalCycles; 6180b57cec5SDimitry Andric const MCProcResourceDesc &PRDesc = *SM.getProcResource(I); 6190b57cec5SDimitry Andric OS << "\n - " << PRDesc.Name << " [ " 6200b57cec5SDimitry Andric << format("%.2f", floor((Frequency * 100) + 0.5) / 100) << "% ]"; 6210b57cec5SDimitry Andric } 6220b57cec5SDimitry Andric } 6230b57cec5SDimitry Andric } 6240b57cec5SDimitry Andric 6250b57cec5SDimitry Andric OS << "\n Data Dependencies: [ " 6260b57cec5SDimitry Andric << format("%.2f", floor((DDPerCycle * 100) + 0.5) / 100) << "% ]"; 6270b57cec5SDimitry Andric OS << "\n - Register Dependencies [ " 6280b57cec5SDimitry Andric << format("%.2f", floor((RegDepPressurePerCycle * 100) + 0.5) / 100) 6290b57cec5SDimitry Andric << "% ]"; 6300b57cec5SDimitry Andric OS << "\n - Memory Dependencies [ " 6310b57cec5SDimitry Andric << format("%.2f", floor((MemDepPressurePerCycle * 100) + 0.5) / 100) 6320b57cec5SDimitry Andric << "% ]\n"; 6330b57cec5SDimitry Andric } 6340b57cec5SDimitry Andric 6350b57cec5SDimitry Andric void BottleneckAnalysis::printView(raw_ostream &OS) const { 6360b57cec5SDimitry Andric std::string Buffer; 6370b57cec5SDimitry Andric raw_string_ostream TempStream(Buffer); 6380b57cec5SDimitry Andric printBottleneckHints(TempStream); 6390b57cec5SDimitry Andric TempStream.flush(); 6400b57cec5SDimitry Andric OS << Buffer; 6410b57cec5SDimitry Andric printCriticalSequence(OS); 6420b57cec5SDimitry Andric } 6430b57cec5SDimitry Andric 6440b57cec5SDimitry Andric } // namespace mca. 6450b57cec5SDimitry Andric } // namespace llvm 646