1*4824e7fdSDimitry Andric //===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===// 2*4824e7fdSDimitry Andric // 3*4824e7fdSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*4824e7fdSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*4824e7fdSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*4824e7fdSDimitry Andric // 7*4824e7fdSDimitry Andric //===----------------------------------------------------------------------===// 8*4824e7fdSDimitry Andric // 9*4824e7fdSDimitry Andric // This file implements a profile inference algorithm. Given an incomplete and 10*4824e7fdSDimitry Andric // possibly imprecise block counts, the algorithm reconstructs realistic block 11*4824e7fdSDimitry Andric // and edge counts that satisfy flow conservation rules, while minimally modify 12*4824e7fdSDimitry Andric // input block counts. 13*4824e7fdSDimitry Andric // 14*4824e7fdSDimitry Andric //===----------------------------------------------------------------------===// 15*4824e7fdSDimitry Andric 16*4824e7fdSDimitry Andric #include "llvm/Transforms/Utils/SampleProfileInference.h" 17*4824e7fdSDimitry Andric #include "llvm/Support/Debug.h" 18*4824e7fdSDimitry Andric #include <queue> 19*4824e7fdSDimitry Andric #include <set> 20*4824e7fdSDimitry Andric 21*4824e7fdSDimitry Andric using namespace llvm; 22*4824e7fdSDimitry Andric #define DEBUG_TYPE "sample-profile-inference" 23*4824e7fdSDimitry Andric 24*4824e7fdSDimitry Andric namespace { 25*4824e7fdSDimitry Andric 26*4824e7fdSDimitry Andric /// A value indicating an infinite flow/capacity/weight of a block/edge. 27*4824e7fdSDimitry Andric /// Not using numeric_limits<int64_t>::max(), as the values can be summed up 28*4824e7fdSDimitry Andric /// during the execution. 29*4824e7fdSDimitry Andric static constexpr int64_t INF = ((int64_t)1) << 50; 30*4824e7fdSDimitry Andric 31*4824e7fdSDimitry Andric /// The minimum-cost maximum flow algorithm. 32*4824e7fdSDimitry Andric /// 33*4824e7fdSDimitry Andric /// The algorithm finds the maximum flow of minimum cost on a given (directed) 34*4824e7fdSDimitry Andric /// network using a modified version of the classical Moore-Bellman-Ford 35*4824e7fdSDimitry Andric /// approach. The algorithm applies a number of augmentation iterations in which 36*4824e7fdSDimitry Andric /// flow is sent along paths of positive capacity from the source to the sink. 37*4824e7fdSDimitry Andric /// The worst-case time complexity of the implementation is O(v(f)*m*n), where 38*4824e7fdSDimitry Andric /// where m is the number of edges, n is the number of vertices, and v(f) is the 39*4824e7fdSDimitry Andric /// value of the maximum flow. However, the observed running time on typical 40*4824e7fdSDimitry Andric /// instances is sub-quadratic, that is, o(n^2). 41*4824e7fdSDimitry Andric /// 42*4824e7fdSDimitry Andric /// The input is a set of edges with specified costs and capacities, and a pair 43*4824e7fdSDimitry Andric /// of nodes (source and sink). The output is the flow along each edge of the 44*4824e7fdSDimitry Andric /// minimum total cost respecting the given edge capacities. 45*4824e7fdSDimitry Andric class MinCostMaxFlow { 46*4824e7fdSDimitry Andric public: 47*4824e7fdSDimitry Andric // Initialize algorithm's data structures for a network of a given size. 48*4824e7fdSDimitry Andric void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) { 49*4824e7fdSDimitry Andric Source = SourceNode; 50*4824e7fdSDimitry Andric Target = SinkNode; 51*4824e7fdSDimitry Andric 52*4824e7fdSDimitry Andric Nodes = std::vector<Node>(NodeCount); 53*4824e7fdSDimitry Andric Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>()); 54*4824e7fdSDimitry Andric } 55*4824e7fdSDimitry Andric 56*4824e7fdSDimitry Andric // Run the algorithm. 57*4824e7fdSDimitry Andric int64_t run() { 58*4824e7fdSDimitry Andric // Find an augmenting path and update the flow along the path 59*4824e7fdSDimitry Andric size_t AugmentationIters = 0; 60*4824e7fdSDimitry Andric while (findAugmentingPath()) { 61*4824e7fdSDimitry Andric augmentFlowAlongPath(); 62*4824e7fdSDimitry Andric AugmentationIters++; 63*4824e7fdSDimitry Andric } 64*4824e7fdSDimitry Andric 65*4824e7fdSDimitry Andric // Compute the total flow and its cost 66*4824e7fdSDimitry Andric int64_t TotalCost = 0; 67*4824e7fdSDimitry Andric int64_t TotalFlow = 0; 68*4824e7fdSDimitry Andric for (uint64_t Src = 0; Src < Nodes.size(); Src++) { 69*4824e7fdSDimitry Andric for (auto &Edge : Edges[Src]) { 70*4824e7fdSDimitry Andric if (Edge.Flow > 0) { 71*4824e7fdSDimitry Andric TotalCost += Edge.Cost * Edge.Flow; 72*4824e7fdSDimitry Andric if (Src == Source) 73*4824e7fdSDimitry Andric TotalFlow += Edge.Flow; 74*4824e7fdSDimitry Andric } 75*4824e7fdSDimitry Andric } 76*4824e7fdSDimitry Andric } 77*4824e7fdSDimitry Andric LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters 78*4824e7fdSDimitry Andric << " iterations with " << TotalFlow << " total flow" 79*4824e7fdSDimitry Andric << " of " << TotalCost << " cost\n"); 80*4824e7fdSDimitry Andric (void)TotalFlow; 81*4824e7fdSDimitry Andric return TotalCost; 82*4824e7fdSDimitry Andric } 83*4824e7fdSDimitry Andric 84*4824e7fdSDimitry Andric /// Adding an edge to the network with a specified capacity and a cost. 85*4824e7fdSDimitry Andric /// Multiple edges between a pair of nodes are allowed but self-edges 86*4824e7fdSDimitry Andric /// are not supported. 87*4824e7fdSDimitry Andric void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) { 88*4824e7fdSDimitry Andric assert(Capacity > 0 && "adding an edge of zero capacity"); 89*4824e7fdSDimitry Andric assert(Src != Dst && "loop edge are not supported"); 90*4824e7fdSDimitry Andric 91*4824e7fdSDimitry Andric Edge SrcEdge; 92*4824e7fdSDimitry Andric SrcEdge.Dst = Dst; 93*4824e7fdSDimitry Andric SrcEdge.Cost = Cost; 94*4824e7fdSDimitry Andric SrcEdge.Capacity = Capacity; 95*4824e7fdSDimitry Andric SrcEdge.Flow = 0; 96*4824e7fdSDimitry Andric SrcEdge.RevEdgeIndex = Edges[Dst].size(); 97*4824e7fdSDimitry Andric 98*4824e7fdSDimitry Andric Edge DstEdge; 99*4824e7fdSDimitry Andric DstEdge.Dst = Src; 100*4824e7fdSDimitry Andric DstEdge.Cost = -Cost; 101*4824e7fdSDimitry Andric DstEdge.Capacity = 0; 102*4824e7fdSDimitry Andric DstEdge.Flow = 0; 103*4824e7fdSDimitry Andric DstEdge.RevEdgeIndex = Edges[Src].size(); 104*4824e7fdSDimitry Andric 105*4824e7fdSDimitry Andric Edges[Src].push_back(SrcEdge); 106*4824e7fdSDimitry Andric Edges[Dst].push_back(DstEdge); 107*4824e7fdSDimitry Andric } 108*4824e7fdSDimitry Andric 109*4824e7fdSDimitry Andric /// Adding an edge to the network of infinite capacity and a given cost. 110*4824e7fdSDimitry Andric void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) { 111*4824e7fdSDimitry Andric addEdge(Src, Dst, INF, Cost); 112*4824e7fdSDimitry Andric } 113*4824e7fdSDimitry Andric 114*4824e7fdSDimitry Andric /// Get the total flow from a given source node. 115*4824e7fdSDimitry Andric /// Returns a list of pairs (target node, amount of flow to the target). 116*4824e7fdSDimitry Andric const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const { 117*4824e7fdSDimitry Andric std::vector<std::pair<uint64_t, int64_t>> Flow; 118*4824e7fdSDimitry Andric for (auto &Edge : Edges[Src]) { 119*4824e7fdSDimitry Andric if (Edge.Flow > 0) 120*4824e7fdSDimitry Andric Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow)); 121*4824e7fdSDimitry Andric } 122*4824e7fdSDimitry Andric return Flow; 123*4824e7fdSDimitry Andric } 124*4824e7fdSDimitry Andric 125*4824e7fdSDimitry Andric /// Get the total flow between a pair of nodes. 126*4824e7fdSDimitry Andric int64_t getFlow(uint64_t Src, uint64_t Dst) const { 127*4824e7fdSDimitry Andric int64_t Flow = 0; 128*4824e7fdSDimitry Andric for (auto &Edge : Edges[Src]) { 129*4824e7fdSDimitry Andric if (Edge.Dst == Dst) { 130*4824e7fdSDimitry Andric Flow += Edge.Flow; 131*4824e7fdSDimitry Andric } 132*4824e7fdSDimitry Andric } 133*4824e7fdSDimitry Andric return Flow; 134*4824e7fdSDimitry Andric } 135*4824e7fdSDimitry Andric 136*4824e7fdSDimitry Andric /// A cost of increasing a block's count by one. 137*4824e7fdSDimitry Andric static constexpr int64_t AuxCostInc = 10; 138*4824e7fdSDimitry Andric /// A cost of decreasing a block's count by one. 139*4824e7fdSDimitry Andric static constexpr int64_t AuxCostDec = 20; 140*4824e7fdSDimitry Andric /// A cost of increasing a count of zero-weight block by one. 141*4824e7fdSDimitry Andric static constexpr int64_t AuxCostIncZero = 11; 142*4824e7fdSDimitry Andric /// A cost of increasing the entry block's count by one. 143*4824e7fdSDimitry Andric static constexpr int64_t AuxCostIncEntry = 40; 144*4824e7fdSDimitry Andric /// A cost of decreasing the entry block's count by one. 145*4824e7fdSDimitry Andric static constexpr int64_t AuxCostDecEntry = 10; 146*4824e7fdSDimitry Andric /// A cost of taking an unlikely jump. 147*4824e7fdSDimitry Andric static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 20; 148*4824e7fdSDimitry Andric 149*4824e7fdSDimitry Andric private: 150*4824e7fdSDimitry Andric /// Check for existence of an augmenting path with a positive capacity. 151*4824e7fdSDimitry Andric bool findAugmentingPath() { 152*4824e7fdSDimitry Andric // Initialize data structures 153*4824e7fdSDimitry Andric for (auto &Node : Nodes) { 154*4824e7fdSDimitry Andric Node.Distance = INF; 155*4824e7fdSDimitry Andric Node.ParentNode = uint64_t(-1); 156*4824e7fdSDimitry Andric Node.ParentEdgeIndex = uint64_t(-1); 157*4824e7fdSDimitry Andric Node.Taken = false; 158*4824e7fdSDimitry Andric } 159*4824e7fdSDimitry Andric 160*4824e7fdSDimitry Andric std::queue<uint64_t> Queue; 161*4824e7fdSDimitry Andric Queue.push(Source); 162*4824e7fdSDimitry Andric Nodes[Source].Distance = 0; 163*4824e7fdSDimitry Andric Nodes[Source].Taken = true; 164*4824e7fdSDimitry Andric while (!Queue.empty()) { 165*4824e7fdSDimitry Andric uint64_t Src = Queue.front(); 166*4824e7fdSDimitry Andric Queue.pop(); 167*4824e7fdSDimitry Andric Nodes[Src].Taken = false; 168*4824e7fdSDimitry Andric // Although the residual network contains edges with negative costs 169*4824e7fdSDimitry Andric // (in particular, backward edges), it can be shown that there are no 170*4824e7fdSDimitry Andric // negative-weight cycles and the following two invariants are maintained: 171*4824e7fdSDimitry Andric // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V, 172*4824e7fdSDimitry Andric // where Dist is the length of the shortest path between two nodes. This 173*4824e7fdSDimitry Andric // allows to prune the search-space of the path-finding algorithm using 174*4824e7fdSDimitry Andric // the following early-stop criteria: 175*4824e7fdSDimitry Andric // -- If we find a path with zero-distance from Source to Target, stop the 176*4824e7fdSDimitry Andric // search, as the path is the shortest since Dist[Source, Target] >= 0; 177*4824e7fdSDimitry Andric // -- If we have Dist[Source, V] > Dist[Source, Target], then do not 178*4824e7fdSDimitry Andric // process node V, as it is guaranteed _not_ to be on a shortest path 179*4824e7fdSDimitry Andric // from Source to Target; it follows from inequalities 180*4824e7fdSDimitry Andric // Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target] 181*4824e7fdSDimitry Andric // >= Dist[Source, V] 182*4824e7fdSDimitry Andric if (Nodes[Target].Distance == 0) 183*4824e7fdSDimitry Andric break; 184*4824e7fdSDimitry Andric if (Nodes[Src].Distance > Nodes[Target].Distance) 185*4824e7fdSDimitry Andric continue; 186*4824e7fdSDimitry Andric 187*4824e7fdSDimitry Andric // Process adjacent edges 188*4824e7fdSDimitry Andric for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) { 189*4824e7fdSDimitry Andric auto &Edge = Edges[Src][EdgeIdx]; 190*4824e7fdSDimitry Andric if (Edge.Flow < Edge.Capacity) { 191*4824e7fdSDimitry Andric uint64_t Dst = Edge.Dst; 192*4824e7fdSDimitry Andric int64_t NewDistance = Nodes[Src].Distance + Edge.Cost; 193*4824e7fdSDimitry Andric if (Nodes[Dst].Distance > NewDistance) { 194*4824e7fdSDimitry Andric // Update the distance and the parent node/edge 195*4824e7fdSDimitry Andric Nodes[Dst].Distance = NewDistance; 196*4824e7fdSDimitry Andric Nodes[Dst].ParentNode = Src; 197*4824e7fdSDimitry Andric Nodes[Dst].ParentEdgeIndex = EdgeIdx; 198*4824e7fdSDimitry Andric // Add the node to the queue, if it is not there yet 199*4824e7fdSDimitry Andric if (!Nodes[Dst].Taken) { 200*4824e7fdSDimitry Andric Queue.push(Dst); 201*4824e7fdSDimitry Andric Nodes[Dst].Taken = true; 202*4824e7fdSDimitry Andric } 203*4824e7fdSDimitry Andric } 204*4824e7fdSDimitry Andric } 205*4824e7fdSDimitry Andric } 206*4824e7fdSDimitry Andric } 207*4824e7fdSDimitry Andric 208*4824e7fdSDimitry Andric return Nodes[Target].Distance != INF; 209*4824e7fdSDimitry Andric } 210*4824e7fdSDimitry Andric 211*4824e7fdSDimitry Andric /// Update the current flow along the augmenting path. 212*4824e7fdSDimitry Andric void augmentFlowAlongPath() { 213*4824e7fdSDimitry Andric // Find path capacity 214*4824e7fdSDimitry Andric int64_t PathCapacity = INF; 215*4824e7fdSDimitry Andric uint64_t Now = Target; 216*4824e7fdSDimitry Andric while (Now != Source) { 217*4824e7fdSDimitry Andric uint64_t Pred = Nodes[Now].ParentNode; 218*4824e7fdSDimitry Andric auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; 219*4824e7fdSDimitry Andric PathCapacity = std::min(PathCapacity, Edge.Capacity - Edge.Flow); 220*4824e7fdSDimitry Andric Now = Pred; 221*4824e7fdSDimitry Andric } 222*4824e7fdSDimitry Andric 223*4824e7fdSDimitry Andric assert(PathCapacity > 0 && "found incorrect augmenting path"); 224*4824e7fdSDimitry Andric 225*4824e7fdSDimitry Andric // Update the flow along the path 226*4824e7fdSDimitry Andric Now = Target; 227*4824e7fdSDimitry Andric while (Now != Source) { 228*4824e7fdSDimitry Andric uint64_t Pred = Nodes[Now].ParentNode; 229*4824e7fdSDimitry Andric auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; 230*4824e7fdSDimitry Andric auto &RevEdge = Edges[Now][Edge.RevEdgeIndex]; 231*4824e7fdSDimitry Andric 232*4824e7fdSDimitry Andric Edge.Flow += PathCapacity; 233*4824e7fdSDimitry Andric RevEdge.Flow -= PathCapacity; 234*4824e7fdSDimitry Andric 235*4824e7fdSDimitry Andric Now = Pred; 236*4824e7fdSDimitry Andric } 237*4824e7fdSDimitry Andric } 238*4824e7fdSDimitry Andric 239*4824e7fdSDimitry Andric /// An node in a flow network. 240*4824e7fdSDimitry Andric struct Node { 241*4824e7fdSDimitry Andric /// The cost of the cheapest path from the source to the current node. 242*4824e7fdSDimitry Andric int64_t Distance; 243*4824e7fdSDimitry Andric /// The node preceding the current one in the path. 244*4824e7fdSDimitry Andric uint64_t ParentNode; 245*4824e7fdSDimitry Andric /// The index of the edge between ParentNode and the current node. 246*4824e7fdSDimitry Andric uint64_t ParentEdgeIndex; 247*4824e7fdSDimitry Andric /// An indicator of whether the current node is in a queue. 248*4824e7fdSDimitry Andric bool Taken; 249*4824e7fdSDimitry Andric }; 250*4824e7fdSDimitry Andric /// An edge in a flow network. 251*4824e7fdSDimitry Andric struct Edge { 252*4824e7fdSDimitry Andric /// The cost of the edge. 253*4824e7fdSDimitry Andric int64_t Cost; 254*4824e7fdSDimitry Andric /// The capacity of the edge. 255*4824e7fdSDimitry Andric int64_t Capacity; 256*4824e7fdSDimitry Andric /// The current flow on the edge. 257*4824e7fdSDimitry Andric int64_t Flow; 258*4824e7fdSDimitry Andric /// The destination node of the edge. 259*4824e7fdSDimitry Andric uint64_t Dst; 260*4824e7fdSDimitry Andric /// The index of the reverse edge between Dst and the current node. 261*4824e7fdSDimitry Andric uint64_t RevEdgeIndex; 262*4824e7fdSDimitry Andric }; 263*4824e7fdSDimitry Andric 264*4824e7fdSDimitry Andric /// The set of network nodes. 265*4824e7fdSDimitry Andric std::vector<Node> Nodes; 266*4824e7fdSDimitry Andric /// The set of network edges. 267*4824e7fdSDimitry Andric std::vector<std::vector<Edge>> Edges; 268*4824e7fdSDimitry Andric /// Source node of the flow. 269*4824e7fdSDimitry Andric uint64_t Source; 270*4824e7fdSDimitry Andric /// Target (sink) node of the flow. 271*4824e7fdSDimitry Andric uint64_t Target; 272*4824e7fdSDimitry Andric }; 273*4824e7fdSDimitry Andric 274*4824e7fdSDimitry Andric /// Initializing flow network for a given function. 275*4824e7fdSDimitry Andric /// 276*4824e7fdSDimitry Andric /// Every block is split into three nodes that are responsible for (i) an 277*4824e7fdSDimitry Andric /// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or 278*4824e7fdSDimitry Andric /// reduction of the block weight. 279*4824e7fdSDimitry Andric void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) { 280*4824e7fdSDimitry Andric uint64_t NumBlocks = Func.Blocks.size(); 281*4824e7fdSDimitry Andric assert(NumBlocks > 1 && "Too few blocks in a function"); 282*4824e7fdSDimitry Andric LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n"); 283*4824e7fdSDimitry Andric 284*4824e7fdSDimitry Andric // Pre-process data: make sure the entry weight is at least 1 285*4824e7fdSDimitry Andric if (Func.Blocks[Func.Entry].Weight == 0) { 286*4824e7fdSDimitry Andric Func.Blocks[Func.Entry].Weight = 1; 287*4824e7fdSDimitry Andric } 288*4824e7fdSDimitry Andric // Introducing dummy source/sink pairs to allow flow circulation. 289*4824e7fdSDimitry Andric // The nodes corresponding to blocks of Func have indicies in the range 290*4824e7fdSDimitry Andric // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values. 291*4824e7fdSDimitry Andric uint64_t S = 3 * NumBlocks; 292*4824e7fdSDimitry Andric uint64_t T = S + 1; 293*4824e7fdSDimitry Andric uint64_t S1 = S + 2; 294*4824e7fdSDimitry Andric uint64_t T1 = S + 3; 295*4824e7fdSDimitry Andric 296*4824e7fdSDimitry Andric Network.initialize(3 * NumBlocks + 4, S1, T1); 297*4824e7fdSDimitry Andric 298*4824e7fdSDimitry Andric // Create three nodes for every block of the function 299*4824e7fdSDimitry Andric for (uint64_t B = 0; B < NumBlocks; B++) { 300*4824e7fdSDimitry Andric auto &Block = Func.Blocks[B]; 301*4824e7fdSDimitry Andric assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) && 302*4824e7fdSDimitry Andric "non-zero weight of a block w/o weight except for an entry"); 303*4824e7fdSDimitry Andric 304*4824e7fdSDimitry Andric // Split every block into two nodes 305*4824e7fdSDimitry Andric uint64_t Bin = 3 * B; 306*4824e7fdSDimitry Andric uint64_t Bout = 3 * B + 1; 307*4824e7fdSDimitry Andric uint64_t Baux = 3 * B + 2; 308*4824e7fdSDimitry Andric if (Block.Weight > 0) { 309*4824e7fdSDimitry Andric Network.addEdge(S1, Bout, Block.Weight, 0); 310*4824e7fdSDimitry Andric Network.addEdge(Bin, T1, Block.Weight, 0); 311*4824e7fdSDimitry Andric } 312*4824e7fdSDimitry Andric 313*4824e7fdSDimitry Andric // Edges from S and to T 314*4824e7fdSDimitry Andric assert((!Block.isEntry() || !Block.isExit()) && 315*4824e7fdSDimitry Andric "a block cannot be an entry and an exit"); 316*4824e7fdSDimitry Andric if (Block.isEntry()) { 317*4824e7fdSDimitry Andric Network.addEdge(S, Bin, 0); 318*4824e7fdSDimitry Andric } else if (Block.isExit()) { 319*4824e7fdSDimitry Andric Network.addEdge(Bout, T, 0); 320*4824e7fdSDimitry Andric } 321*4824e7fdSDimitry Andric 322*4824e7fdSDimitry Andric // An auxiliary node to allow increase/reduction of block counts: 323*4824e7fdSDimitry Andric // We assume that decreasing block counts is more expensive than increasing, 324*4824e7fdSDimitry Andric // and thus, setting separate costs here. In the future we may want to tune 325*4824e7fdSDimitry Andric // the relative costs so as to maximize the quality of generated profiles. 326*4824e7fdSDimitry Andric int64_t AuxCostInc = MinCostMaxFlow::AuxCostInc; 327*4824e7fdSDimitry Andric int64_t AuxCostDec = MinCostMaxFlow::AuxCostDec; 328*4824e7fdSDimitry Andric if (Block.UnknownWeight) { 329*4824e7fdSDimitry Andric // Do not penalize changing weights of blocks w/o known profile count 330*4824e7fdSDimitry Andric AuxCostInc = 0; 331*4824e7fdSDimitry Andric AuxCostDec = 0; 332*4824e7fdSDimitry Andric } else { 333*4824e7fdSDimitry Andric // Increasing the count for "cold" blocks with zero initial count is more 334*4824e7fdSDimitry Andric // expensive than for "hot" ones 335*4824e7fdSDimitry Andric if (Block.Weight == 0) { 336*4824e7fdSDimitry Andric AuxCostInc = MinCostMaxFlow::AuxCostIncZero; 337*4824e7fdSDimitry Andric } 338*4824e7fdSDimitry Andric // Modifying the count of the entry block is expensive 339*4824e7fdSDimitry Andric if (Block.isEntry()) { 340*4824e7fdSDimitry Andric AuxCostInc = MinCostMaxFlow::AuxCostIncEntry; 341*4824e7fdSDimitry Andric AuxCostDec = MinCostMaxFlow::AuxCostDecEntry; 342*4824e7fdSDimitry Andric } 343*4824e7fdSDimitry Andric } 344*4824e7fdSDimitry Andric // For blocks with self-edges, do not penalize a reduction of the count, 345*4824e7fdSDimitry Andric // as all of the increase can be attributed to the self-edge 346*4824e7fdSDimitry Andric if (Block.HasSelfEdge) { 347*4824e7fdSDimitry Andric AuxCostDec = 0; 348*4824e7fdSDimitry Andric } 349*4824e7fdSDimitry Andric 350*4824e7fdSDimitry Andric Network.addEdge(Bin, Baux, AuxCostInc); 351*4824e7fdSDimitry Andric Network.addEdge(Baux, Bout, AuxCostInc); 352*4824e7fdSDimitry Andric if (Block.Weight > 0) { 353*4824e7fdSDimitry Andric Network.addEdge(Bout, Baux, AuxCostDec); 354*4824e7fdSDimitry Andric Network.addEdge(Baux, Bin, AuxCostDec); 355*4824e7fdSDimitry Andric } 356*4824e7fdSDimitry Andric } 357*4824e7fdSDimitry Andric 358*4824e7fdSDimitry Andric // Creating edges for every jump 359*4824e7fdSDimitry Andric for (auto &Jump : Func.Jumps) { 360*4824e7fdSDimitry Andric uint64_t Src = Jump.Source; 361*4824e7fdSDimitry Andric uint64_t Dst = Jump.Target; 362*4824e7fdSDimitry Andric if (Src != Dst) { 363*4824e7fdSDimitry Andric uint64_t SrcOut = 3 * Src + 1; 364*4824e7fdSDimitry Andric uint64_t DstIn = 3 * Dst; 365*4824e7fdSDimitry Andric uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0; 366*4824e7fdSDimitry Andric Network.addEdge(SrcOut, DstIn, Cost); 367*4824e7fdSDimitry Andric } 368*4824e7fdSDimitry Andric } 369*4824e7fdSDimitry Andric 370*4824e7fdSDimitry Andric // Make sure we have a valid flow circulation 371*4824e7fdSDimitry Andric Network.addEdge(T, S, 0); 372*4824e7fdSDimitry Andric } 373*4824e7fdSDimitry Andric 374*4824e7fdSDimitry Andric /// Extract resulting block and edge counts from the flow network. 375*4824e7fdSDimitry Andric void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) { 376*4824e7fdSDimitry Andric uint64_t NumBlocks = Func.Blocks.size(); 377*4824e7fdSDimitry Andric 378*4824e7fdSDimitry Andric // Extract resulting block counts 379*4824e7fdSDimitry Andric for (uint64_t Src = 0; Src < NumBlocks; Src++) { 380*4824e7fdSDimitry Andric auto &Block = Func.Blocks[Src]; 381*4824e7fdSDimitry Andric uint64_t SrcOut = 3 * Src + 1; 382*4824e7fdSDimitry Andric int64_t Flow = 0; 383*4824e7fdSDimitry Andric for (auto &Adj : Network.getFlow(SrcOut)) { 384*4824e7fdSDimitry Andric uint64_t DstIn = Adj.first; 385*4824e7fdSDimitry Andric int64_t DstFlow = Adj.second; 386*4824e7fdSDimitry Andric bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2); 387*4824e7fdSDimitry Andric if (!IsAuxNode || Block.HasSelfEdge) { 388*4824e7fdSDimitry Andric Flow += DstFlow; 389*4824e7fdSDimitry Andric } 390*4824e7fdSDimitry Andric } 391*4824e7fdSDimitry Andric Block.Flow = Flow; 392*4824e7fdSDimitry Andric assert(Flow >= 0 && "negative block flow"); 393*4824e7fdSDimitry Andric } 394*4824e7fdSDimitry Andric 395*4824e7fdSDimitry Andric // Extract resulting jump counts 396*4824e7fdSDimitry Andric for (auto &Jump : Func.Jumps) { 397*4824e7fdSDimitry Andric uint64_t Src = Jump.Source; 398*4824e7fdSDimitry Andric uint64_t Dst = Jump.Target; 399*4824e7fdSDimitry Andric int64_t Flow = 0; 400*4824e7fdSDimitry Andric if (Src != Dst) { 401*4824e7fdSDimitry Andric uint64_t SrcOut = 3 * Src + 1; 402*4824e7fdSDimitry Andric uint64_t DstIn = 3 * Dst; 403*4824e7fdSDimitry Andric Flow = Network.getFlow(SrcOut, DstIn); 404*4824e7fdSDimitry Andric } else { 405*4824e7fdSDimitry Andric uint64_t SrcOut = 3 * Src + 1; 406*4824e7fdSDimitry Andric uint64_t SrcAux = 3 * Src + 2; 407*4824e7fdSDimitry Andric int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux); 408*4824e7fdSDimitry Andric if (AuxFlow > 0) 409*4824e7fdSDimitry Andric Flow = AuxFlow; 410*4824e7fdSDimitry Andric } 411*4824e7fdSDimitry Andric Jump.Flow = Flow; 412*4824e7fdSDimitry Andric assert(Flow >= 0 && "negative jump flow"); 413*4824e7fdSDimitry Andric } 414*4824e7fdSDimitry Andric } 415*4824e7fdSDimitry Andric 416*4824e7fdSDimitry Andric #ifndef NDEBUG 417*4824e7fdSDimitry Andric /// Verify that the computed flow values satisfy flow conservation rules 418*4824e7fdSDimitry Andric void verifyWeights(const FlowFunction &Func) { 419*4824e7fdSDimitry Andric const uint64_t NumBlocks = Func.Blocks.size(); 420*4824e7fdSDimitry Andric auto InFlow = std::vector<uint64_t>(NumBlocks, 0); 421*4824e7fdSDimitry Andric auto OutFlow = std::vector<uint64_t>(NumBlocks, 0); 422*4824e7fdSDimitry Andric for (auto &Jump : Func.Jumps) { 423*4824e7fdSDimitry Andric InFlow[Jump.Target] += Jump.Flow; 424*4824e7fdSDimitry Andric OutFlow[Jump.Source] += Jump.Flow; 425*4824e7fdSDimitry Andric } 426*4824e7fdSDimitry Andric 427*4824e7fdSDimitry Andric uint64_t TotalInFlow = 0; 428*4824e7fdSDimitry Andric uint64_t TotalOutFlow = 0; 429*4824e7fdSDimitry Andric for (uint64_t I = 0; I < NumBlocks; I++) { 430*4824e7fdSDimitry Andric auto &Block = Func.Blocks[I]; 431*4824e7fdSDimitry Andric if (Block.isEntry()) { 432*4824e7fdSDimitry Andric TotalInFlow += Block.Flow; 433*4824e7fdSDimitry Andric assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow"); 434*4824e7fdSDimitry Andric } else if (Block.isExit()) { 435*4824e7fdSDimitry Andric TotalOutFlow += Block.Flow; 436*4824e7fdSDimitry Andric assert(Block.Flow == InFlow[I] && "incorrectly computed control flow"); 437*4824e7fdSDimitry Andric } else { 438*4824e7fdSDimitry Andric assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow"); 439*4824e7fdSDimitry Andric assert(Block.Flow == InFlow[I] && "incorrectly computed control flow"); 440*4824e7fdSDimitry Andric } 441*4824e7fdSDimitry Andric } 442*4824e7fdSDimitry Andric assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow"); 443*4824e7fdSDimitry Andric } 444*4824e7fdSDimitry Andric #endif 445*4824e7fdSDimitry Andric 446*4824e7fdSDimitry Andric } // end of anonymous namespace 447*4824e7fdSDimitry Andric 448*4824e7fdSDimitry Andric /// Apply the profile inference algorithm for a given flow function 449*4824e7fdSDimitry Andric void llvm::applyFlowInference(FlowFunction &Func) { 450*4824e7fdSDimitry Andric // Create and apply an inference network model 451*4824e7fdSDimitry Andric auto InferenceNetwork = MinCostMaxFlow(); 452*4824e7fdSDimitry Andric initializeNetwork(InferenceNetwork, Func); 453*4824e7fdSDimitry Andric InferenceNetwork.run(); 454*4824e7fdSDimitry Andric 455*4824e7fdSDimitry Andric // Extract flow values for every block and every edge 456*4824e7fdSDimitry Andric extractWeights(InferenceNetwork, Func); 457*4824e7fdSDimitry Andric 458*4824e7fdSDimitry Andric #ifndef NDEBUG 459*4824e7fdSDimitry Andric // Verify the result 460*4824e7fdSDimitry Andric verifyWeights(Func); 461*4824e7fdSDimitry Andric #endif 462*4824e7fdSDimitry Andric } 463