14824e7fdSDimitry Andric //===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===// 24824e7fdSDimitry Andric // 34824e7fdSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 44824e7fdSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 54824e7fdSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 64824e7fdSDimitry Andric // 74824e7fdSDimitry Andric //===----------------------------------------------------------------------===// 84824e7fdSDimitry Andric // 94824e7fdSDimitry Andric // This file implements a profile inference algorithm. Given an incomplete and 104824e7fdSDimitry Andric // possibly imprecise block counts, the algorithm reconstructs realistic block 114824e7fdSDimitry Andric // and edge counts that satisfy flow conservation rules, while minimally modify 124824e7fdSDimitry Andric // input block counts. 134824e7fdSDimitry Andric // 144824e7fdSDimitry Andric //===----------------------------------------------------------------------===// 154824e7fdSDimitry Andric 164824e7fdSDimitry Andric #include "llvm/Transforms/Utils/SampleProfileInference.h" 174824e7fdSDimitry Andric #include "llvm/Support/Debug.h" 184824e7fdSDimitry Andric #include <queue> 194824e7fdSDimitry Andric #include <set> 204824e7fdSDimitry Andric 214824e7fdSDimitry Andric using namespace llvm; 224824e7fdSDimitry Andric #define DEBUG_TYPE "sample-profile-inference" 234824e7fdSDimitry Andric 244824e7fdSDimitry Andric namespace { 254824e7fdSDimitry Andric 264824e7fdSDimitry Andric /// A value indicating an infinite flow/capacity/weight of a block/edge. 274824e7fdSDimitry Andric /// Not using numeric_limits<int64_t>::max(), as the values can be summed up 284824e7fdSDimitry Andric /// during the execution. 294824e7fdSDimitry Andric static constexpr int64_t INF = ((int64_t)1) << 50; 304824e7fdSDimitry Andric 314824e7fdSDimitry Andric /// The minimum-cost maximum flow algorithm. 324824e7fdSDimitry Andric /// 334824e7fdSDimitry Andric /// The algorithm finds the maximum flow of minimum cost on a given (directed) 344824e7fdSDimitry Andric /// network using a modified version of the classical Moore-Bellman-Ford 354824e7fdSDimitry Andric /// approach. The algorithm applies a number of augmentation iterations in which 364824e7fdSDimitry Andric /// flow is sent along paths of positive capacity from the source to the sink. 374824e7fdSDimitry Andric /// The worst-case time complexity of the implementation is O(v(f)*m*n), where 384824e7fdSDimitry Andric /// where m is the number of edges, n is the number of vertices, and v(f) is the 394824e7fdSDimitry Andric /// value of the maximum flow. However, the observed running time on typical 404824e7fdSDimitry Andric /// instances is sub-quadratic, that is, o(n^2). 414824e7fdSDimitry Andric /// 424824e7fdSDimitry Andric /// The input is a set of edges with specified costs and capacities, and a pair 434824e7fdSDimitry Andric /// of nodes (source and sink). The output is the flow along each edge of the 444824e7fdSDimitry Andric /// minimum total cost respecting the given edge capacities. 454824e7fdSDimitry Andric class MinCostMaxFlow { 464824e7fdSDimitry Andric public: 474824e7fdSDimitry Andric // Initialize algorithm's data structures for a network of a given size. 484824e7fdSDimitry Andric void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) { 494824e7fdSDimitry Andric Source = SourceNode; 504824e7fdSDimitry Andric Target = SinkNode; 514824e7fdSDimitry Andric 524824e7fdSDimitry Andric Nodes = std::vector<Node>(NodeCount); 534824e7fdSDimitry Andric Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>()); 544824e7fdSDimitry Andric } 554824e7fdSDimitry Andric 564824e7fdSDimitry Andric // Run the algorithm. 574824e7fdSDimitry Andric int64_t run() { 584824e7fdSDimitry Andric // Find an augmenting path and update the flow along the path 594824e7fdSDimitry Andric size_t AugmentationIters = 0; 604824e7fdSDimitry Andric while (findAugmentingPath()) { 614824e7fdSDimitry Andric augmentFlowAlongPath(); 624824e7fdSDimitry Andric AugmentationIters++; 634824e7fdSDimitry Andric } 644824e7fdSDimitry Andric 654824e7fdSDimitry Andric // Compute the total flow and its cost 664824e7fdSDimitry Andric int64_t TotalCost = 0; 674824e7fdSDimitry Andric int64_t TotalFlow = 0; 684824e7fdSDimitry Andric for (uint64_t Src = 0; Src < Nodes.size(); Src++) { 694824e7fdSDimitry Andric for (auto &Edge : Edges[Src]) { 704824e7fdSDimitry Andric if (Edge.Flow > 0) { 714824e7fdSDimitry Andric TotalCost += Edge.Cost * Edge.Flow; 724824e7fdSDimitry Andric if (Src == Source) 734824e7fdSDimitry Andric TotalFlow += Edge.Flow; 744824e7fdSDimitry Andric } 754824e7fdSDimitry Andric } 764824e7fdSDimitry Andric } 774824e7fdSDimitry Andric LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters 784824e7fdSDimitry Andric << " iterations with " << TotalFlow << " total flow" 794824e7fdSDimitry Andric << " of " << TotalCost << " cost\n"); 804824e7fdSDimitry Andric (void)TotalFlow; 814824e7fdSDimitry Andric return TotalCost; 824824e7fdSDimitry Andric } 834824e7fdSDimitry Andric 844824e7fdSDimitry Andric /// Adding an edge to the network with a specified capacity and a cost. 854824e7fdSDimitry Andric /// Multiple edges between a pair of nodes are allowed but self-edges 864824e7fdSDimitry Andric /// are not supported. 874824e7fdSDimitry Andric void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) { 884824e7fdSDimitry Andric assert(Capacity > 0 && "adding an edge of zero capacity"); 894824e7fdSDimitry Andric assert(Src != Dst && "loop edge are not supported"); 904824e7fdSDimitry Andric 914824e7fdSDimitry Andric Edge SrcEdge; 924824e7fdSDimitry Andric SrcEdge.Dst = Dst; 934824e7fdSDimitry Andric SrcEdge.Cost = Cost; 944824e7fdSDimitry Andric SrcEdge.Capacity = Capacity; 954824e7fdSDimitry Andric SrcEdge.Flow = 0; 964824e7fdSDimitry Andric SrcEdge.RevEdgeIndex = Edges[Dst].size(); 974824e7fdSDimitry Andric 984824e7fdSDimitry Andric Edge DstEdge; 994824e7fdSDimitry Andric DstEdge.Dst = Src; 1004824e7fdSDimitry Andric DstEdge.Cost = -Cost; 1014824e7fdSDimitry Andric DstEdge.Capacity = 0; 1024824e7fdSDimitry Andric DstEdge.Flow = 0; 1034824e7fdSDimitry Andric DstEdge.RevEdgeIndex = Edges[Src].size(); 1044824e7fdSDimitry Andric 1054824e7fdSDimitry Andric Edges[Src].push_back(SrcEdge); 1064824e7fdSDimitry Andric Edges[Dst].push_back(DstEdge); 1074824e7fdSDimitry Andric } 1084824e7fdSDimitry Andric 1094824e7fdSDimitry Andric /// Adding an edge to the network of infinite capacity and a given cost. 1104824e7fdSDimitry Andric void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) { 1114824e7fdSDimitry Andric addEdge(Src, Dst, INF, Cost); 1124824e7fdSDimitry Andric } 1134824e7fdSDimitry Andric 1144824e7fdSDimitry Andric /// Get the total flow from a given source node. 1154824e7fdSDimitry Andric /// Returns a list of pairs (target node, amount of flow to the target). 1164824e7fdSDimitry Andric const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const { 1174824e7fdSDimitry Andric std::vector<std::pair<uint64_t, int64_t>> Flow; 1184824e7fdSDimitry Andric for (auto &Edge : Edges[Src]) { 1194824e7fdSDimitry Andric if (Edge.Flow > 0) 1204824e7fdSDimitry Andric Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow)); 1214824e7fdSDimitry Andric } 1224824e7fdSDimitry Andric return Flow; 1234824e7fdSDimitry Andric } 1244824e7fdSDimitry Andric 1254824e7fdSDimitry Andric /// Get the total flow between a pair of nodes. 1264824e7fdSDimitry Andric int64_t getFlow(uint64_t Src, uint64_t Dst) const { 1274824e7fdSDimitry Andric int64_t Flow = 0; 1284824e7fdSDimitry Andric for (auto &Edge : Edges[Src]) { 1294824e7fdSDimitry Andric if (Edge.Dst == Dst) { 1304824e7fdSDimitry Andric Flow += Edge.Flow; 1314824e7fdSDimitry Andric } 1324824e7fdSDimitry Andric } 1334824e7fdSDimitry Andric return Flow; 1344824e7fdSDimitry Andric } 1354824e7fdSDimitry Andric 1364824e7fdSDimitry Andric /// A cost of increasing a block's count by one. 1374824e7fdSDimitry Andric static constexpr int64_t AuxCostInc = 10; 1384824e7fdSDimitry Andric /// A cost of decreasing a block's count by one. 1394824e7fdSDimitry Andric static constexpr int64_t AuxCostDec = 20; 1404824e7fdSDimitry Andric /// A cost of increasing a count of zero-weight block by one. 1414824e7fdSDimitry Andric static constexpr int64_t AuxCostIncZero = 11; 1424824e7fdSDimitry Andric /// A cost of increasing the entry block's count by one. 1434824e7fdSDimitry Andric static constexpr int64_t AuxCostIncEntry = 40; 1444824e7fdSDimitry Andric /// A cost of decreasing the entry block's count by one. 1454824e7fdSDimitry Andric static constexpr int64_t AuxCostDecEntry = 10; 1464824e7fdSDimitry Andric /// A cost of taking an unlikely jump. 1474824e7fdSDimitry Andric static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 20; 1484824e7fdSDimitry Andric 1494824e7fdSDimitry Andric private: 1504824e7fdSDimitry Andric /// Check for existence of an augmenting path with a positive capacity. 1514824e7fdSDimitry Andric bool findAugmentingPath() { 1524824e7fdSDimitry Andric // Initialize data structures 1534824e7fdSDimitry Andric for (auto &Node : Nodes) { 1544824e7fdSDimitry Andric Node.Distance = INF; 1554824e7fdSDimitry Andric Node.ParentNode = uint64_t(-1); 1564824e7fdSDimitry Andric Node.ParentEdgeIndex = uint64_t(-1); 1574824e7fdSDimitry Andric Node.Taken = false; 1584824e7fdSDimitry Andric } 1594824e7fdSDimitry Andric 1604824e7fdSDimitry Andric std::queue<uint64_t> Queue; 1614824e7fdSDimitry Andric Queue.push(Source); 1624824e7fdSDimitry Andric Nodes[Source].Distance = 0; 1634824e7fdSDimitry Andric Nodes[Source].Taken = true; 1644824e7fdSDimitry Andric while (!Queue.empty()) { 1654824e7fdSDimitry Andric uint64_t Src = Queue.front(); 1664824e7fdSDimitry Andric Queue.pop(); 1674824e7fdSDimitry Andric Nodes[Src].Taken = false; 1684824e7fdSDimitry Andric // Although the residual network contains edges with negative costs 1694824e7fdSDimitry Andric // (in particular, backward edges), it can be shown that there are no 1704824e7fdSDimitry Andric // negative-weight cycles and the following two invariants are maintained: 1714824e7fdSDimitry Andric // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V, 1724824e7fdSDimitry Andric // where Dist is the length of the shortest path between two nodes. This 1734824e7fdSDimitry Andric // allows to prune the search-space of the path-finding algorithm using 1744824e7fdSDimitry Andric // the following early-stop criteria: 1754824e7fdSDimitry Andric // -- If we find a path with zero-distance from Source to Target, stop the 1764824e7fdSDimitry Andric // search, as the path is the shortest since Dist[Source, Target] >= 0; 1774824e7fdSDimitry Andric // -- If we have Dist[Source, V] > Dist[Source, Target], then do not 1784824e7fdSDimitry Andric // process node V, as it is guaranteed _not_ to be on a shortest path 1794824e7fdSDimitry Andric // from Source to Target; it follows from inequalities 1804824e7fdSDimitry Andric // Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target] 1814824e7fdSDimitry Andric // >= Dist[Source, V] 1824824e7fdSDimitry Andric if (Nodes[Target].Distance == 0) 1834824e7fdSDimitry Andric break; 1844824e7fdSDimitry Andric if (Nodes[Src].Distance > Nodes[Target].Distance) 1854824e7fdSDimitry Andric continue; 1864824e7fdSDimitry Andric 1874824e7fdSDimitry Andric // Process adjacent edges 1884824e7fdSDimitry Andric for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) { 1894824e7fdSDimitry Andric auto &Edge = Edges[Src][EdgeIdx]; 1904824e7fdSDimitry Andric if (Edge.Flow < Edge.Capacity) { 1914824e7fdSDimitry Andric uint64_t Dst = Edge.Dst; 1924824e7fdSDimitry Andric int64_t NewDistance = Nodes[Src].Distance + Edge.Cost; 1934824e7fdSDimitry Andric if (Nodes[Dst].Distance > NewDistance) { 1944824e7fdSDimitry Andric // Update the distance and the parent node/edge 1954824e7fdSDimitry Andric Nodes[Dst].Distance = NewDistance; 1964824e7fdSDimitry Andric Nodes[Dst].ParentNode = Src; 1974824e7fdSDimitry Andric Nodes[Dst].ParentEdgeIndex = EdgeIdx; 1984824e7fdSDimitry Andric // Add the node to the queue, if it is not there yet 1994824e7fdSDimitry Andric if (!Nodes[Dst].Taken) { 2004824e7fdSDimitry Andric Queue.push(Dst); 2014824e7fdSDimitry Andric Nodes[Dst].Taken = true; 2024824e7fdSDimitry Andric } 2034824e7fdSDimitry Andric } 2044824e7fdSDimitry Andric } 2054824e7fdSDimitry Andric } 2064824e7fdSDimitry Andric } 2074824e7fdSDimitry Andric 2084824e7fdSDimitry Andric return Nodes[Target].Distance != INF; 2094824e7fdSDimitry Andric } 2104824e7fdSDimitry Andric 2114824e7fdSDimitry Andric /// Update the current flow along the augmenting path. 2124824e7fdSDimitry Andric void augmentFlowAlongPath() { 2134824e7fdSDimitry Andric // Find path capacity 2144824e7fdSDimitry Andric int64_t PathCapacity = INF; 2154824e7fdSDimitry Andric uint64_t Now = Target; 2164824e7fdSDimitry Andric while (Now != Source) { 2174824e7fdSDimitry Andric uint64_t Pred = Nodes[Now].ParentNode; 2184824e7fdSDimitry Andric auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; 2194824e7fdSDimitry Andric PathCapacity = std::min(PathCapacity, Edge.Capacity - Edge.Flow); 2204824e7fdSDimitry Andric Now = Pred; 2214824e7fdSDimitry Andric } 2224824e7fdSDimitry Andric 223*0eae32dcSDimitry Andric assert(PathCapacity > 0 && "found an incorrect augmenting path"); 2244824e7fdSDimitry Andric 2254824e7fdSDimitry Andric // Update the flow along the path 2264824e7fdSDimitry Andric Now = Target; 2274824e7fdSDimitry Andric while (Now != Source) { 2284824e7fdSDimitry Andric uint64_t Pred = Nodes[Now].ParentNode; 2294824e7fdSDimitry Andric auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; 2304824e7fdSDimitry Andric auto &RevEdge = Edges[Now][Edge.RevEdgeIndex]; 2314824e7fdSDimitry Andric 2324824e7fdSDimitry Andric Edge.Flow += PathCapacity; 2334824e7fdSDimitry Andric RevEdge.Flow -= PathCapacity; 2344824e7fdSDimitry Andric 2354824e7fdSDimitry Andric Now = Pred; 2364824e7fdSDimitry Andric } 2374824e7fdSDimitry Andric } 2384824e7fdSDimitry Andric 2394824e7fdSDimitry Andric /// An node in a flow network. 2404824e7fdSDimitry Andric struct Node { 2414824e7fdSDimitry Andric /// The cost of the cheapest path from the source to the current node. 2424824e7fdSDimitry Andric int64_t Distance; 2434824e7fdSDimitry Andric /// The node preceding the current one in the path. 2444824e7fdSDimitry Andric uint64_t ParentNode; 2454824e7fdSDimitry Andric /// The index of the edge between ParentNode and the current node. 2464824e7fdSDimitry Andric uint64_t ParentEdgeIndex; 2474824e7fdSDimitry Andric /// An indicator of whether the current node is in a queue. 2484824e7fdSDimitry Andric bool Taken; 2494824e7fdSDimitry Andric }; 2504824e7fdSDimitry Andric /// An edge in a flow network. 2514824e7fdSDimitry Andric struct Edge { 2524824e7fdSDimitry Andric /// The cost of the edge. 2534824e7fdSDimitry Andric int64_t Cost; 2544824e7fdSDimitry Andric /// The capacity of the edge. 2554824e7fdSDimitry Andric int64_t Capacity; 2564824e7fdSDimitry Andric /// The current flow on the edge. 2574824e7fdSDimitry Andric int64_t Flow; 2584824e7fdSDimitry Andric /// The destination node of the edge. 2594824e7fdSDimitry Andric uint64_t Dst; 2604824e7fdSDimitry Andric /// The index of the reverse edge between Dst and the current node. 2614824e7fdSDimitry Andric uint64_t RevEdgeIndex; 2624824e7fdSDimitry Andric }; 2634824e7fdSDimitry Andric 2644824e7fdSDimitry Andric /// The set of network nodes. 2654824e7fdSDimitry Andric std::vector<Node> Nodes; 2664824e7fdSDimitry Andric /// The set of network edges. 2674824e7fdSDimitry Andric std::vector<std::vector<Edge>> Edges; 2684824e7fdSDimitry Andric /// Source node of the flow. 2694824e7fdSDimitry Andric uint64_t Source; 2704824e7fdSDimitry Andric /// Target (sink) node of the flow. 2714824e7fdSDimitry Andric uint64_t Target; 2724824e7fdSDimitry Andric }; 2734824e7fdSDimitry Andric 274*0eae32dcSDimitry Andric /// A post-processing adjustment of control flow. It applies two steps by 275*0eae32dcSDimitry Andric /// rerouting some flow and making it more realistic: 276*0eae32dcSDimitry Andric /// 277*0eae32dcSDimitry Andric /// - First, it removes all isolated components ("islands") with a positive flow 278*0eae32dcSDimitry Andric /// that are unreachable from the entry block. For every such component, we 279*0eae32dcSDimitry Andric /// find the shortest from the entry to an exit passing through the component, 280*0eae32dcSDimitry Andric /// and increase the flow by one unit along the path. 281*0eae32dcSDimitry Andric /// 282*0eae32dcSDimitry Andric /// - Second, it identifies all "unknown subgraphs" consisting of basic blocks 283*0eae32dcSDimitry Andric /// with no sampled counts. Then it rebalnces the flow that goes through such 284*0eae32dcSDimitry Andric /// a subgraph so that each branch is taken with probability 50%. 285*0eae32dcSDimitry Andric /// An unknown subgraph is such that for every two nodes u and v: 286*0eae32dcSDimitry Andric /// - u dominates v and u is not unknown; 287*0eae32dcSDimitry Andric /// - v post-dominates u; and 288*0eae32dcSDimitry Andric /// - all inner-nodes of all (u,v)-paths are unknown. 289*0eae32dcSDimitry Andric /// 290*0eae32dcSDimitry Andric class FlowAdjuster { 291*0eae32dcSDimitry Andric public: 292*0eae32dcSDimitry Andric FlowAdjuster(FlowFunction &Func) : Func(Func) { 293*0eae32dcSDimitry Andric assert(Func.Blocks[Func.Entry].isEntry() && 294*0eae32dcSDimitry Andric "incorrect index of the entry block"); 295*0eae32dcSDimitry Andric } 296*0eae32dcSDimitry Andric 297*0eae32dcSDimitry Andric // Run the post-processing 298*0eae32dcSDimitry Andric void run() { 299*0eae32dcSDimitry Andric /// Adjust the flow to get rid of isolated components. 300*0eae32dcSDimitry Andric joinIsolatedComponents(); 301*0eae32dcSDimitry Andric 302*0eae32dcSDimitry Andric /// Rebalance the flow inside unknown subgraphs. 303*0eae32dcSDimitry Andric rebalanceUnknownSubgraphs(); 304*0eae32dcSDimitry Andric } 305*0eae32dcSDimitry Andric 306*0eae32dcSDimitry Andric /// The probability for the first successor of a unknown subgraph 307*0eae32dcSDimitry Andric static constexpr double UnknownFirstSuccProbability = 0.5; 308*0eae32dcSDimitry Andric 309*0eae32dcSDimitry Andric private: 310*0eae32dcSDimitry Andric void joinIsolatedComponents() { 311*0eae32dcSDimitry Andric // Find blocks that are reachable from the source 312*0eae32dcSDimitry Andric auto Visited = std::vector<bool>(NumBlocks(), false); 313*0eae32dcSDimitry Andric findReachable(Func.Entry, Visited); 314*0eae32dcSDimitry Andric 315*0eae32dcSDimitry Andric // Iterate over all non-reachable blocks and adjust their weights 316*0eae32dcSDimitry Andric for (uint64_t I = 0; I < NumBlocks(); I++) { 317*0eae32dcSDimitry Andric auto &Block = Func.Blocks[I]; 318*0eae32dcSDimitry Andric if (Block.Flow > 0 && !Visited[I]) { 319*0eae32dcSDimitry Andric // Find a path from the entry to an exit passing through the block I 320*0eae32dcSDimitry Andric auto Path = findShortestPath(I); 321*0eae32dcSDimitry Andric // Increase the flow along the path 322*0eae32dcSDimitry Andric assert(Path.size() > 0 && Path[0]->Source == Func.Entry && 323*0eae32dcSDimitry Andric "incorrectly computed path adjusting control flow"); 324*0eae32dcSDimitry Andric Func.Blocks[Func.Entry].Flow += 1; 325*0eae32dcSDimitry Andric for (auto &Jump : Path) { 326*0eae32dcSDimitry Andric Jump->Flow += 1; 327*0eae32dcSDimitry Andric Func.Blocks[Jump->Target].Flow += 1; 328*0eae32dcSDimitry Andric // Update reachability 329*0eae32dcSDimitry Andric findReachable(Jump->Target, Visited); 330*0eae32dcSDimitry Andric } 331*0eae32dcSDimitry Andric } 332*0eae32dcSDimitry Andric } 333*0eae32dcSDimitry Andric } 334*0eae32dcSDimitry Andric 335*0eae32dcSDimitry Andric /// Run BFS from a given block along the jumps with a positive flow and mark 336*0eae32dcSDimitry Andric /// all reachable blocks. 337*0eae32dcSDimitry Andric void findReachable(uint64_t Src, std::vector<bool> &Visited) { 338*0eae32dcSDimitry Andric if (Visited[Src]) 339*0eae32dcSDimitry Andric return; 340*0eae32dcSDimitry Andric std::queue<uint64_t> Queue; 341*0eae32dcSDimitry Andric Queue.push(Src); 342*0eae32dcSDimitry Andric Visited[Src] = true; 343*0eae32dcSDimitry Andric while (!Queue.empty()) { 344*0eae32dcSDimitry Andric Src = Queue.front(); 345*0eae32dcSDimitry Andric Queue.pop(); 346*0eae32dcSDimitry Andric for (auto Jump : Func.Blocks[Src].SuccJumps) { 347*0eae32dcSDimitry Andric uint64_t Dst = Jump->Target; 348*0eae32dcSDimitry Andric if (Jump->Flow > 0 && !Visited[Dst]) { 349*0eae32dcSDimitry Andric Queue.push(Dst); 350*0eae32dcSDimitry Andric Visited[Dst] = true; 351*0eae32dcSDimitry Andric } 352*0eae32dcSDimitry Andric } 353*0eae32dcSDimitry Andric } 354*0eae32dcSDimitry Andric } 355*0eae32dcSDimitry Andric 356*0eae32dcSDimitry Andric /// Find the shortest path from the entry block to an exit block passing 357*0eae32dcSDimitry Andric /// through a given block. 358*0eae32dcSDimitry Andric std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) { 359*0eae32dcSDimitry Andric // A path from the entry block to BlockIdx 360*0eae32dcSDimitry Andric auto ForwardPath = findShortestPath(Func.Entry, BlockIdx); 361*0eae32dcSDimitry Andric // A path from BlockIdx to an exit block 362*0eae32dcSDimitry Andric auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock); 363*0eae32dcSDimitry Andric 364*0eae32dcSDimitry Andric // Concatenate the two paths 365*0eae32dcSDimitry Andric std::vector<FlowJump *> Result; 366*0eae32dcSDimitry Andric Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end()); 367*0eae32dcSDimitry Andric Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end()); 368*0eae32dcSDimitry Andric return Result; 369*0eae32dcSDimitry Andric } 370*0eae32dcSDimitry Andric 371*0eae32dcSDimitry Andric /// Apply the Dijkstra algorithm to find the shortest path from a given 372*0eae32dcSDimitry Andric /// Source to a given Target block. 373*0eae32dcSDimitry Andric /// If Target == -1, then the path ends at an exit block. 374*0eae32dcSDimitry Andric std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) { 375*0eae32dcSDimitry Andric // Quit early, if possible 376*0eae32dcSDimitry Andric if (Source == Target) 377*0eae32dcSDimitry Andric return std::vector<FlowJump *>(); 378*0eae32dcSDimitry Andric if (Func.Blocks[Source].isExit() && Target == AnyExitBlock) 379*0eae32dcSDimitry Andric return std::vector<FlowJump *>(); 380*0eae32dcSDimitry Andric 381*0eae32dcSDimitry Andric // Initialize data structures 382*0eae32dcSDimitry Andric auto Distance = std::vector<int64_t>(NumBlocks(), INF); 383*0eae32dcSDimitry Andric auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr); 384*0eae32dcSDimitry Andric Distance[Source] = 0; 385*0eae32dcSDimitry Andric std::set<std::pair<uint64_t, uint64_t>> Queue; 386*0eae32dcSDimitry Andric Queue.insert(std::make_pair(Distance[Source], Source)); 387*0eae32dcSDimitry Andric 388*0eae32dcSDimitry Andric // Run the Dijkstra algorithm 389*0eae32dcSDimitry Andric while (!Queue.empty()) { 390*0eae32dcSDimitry Andric uint64_t Src = Queue.begin()->second; 391*0eae32dcSDimitry Andric Queue.erase(Queue.begin()); 392*0eae32dcSDimitry Andric // If we found a solution, quit early 393*0eae32dcSDimitry Andric if (Src == Target || 394*0eae32dcSDimitry Andric (Func.Blocks[Src].isExit() && Target == AnyExitBlock)) 395*0eae32dcSDimitry Andric break; 396*0eae32dcSDimitry Andric 397*0eae32dcSDimitry Andric for (auto Jump : Func.Blocks[Src].SuccJumps) { 398*0eae32dcSDimitry Andric uint64_t Dst = Jump->Target; 399*0eae32dcSDimitry Andric int64_t JumpDist = jumpDistance(Jump); 400*0eae32dcSDimitry Andric if (Distance[Dst] > Distance[Src] + JumpDist) { 401*0eae32dcSDimitry Andric Queue.erase(std::make_pair(Distance[Dst], Dst)); 402*0eae32dcSDimitry Andric 403*0eae32dcSDimitry Andric Distance[Dst] = Distance[Src] + JumpDist; 404*0eae32dcSDimitry Andric Parent[Dst] = Jump; 405*0eae32dcSDimitry Andric 406*0eae32dcSDimitry Andric Queue.insert(std::make_pair(Distance[Dst], Dst)); 407*0eae32dcSDimitry Andric } 408*0eae32dcSDimitry Andric } 409*0eae32dcSDimitry Andric } 410*0eae32dcSDimitry Andric // If Target is not provided, find the closest exit block 411*0eae32dcSDimitry Andric if (Target == AnyExitBlock) { 412*0eae32dcSDimitry Andric for (uint64_t I = 0; I < NumBlocks(); I++) { 413*0eae32dcSDimitry Andric if (Func.Blocks[I].isExit() && Parent[I] != nullptr) { 414*0eae32dcSDimitry Andric if (Target == AnyExitBlock || Distance[Target] > Distance[I]) { 415*0eae32dcSDimitry Andric Target = I; 416*0eae32dcSDimitry Andric } 417*0eae32dcSDimitry Andric } 418*0eae32dcSDimitry Andric } 419*0eae32dcSDimitry Andric } 420*0eae32dcSDimitry Andric assert(Parent[Target] != nullptr && "a path does not exist"); 421*0eae32dcSDimitry Andric 422*0eae32dcSDimitry Andric // Extract the constructed path 423*0eae32dcSDimitry Andric std::vector<FlowJump *> Result; 424*0eae32dcSDimitry Andric uint64_t Now = Target; 425*0eae32dcSDimitry Andric while (Now != Source) { 426*0eae32dcSDimitry Andric assert(Now == Parent[Now]->Target && "incorrect parent jump"); 427*0eae32dcSDimitry Andric Result.push_back(Parent[Now]); 428*0eae32dcSDimitry Andric Now = Parent[Now]->Source; 429*0eae32dcSDimitry Andric } 430*0eae32dcSDimitry Andric // Reverse the path, since it is extracted from Target to Source 431*0eae32dcSDimitry Andric std::reverse(Result.begin(), Result.end()); 432*0eae32dcSDimitry Andric return Result; 433*0eae32dcSDimitry Andric } 434*0eae32dcSDimitry Andric 435*0eae32dcSDimitry Andric /// A distance of a path for a given jump. 436*0eae32dcSDimitry Andric /// In order to incite the path to use blocks/jumps with large positive flow, 437*0eae32dcSDimitry Andric /// and avoid changing branch probability of outgoing edges drastically, 438*0eae32dcSDimitry Andric /// set the distance as follows: 439*0eae32dcSDimitry Andric /// if Jump.Flow > 0, then distance = max(100 - Jump->Flow, 0) 440*0eae32dcSDimitry Andric /// if Block.Weight > 0, then distance = 1 441*0eae32dcSDimitry Andric /// otherwise distance >> 1 442*0eae32dcSDimitry Andric int64_t jumpDistance(FlowJump *Jump) const { 443*0eae32dcSDimitry Andric int64_t BaseDistance = 100; 444*0eae32dcSDimitry Andric if (Jump->IsUnlikely) 445*0eae32dcSDimitry Andric return MinCostMaxFlow::AuxCostUnlikely; 446*0eae32dcSDimitry Andric if (Jump->Flow > 0) 447*0eae32dcSDimitry Andric return std::max(BaseDistance - (int64_t)Jump->Flow, (int64_t)0); 448*0eae32dcSDimitry Andric if (Func.Blocks[Jump->Target].Weight > 0) 449*0eae32dcSDimitry Andric return BaseDistance; 450*0eae32dcSDimitry Andric return BaseDistance * (NumBlocks() + 1); 451*0eae32dcSDimitry Andric }; 452*0eae32dcSDimitry Andric 453*0eae32dcSDimitry Andric uint64_t NumBlocks() const { return Func.Blocks.size(); } 454*0eae32dcSDimitry Andric 455*0eae32dcSDimitry Andric /// Rebalance unknown subgraphs so as each branch splits with probabilities 456*0eae32dcSDimitry Andric /// UnknownFirstSuccProbability and 1 - UnknownFirstSuccProbability 457*0eae32dcSDimitry Andric void rebalanceUnknownSubgraphs() { 458*0eae32dcSDimitry Andric assert(UnknownFirstSuccProbability >= 0.0 && 459*0eae32dcSDimitry Andric UnknownFirstSuccProbability <= 1.0 && 460*0eae32dcSDimitry Andric "the share of the unknown successor should be between 0 and 1"); 461*0eae32dcSDimitry Andric // Try to find unknown subgraphs from each non-unknown block 462*0eae32dcSDimitry Andric for (uint64_t I = 0; I < Func.Blocks.size(); I++) { 463*0eae32dcSDimitry Andric auto SrcBlock = &Func.Blocks[I]; 464*0eae32dcSDimitry Andric // Do not attempt to find unknown successors from a unknown or a 465*0eae32dcSDimitry Andric // zero-flow block 466*0eae32dcSDimitry Andric if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0) 467*0eae32dcSDimitry Andric continue; 468*0eae32dcSDimitry Andric 469*0eae32dcSDimitry Andric std::vector<FlowBlock *> UnknownSuccs; 470*0eae32dcSDimitry Andric FlowBlock *DstBlock = nullptr; 471*0eae32dcSDimitry Andric // Find a unknown subgraphs starting at block SrcBlock 472*0eae32dcSDimitry Andric if (!findUnknownSubgraph(SrcBlock, DstBlock, UnknownSuccs)) 473*0eae32dcSDimitry Andric continue; 474*0eae32dcSDimitry Andric // At the moment, we do not rebalance subgraphs containing cycles among 475*0eae32dcSDimitry Andric // unknown blocks 476*0eae32dcSDimitry Andric if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownSuccs)) 477*0eae32dcSDimitry Andric continue; 478*0eae32dcSDimitry Andric 479*0eae32dcSDimitry Andric // Rebalance the flow 480*0eae32dcSDimitry Andric rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownSuccs); 481*0eae32dcSDimitry Andric } 482*0eae32dcSDimitry Andric } 483*0eae32dcSDimitry Andric 484*0eae32dcSDimitry Andric /// Find a unknown subgraph starting at block SrcBlock. 485*0eae32dcSDimitry Andric /// If the search is successful, the method sets DstBlock and UnknownSuccs. 486*0eae32dcSDimitry Andric bool findUnknownSubgraph(FlowBlock *SrcBlock, FlowBlock *&DstBlock, 487*0eae32dcSDimitry Andric std::vector<FlowBlock *> &UnknownSuccs) { 488*0eae32dcSDimitry Andric // Run BFS from SrcBlock and make sure all paths are going through unknown 489*0eae32dcSDimitry Andric // blocks and end at a non-unknown DstBlock 490*0eae32dcSDimitry Andric auto Visited = std::vector<bool>(NumBlocks(), false); 491*0eae32dcSDimitry Andric std::queue<uint64_t> Queue; 492*0eae32dcSDimitry Andric DstBlock = nullptr; 493*0eae32dcSDimitry Andric 494*0eae32dcSDimitry Andric Queue.push(SrcBlock->Index); 495*0eae32dcSDimitry Andric Visited[SrcBlock->Index] = true; 496*0eae32dcSDimitry Andric while (!Queue.empty()) { 497*0eae32dcSDimitry Andric auto &Block = Func.Blocks[Queue.front()]; 498*0eae32dcSDimitry Andric Queue.pop(); 499*0eae32dcSDimitry Andric // Process blocks reachable from Block 500*0eae32dcSDimitry Andric for (auto Jump : Block.SuccJumps) { 501*0eae32dcSDimitry Andric uint64_t Dst = Jump->Target; 502*0eae32dcSDimitry Andric if (Visited[Dst]) 503*0eae32dcSDimitry Andric continue; 504*0eae32dcSDimitry Andric Visited[Dst] = true; 505*0eae32dcSDimitry Andric if (!Func.Blocks[Dst].UnknownWeight) { 506*0eae32dcSDimitry Andric // If we see non-unique non-unknown block reachable from SrcBlock, 507*0eae32dcSDimitry Andric // stop processing and skip rebalancing 508*0eae32dcSDimitry Andric FlowBlock *CandidateDstBlock = &Func.Blocks[Dst]; 509*0eae32dcSDimitry Andric if (DstBlock != nullptr && DstBlock != CandidateDstBlock) 510*0eae32dcSDimitry Andric return false; 511*0eae32dcSDimitry Andric DstBlock = CandidateDstBlock; 512*0eae32dcSDimitry Andric } else { 513*0eae32dcSDimitry Andric Queue.push(Dst); 514*0eae32dcSDimitry Andric UnknownSuccs.push_back(&Func.Blocks[Dst]); 515*0eae32dcSDimitry Andric } 516*0eae32dcSDimitry Andric } 517*0eae32dcSDimitry Andric } 518*0eae32dcSDimitry Andric 519*0eae32dcSDimitry Andric // If the list of unknown blocks is empty, we don't need rebalancing 520*0eae32dcSDimitry Andric if (UnknownSuccs.empty()) 521*0eae32dcSDimitry Andric return false; 522*0eae32dcSDimitry Andric // If all reachable nodes from SrcBlock are unknown, skip rebalancing 523*0eae32dcSDimitry Andric if (DstBlock == nullptr) 524*0eae32dcSDimitry Andric return false; 525*0eae32dcSDimitry Andric // If any of the unknown blocks is an exit block, skip rebalancing 526*0eae32dcSDimitry Andric for (auto Block : UnknownSuccs) { 527*0eae32dcSDimitry Andric if (Block->isExit()) 528*0eae32dcSDimitry Andric return false; 529*0eae32dcSDimitry Andric } 530*0eae32dcSDimitry Andric 531*0eae32dcSDimitry Andric return true; 532*0eae32dcSDimitry Andric } 533*0eae32dcSDimitry Andric 534*0eae32dcSDimitry Andric /// Verify if the given unknown subgraph is acyclic, and if yes, reorder 535*0eae32dcSDimitry Andric /// UnknownSuccs in the topological order (so that all jumps are "forward"). 536*0eae32dcSDimitry Andric bool isAcyclicSubgraph(FlowBlock *SrcBlock, FlowBlock *DstBlock, 537*0eae32dcSDimitry Andric std::vector<FlowBlock *> &UnknownSuccs) { 538*0eae32dcSDimitry Andric // Extract local in-degrees in the considered subgraph 539*0eae32dcSDimitry Andric auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0); 540*0eae32dcSDimitry Andric for (auto Jump : SrcBlock->SuccJumps) { 541*0eae32dcSDimitry Andric LocalInDegree[Jump->Target]++; 542*0eae32dcSDimitry Andric } 543*0eae32dcSDimitry Andric for (uint64_t I = 0; I < UnknownSuccs.size(); I++) { 544*0eae32dcSDimitry Andric for (auto Jump : UnknownSuccs[I]->SuccJumps) { 545*0eae32dcSDimitry Andric LocalInDegree[Jump->Target]++; 546*0eae32dcSDimitry Andric } 547*0eae32dcSDimitry Andric } 548*0eae32dcSDimitry Andric // A loop containing SrcBlock 549*0eae32dcSDimitry Andric if (LocalInDegree[SrcBlock->Index] > 0) 550*0eae32dcSDimitry Andric return false; 551*0eae32dcSDimitry Andric 552*0eae32dcSDimitry Andric std::vector<FlowBlock *> AcyclicOrder; 553*0eae32dcSDimitry Andric std::queue<uint64_t> Queue; 554*0eae32dcSDimitry Andric Queue.push(SrcBlock->Index); 555*0eae32dcSDimitry Andric while (!Queue.empty()) { 556*0eae32dcSDimitry Andric auto &Block = Func.Blocks[Queue.front()]; 557*0eae32dcSDimitry Andric Queue.pop(); 558*0eae32dcSDimitry Andric // Stop propagation once we reach DstBlock 559*0eae32dcSDimitry Andric if (Block.Index == DstBlock->Index) 560*0eae32dcSDimitry Andric break; 561*0eae32dcSDimitry Andric 562*0eae32dcSDimitry Andric AcyclicOrder.push_back(&Block); 563*0eae32dcSDimitry Andric // Add to the queue all successors with zero local in-degree 564*0eae32dcSDimitry Andric for (auto Jump : Block.SuccJumps) { 565*0eae32dcSDimitry Andric uint64_t Dst = Jump->Target; 566*0eae32dcSDimitry Andric LocalInDegree[Dst]--; 567*0eae32dcSDimitry Andric if (LocalInDegree[Dst] == 0) { 568*0eae32dcSDimitry Andric Queue.push(Dst); 569*0eae32dcSDimitry Andric } 570*0eae32dcSDimitry Andric } 571*0eae32dcSDimitry Andric } 572*0eae32dcSDimitry Andric 573*0eae32dcSDimitry Andric // If there is a cycle in the subgraph, AcyclicOrder contains only a subset 574*0eae32dcSDimitry Andric // of all blocks 575*0eae32dcSDimitry Andric if (UnknownSuccs.size() + 1 != AcyclicOrder.size()) 576*0eae32dcSDimitry Andric return false; 577*0eae32dcSDimitry Andric UnknownSuccs = AcyclicOrder; 578*0eae32dcSDimitry Andric return true; 579*0eae32dcSDimitry Andric } 580*0eae32dcSDimitry Andric 581*0eae32dcSDimitry Andric /// Rebalance a given subgraph. 582*0eae32dcSDimitry Andric void rebalanceUnknownSubgraph(FlowBlock *SrcBlock, FlowBlock *DstBlock, 583*0eae32dcSDimitry Andric std::vector<FlowBlock *> &UnknownSuccs) { 584*0eae32dcSDimitry Andric assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph"); 585*0eae32dcSDimitry Andric assert(UnknownSuccs.front() == SrcBlock && "incorrect order of unknowns"); 586*0eae32dcSDimitry Andric 587*0eae32dcSDimitry Andric for (auto Block : UnknownSuccs) { 588*0eae32dcSDimitry Andric // Block's flow is the sum of incoming flows 589*0eae32dcSDimitry Andric uint64_t TotalFlow = 0; 590*0eae32dcSDimitry Andric if (Block == SrcBlock) { 591*0eae32dcSDimitry Andric TotalFlow = Block->Flow; 592*0eae32dcSDimitry Andric } else { 593*0eae32dcSDimitry Andric for (auto Jump : Block->PredJumps) { 594*0eae32dcSDimitry Andric TotalFlow += Jump->Flow; 595*0eae32dcSDimitry Andric } 596*0eae32dcSDimitry Andric Block->Flow = TotalFlow; 597*0eae32dcSDimitry Andric } 598*0eae32dcSDimitry Andric 599*0eae32dcSDimitry Andric // Process all successor jumps and update corresponding flow values 600*0eae32dcSDimitry Andric for (uint64_t I = 0; I < Block->SuccJumps.size(); I++) { 601*0eae32dcSDimitry Andric auto Jump = Block->SuccJumps[I]; 602*0eae32dcSDimitry Andric if (I + 1 == Block->SuccJumps.size()) { 603*0eae32dcSDimitry Andric Jump->Flow = TotalFlow; 604*0eae32dcSDimitry Andric continue; 605*0eae32dcSDimitry Andric } 606*0eae32dcSDimitry Andric uint64_t Flow = uint64_t(TotalFlow * UnknownFirstSuccProbability); 607*0eae32dcSDimitry Andric Jump->Flow = Flow; 608*0eae32dcSDimitry Andric TotalFlow -= Flow; 609*0eae32dcSDimitry Andric } 610*0eae32dcSDimitry Andric } 611*0eae32dcSDimitry Andric } 612*0eae32dcSDimitry Andric 613*0eae32dcSDimitry Andric /// A constant indicating an arbitrary exit block of a function. 614*0eae32dcSDimitry Andric static constexpr uint64_t AnyExitBlock = uint64_t(-1); 615*0eae32dcSDimitry Andric 616*0eae32dcSDimitry Andric /// The function. 617*0eae32dcSDimitry Andric FlowFunction &Func; 618*0eae32dcSDimitry Andric }; 619*0eae32dcSDimitry Andric 6204824e7fdSDimitry Andric /// Initializing flow network for a given function. 6214824e7fdSDimitry Andric /// 6224824e7fdSDimitry Andric /// Every block is split into three nodes that are responsible for (i) an 6234824e7fdSDimitry Andric /// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or 6244824e7fdSDimitry Andric /// reduction of the block weight. 6254824e7fdSDimitry Andric void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) { 6264824e7fdSDimitry Andric uint64_t NumBlocks = Func.Blocks.size(); 6274824e7fdSDimitry Andric assert(NumBlocks > 1 && "Too few blocks in a function"); 6284824e7fdSDimitry Andric LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n"); 6294824e7fdSDimitry Andric 6304824e7fdSDimitry Andric // Pre-process data: make sure the entry weight is at least 1 6314824e7fdSDimitry Andric if (Func.Blocks[Func.Entry].Weight == 0) { 6324824e7fdSDimitry Andric Func.Blocks[Func.Entry].Weight = 1; 6334824e7fdSDimitry Andric } 6344824e7fdSDimitry Andric // Introducing dummy source/sink pairs to allow flow circulation. 6354824e7fdSDimitry Andric // The nodes corresponding to blocks of Func have indicies in the range 6364824e7fdSDimitry Andric // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values. 6374824e7fdSDimitry Andric uint64_t S = 3 * NumBlocks; 6384824e7fdSDimitry Andric uint64_t T = S + 1; 6394824e7fdSDimitry Andric uint64_t S1 = S + 2; 6404824e7fdSDimitry Andric uint64_t T1 = S + 3; 6414824e7fdSDimitry Andric 6424824e7fdSDimitry Andric Network.initialize(3 * NumBlocks + 4, S1, T1); 6434824e7fdSDimitry Andric 6444824e7fdSDimitry Andric // Create three nodes for every block of the function 6454824e7fdSDimitry Andric for (uint64_t B = 0; B < NumBlocks; B++) { 6464824e7fdSDimitry Andric auto &Block = Func.Blocks[B]; 6474824e7fdSDimitry Andric assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) && 6484824e7fdSDimitry Andric "non-zero weight of a block w/o weight except for an entry"); 6494824e7fdSDimitry Andric 6504824e7fdSDimitry Andric // Split every block into two nodes 6514824e7fdSDimitry Andric uint64_t Bin = 3 * B; 6524824e7fdSDimitry Andric uint64_t Bout = 3 * B + 1; 6534824e7fdSDimitry Andric uint64_t Baux = 3 * B + 2; 6544824e7fdSDimitry Andric if (Block.Weight > 0) { 6554824e7fdSDimitry Andric Network.addEdge(S1, Bout, Block.Weight, 0); 6564824e7fdSDimitry Andric Network.addEdge(Bin, T1, Block.Weight, 0); 6574824e7fdSDimitry Andric } 6584824e7fdSDimitry Andric 6594824e7fdSDimitry Andric // Edges from S and to T 6604824e7fdSDimitry Andric assert((!Block.isEntry() || !Block.isExit()) && 6614824e7fdSDimitry Andric "a block cannot be an entry and an exit"); 6624824e7fdSDimitry Andric if (Block.isEntry()) { 6634824e7fdSDimitry Andric Network.addEdge(S, Bin, 0); 6644824e7fdSDimitry Andric } else if (Block.isExit()) { 6654824e7fdSDimitry Andric Network.addEdge(Bout, T, 0); 6664824e7fdSDimitry Andric } 6674824e7fdSDimitry Andric 6684824e7fdSDimitry Andric // An auxiliary node to allow increase/reduction of block counts: 6694824e7fdSDimitry Andric // We assume that decreasing block counts is more expensive than increasing, 6704824e7fdSDimitry Andric // and thus, setting separate costs here. In the future we may want to tune 6714824e7fdSDimitry Andric // the relative costs so as to maximize the quality of generated profiles. 6724824e7fdSDimitry Andric int64_t AuxCostInc = MinCostMaxFlow::AuxCostInc; 6734824e7fdSDimitry Andric int64_t AuxCostDec = MinCostMaxFlow::AuxCostDec; 6744824e7fdSDimitry Andric if (Block.UnknownWeight) { 6754824e7fdSDimitry Andric // Do not penalize changing weights of blocks w/o known profile count 6764824e7fdSDimitry Andric AuxCostInc = 0; 6774824e7fdSDimitry Andric AuxCostDec = 0; 6784824e7fdSDimitry Andric } else { 6794824e7fdSDimitry Andric // Increasing the count for "cold" blocks with zero initial count is more 6804824e7fdSDimitry Andric // expensive than for "hot" ones 6814824e7fdSDimitry Andric if (Block.Weight == 0) { 6824824e7fdSDimitry Andric AuxCostInc = MinCostMaxFlow::AuxCostIncZero; 6834824e7fdSDimitry Andric } 6844824e7fdSDimitry Andric // Modifying the count of the entry block is expensive 6854824e7fdSDimitry Andric if (Block.isEntry()) { 6864824e7fdSDimitry Andric AuxCostInc = MinCostMaxFlow::AuxCostIncEntry; 6874824e7fdSDimitry Andric AuxCostDec = MinCostMaxFlow::AuxCostDecEntry; 6884824e7fdSDimitry Andric } 6894824e7fdSDimitry Andric } 6904824e7fdSDimitry Andric // For blocks with self-edges, do not penalize a reduction of the count, 6914824e7fdSDimitry Andric // as all of the increase can be attributed to the self-edge 6924824e7fdSDimitry Andric if (Block.HasSelfEdge) { 6934824e7fdSDimitry Andric AuxCostDec = 0; 6944824e7fdSDimitry Andric } 6954824e7fdSDimitry Andric 6964824e7fdSDimitry Andric Network.addEdge(Bin, Baux, AuxCostInc); 6974824e7fdSDimitry Andric Network.addEdge(Baux, Bout, AuxCostInc); 6984824e7fdSDimitry Andric if (Block.Weight > 0) { 6994824e7fdSDimitry Andric Network.addEdge(Bout, Baux, AuxCostDec); 7004824e7fdSDimitry Andric Network.addEdge(Baux, Bin, AuxCostDec); 7014824e7fdSDimitry Andric } 7024824e7fdSDimitry Andric } 7034824e7fdSDimitry Andric 7044824e7fdSDimitry Andric // Creating edges for every jump 7054824e7fdSDimitry Andric for (auto &Jump : Func.Jumps) { 7064824e7fdSDimitry Andric uint64_t Src = Jump.Source; 7074824e7fdSDimitry Andric uint64_t Dst = Jump.Target; 7084824e7fdSDimitry Andric if (Src != Dst) { 7094824e7fdSDimitry Andric uint64_t SrcOut = 3 * Src + 1; 7104824e7fdSDimitry Andric uint64_t DstIn = 3 * Dst; 7114824e7fdSDimitry Andric uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0; 7124824e7fdSDimitry Andric Network.addEdge(SrcOut, DstIn, Cost); 7134824e7fdSDimitry Andric } 7144824e7fdSDimitry Andric } 7154824e7fdSDimitry Andric 7164824e7fdSDimitry Andric // Make sure we have a valid flow circulation 7174824e7fdSDimitry Andric Network.addEdge(T, S, 0); 7184824e7fdSDimitry Andric } 7194824e7fdSDimitry Andric 7204824e7fdSDimitry Andric /// Extract resulting block and edge counts from the flow network. 7214824e7fdSDimitry Andric void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) { 7224824e7fdSDimitry Andric uint64_t NumBlocks = Func.Blocks.size(); 7234824e7fdSDimitry Andric 7244824e7fdSDimitry Andric // Extract resulting block counts 7254824e7fdSDimitry Andric for (uint64_t Src = 0; Src < NumBlocks; Src++) { 7264824e7fdSDimitry Andric auto &Block = Func.Blocks[Src]; 7274824e7fdSDimitry Andric uint64_t SrcOut = 3 * Src + 1; 7284824e7fdSDimitry Andric int64_t Flow = 0; 7294824e7fdSDimitry Andric for (auto &Adj : Network.getFlow(SrcOut)) { 7304824e7fdSDimitry Andric uint64_t DstIn = Adj.first; 7314824e7fdSDimitry Andric int64_t DstFlow = Adj.second; 7324824e7fdSDimitry Andric bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2); 7334824e7fdSDimitry Andric if (!IsAuxNode || Block.HasSelfEdge) { 7344824e7fdSDimitry Andric Flow += DstFlow; 7354824e7fdSDimitry Andric } 7364824e7fdSDimitry Andric } 7374824e7fdSDimitry Andric Block.Flow = Flow; 7384824e7fdSDimitry Andric assert(Flow >= 0 && "negative block flow"); 7394824e7fdSDimitry Andric } 7404824e7fdSDimitry Andric 7414824e7fdSDimitry Andric // Extract resulting jump counts 7424824e7fdSDimitry Andric for (auto &Jump : Func.Jumps) { 7434824e7fdSDimitry Andric uint64_t Src = Jump.Source; 7444824e7fdSDimitry Andric uint64_t Dst = Jump.Target; 7454824e7fdSDimitry Andric int64_t Flow = 0; 7464824e7fdSDimitry Andric if (Src != Dst) { 7474824e7fdSDimitry Andric uint64_t SrcOut = 3 * Src + 1; 7484824e7fdSDimitry Andric uint64_t DstIn = 3 * Dst; 7494824e7fdSDimitry Andric Flow = Network.getFlow(SrcOut, DstIn); 7504824e7fdSDimitry Andric } else { 7514824e7fdSDimitry Andric uint64_t SrcOut = 3 * Src + 1; 7524824e7fdSDimitry Andric uint64_t SrcAux = 3 * Src + 2; 7534824e7fdSDimitry Andric int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux); 7544824e7fdSDimitry Andric if (AuxFlow > 0) 7554824e7fdSDimitry Andric Flow = AuxFlow; 7564824e7fdSDimitry Andric } 7574824e7fdSDimitry Andric Jump.Flow = Flow; 7584824e7fdSDimitry Andric assert(Flow >= 0 && "negative jump flow"); 7594824e7fdSDimitry Andric } 7604824e7fdSDimitry Andric } 7614824e7fdSDimitry Andric 7624824e7fdSDimitry Andric #ifndef NDEBUG 7634824e7fdSDimitry Andric /// Verify that the computed flow values satisfy flow conservation rules 7644824e7fdSDimitry Andric void verifyWeights(const FlowFunction &Func) { 7654824e7fdSDimitry Andric const uint64_t NumBlocks = Func.Blocks.size(); 7664824e7fdSDimitry Andric auto InFlow = std::vector<uint64_t>(NumBlocks, 0); 7674824e7fdSDimitry Andric auto OutFlow = std::vector<uint64_t>(NumBlocks, 0); 7684824e7fdSDimitry Andric for (auto &Jump : Func.Jumps) { 7694824e7fdSDimitry Andric InFlow[Jump.Target] += Jump.Flow; 7704824e7fdSDimitry Andric OutFlow[Jump.Source] += Jump.Flow; 7714824e7fdSDimitry Andric } 7724824e7fdSDimitry Andric 7734824e7fdSDimitry Andric uint64_t TotalInFlow = 0; 7744824e7fdSDimitry Andric uint64_t TotalOutFlow = 0; 7754824e7fdSDimitry Andric for (uint64_t I = 0; I < NumBlocks; I++) { 7764824e7fdSDimitry Andric auto &Block = Func.Blocks[I]; 7774824e7fdSDimitry Andric if (Block.isEntry()) { 7784824e7fdSDimitry Andric TotalInFlow += Block.Flow; 7794824e7fdSDimitry Andric assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow"); 7804824e7fdSDimitry Andric } else if (Block.isExit()) { 7814824e7fdSDimitry Andric TotalOutFlow += Block.Flow; 7824824e7fdSDimitry Andric assert(Block.Flow == InFlow[I] && "incorrectly computed control flow"); 7834824e7fdSDimitry Andric } else { 7844824e7fdSDimitry Andric assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow"); 7854824e7fdSDimitry Andric assert(Block.Flow == InFlow[I] && "incorrectly computed control flow"); 7864824e7fdSDimitry Andric } 7874824e7fdSDimitry Andric } 7884824e7fdSDimitry Andric assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow"); 789*0eae32dcSDimitry Andric 790*0eae32dcSDimitry Andric // Verify that there are no isolated flow components 791*0eae32dcSDimitry Andric // One could modify FlowFunction to hold edges indexed by the sources, which 792*0eae32dcSDimitry Andric // will avoid a creation of the object 793*0eae32dcSDimitry Andric auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks); 794*0eae32dcSDimitry Andric for (auto &Jump : Func.Jumps) { 795*0eae32dcSDimitry Andric if (Jump.Flow > 0) { 796*0eae32dcSDimitry Andric PositiveFlowEdges[Jump.Source].push_back(Jump.Target); 797*0eae32dcSDimitry Andric } 798*0eae32dcSDimitry Andric } 799*0eae32dcSDimitry Andric 800*0eae32dcSDimitry Andric // Run BFS from the source along edges with positive flow 801*0eae32dcSDimitry Andric std::queue<uint64_t> Queue; 802*0eae32dcSDimitry Andric auto Visited = std::vector<bool>(NumBlocks, false); 803*0eae32dcSDimitry Andric Queue.push(Func.Entry); 804*0eae32dcSDimitry Andric Visited[Func.Entry] = true; 805*0eae32dcSDimitry Andric while (!Queue.empty()) { 806*0eae32dcSDimitry Andric uint64_t Src = Queue.front(); 807*0eae32dcSDimitry Andric Queue.pop(); 808*0eae32dcSDimitry Andric for (uint64_t Dst : PositiveFlowEdges[Src]) { 809*0eae32dcSDimitry Andric if (!Visited[Dst]) { 810*0eae32dcSDimitry Andric Queue.push(Dst); 811*0eae32dcSDimitry Andric Visited[Dst] = true; 812*0eae32dcSDimitry Andric } 813*0eae32dcSDimitry Andric } 814*0eae32dcSDimitry Andric } 815*0eae32dcSDimitry Andric 816*0eae32dcSDimitry Andric // Verify that every block that has a positive flow is reached from the source 817*0eae32dcSDimitry Andric // along edges with a positive flow 818*0eae32dcSDimitry Andric for (uint64_t I = 0; I < NumBlocks; I++) { 819*0eae32dcSDimitry Andric auto &Block = Func.Blocks[I]; 820*0eae32dcSDimitry Andric assert((Visited[I] || Block.Flow == 0) && "an isolated flow component"); 821*0eae32dcSDimitry Andric } 8224824e7fdSDimitry Andric } 8234824e7fdSDimitry Andric #endif 8244824e7fdSDimitry Andric 8254824e7fdSDimitry Andric } // end of anonymous namespace 8264824e7fdSDimitry Andric 8274824e7fdSDimitry Andric /// Apply the profile inference algorithm for a given flow function 8284824e7fdSDimitry Andric void llvm::applyFlowInference(FlowFunction &Func) { 8294824e7fdSDimitry Andric // Create and apply an inference network model 8304824e7fdSDimitry Andric auto InferenceNetwork = MinCostMaxFlow(); 8314824e7fdSDimitry Andric initializeNetwork(InferenceNetwork, Func); 8324824e7fdSDimitry Andric InferenceNetwork.run(); 8334824e7fdSDimitry Andric 8344824e7fdSDimitry Andric // Extract flow values for every block and every edge 8354824e7fdSDimitry Andric extractWeights(InferenceNetwork, Func); 8364824e7fdSDimitry Andric 837*0eae32dcSDimitry Andric // Post-processing adjustments to the flow 838*0eae32dcSDimitry Andric auto Adjuster = FlowAdjuster(Func); 839*0eae32dcSDimitry Andric Adjuster.run(); 840*0eae32dcSDimitry Andric 8414824e7fdSDimitry Andric #ifndef NDEBUG 8424824e7fdSDimitry Andric // Verify the result 8434824e7fdSDimitry Andric verifyWeights(Func); 8444824e7fdSDimitry Andric #endif 8454824e7fdSDimitry Andric } 846