14824e7fdSDimitry Andric //===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===// 24824e7fdSDimitry Andric // 34824e7fdSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 44824e7fdSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 54824e7fdSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 64824e7fdSDimitry Andric // 74824e7fdSDimitry Andric //===----------------------------------------------------------------------===// 84824e7fdSDimitry Andric // 94824e7fdSDimitry Andric // This file implements a profile inference algorithm. Given an incomplete and 104824e7fdSDimitry Andric // possibly imprecise block counts, the algorithm reconstructs realistic block 114824e7fdSDimitry Andric // and edge counts that satisfy flow conservation rules, while minimally modify 124824e7fdSDimitry Andric // input block counts. 134824e7fdSDimitry Andric // 144824e7fdSDimitry Andric //===----------------------------------------------------------------------===// 154824e7fdSDimitry Andric 164824e7fdSDimitry Andric #include "llvm/Transforms/Utils/SampleProfileInference.h" 17*04eeddc0SDimitry Andric #include "llvm/ADT/BitVector.h" 184824e7fdSDimitry Andric #include "llvm/Support/Debug.h" 194824e7fdSDimitry Andric #include <queue> 204824e7fdSDimitry Andric #include <set> 214824e7fdSDimitry Andric 224824e7fdSDimitry Andric using namespace llvm; 234824e7fdSDimitry Andric #define DEBUG_TYPE "sample-profile-inference" 244824e7fdSDimitry Andric 254824e7fdSDimitry Andric namespace { 264824e7fdSDimitry Andric 274824e7fdSDimitry Andric /// A value indicating an infinite flow/capacity/weight of a block/edge. 284824e7fdSDimitry Andric /// Not using numeric_limits<int64_t>::max(), as the values can be summed up 294824e7fdSDimitry Andric /// during the execution. 304824e7fdSDimitry Andric static constexpr int64_t INF = ((int64_t)1) << 50; 314824e7fdSDimitry Andric 324824e7fdSDimitry Andric /// The minimum-cost maximum flow algorithm. 334824e7fdSDimitry Andric /// 344824e7fdSDimitry Andric /// The algorithm finds the maximum flow of minimum cost on a given (directed) 354824e7fdSDimitry Andric /// network using a modified version of the classical Moore-Bellman-Ford 364824e7fdSDimitry Andric /// approach. The algorithm applies a number of augmentation iterations in which 374824e7fdSDimitry Andric /// flow is sent along paths of positive capacity from the source to the sink. 384824e7fdSDimitry Andric /// The worst-case time complexity of the implementation is O(v(f)*m*n), where 394824e7fdSDimitry Andric /// where m is the number of edges, n is the number of vertices, and v(f) is the 404824e7fdSDimitry Andric /// value of the maximum flow. However, the observed running time on typical 414824e7fdSDimitry Andric /// instances is sub-quadratic, that is, o(n^2). 424824e7fdSDimitry Andric /// 434824e7fdSDimitry Andric /// The input is a set of edges with specified costs and capacities, and a pair 444824e7fdSDimitry Andric /// of nodes (source and sink). The output is the flow along each edge of the 454824e7fdSDimitry Andric /// minimum total cost respecting the given edge capacities. 464824e7fdSDimitry Andric class MinCostMaxFlow { 474824e7fdSDimitry Andric public: 484824e7fdSDimitry Andric // Initialize algorithm's data structures for a network of a given size. 494824e7fdSDimitry Andric void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) { 504824e7fdSDimitry Andric Source = SourceNode; 514824e7fdSDimitry Andric Target = SinkNode; 524824e7fdSDimitry Andric 534824e7fdSDimitry Andric Nodes = std::vector<Node>(NodeCount); 544824e7fdSDimitry Andric Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>()); 554824e7fdSDimitry Andric } 564824e7fdSDimitry Andric 574824e7fdSDimitry Andric // Run the algorithm. 584824e7fdSDimitry Andric int64_t run() { 594824e7fdSDimitry Andric // Find an augmenting path and update the flow along the path 604824e7fdSDimitry Andric size_t AugmentationIters = 0; 614824e7fdSDimitry Andric while (findAugmentingPath()) { 624824e7fdSDimitry Andric augmentFlowAlongPath(); 634824e7fdSDimitry Andric AugmentationIters++; 644824e7fdSDimitry Andric } 654824e7fdSDimitry Andric 664824e7fdSDimitry Andric // Compute the total flow and its cost 674824e7fdSDimitry Andric int64_t TotalCost = 0; 684824e7fdSDimitry Andric int64_t TotalFlow = 0; 694824e7fdSDimitry Andric for (uint64_t Src = 0; Src < Nodes.size(); Src++) { 704824e7fdSDimitry Andric for (auto &Edge : Edges[Src]) { 714824e7fdSDimitry Andric if (Edge.Flow > 0) { 724824e7fdSDimitry Andric TotalCost += Edge.Cost * Edge.Flow; 734824e7fdSDimitry Andric if (Src == Source) 744824e7fdSDimitry Andric TotalFlow += Edge.Flow; 754824e7fdSDimitry Andric } 764824e7fdSDimitry Andric } 774824e7fdSDimitry Andric } 784824e7fdSDimitry Andric LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters 794824e7fdSDimitry Andric << " iterations with " << TotalFlow << " total flow" 804824e7fdSDimitry Andric << " of " << TotalCost << " cost\n"); 814824e7fdSDimitry Andric (void)TotalFlow; 824824e7fdSDimitry Andric return TotalCost; 834824e7fdSDimitry Andric } 844824e7fdSDimitry Andric 854824e7fdSDimitry Andric /// Adding an edge to the network with a specified capacity and a cost. 864824e7fdSDimitry Andric /// Multiple edges between a pair of nodes are allowed but self-edges 874824e7fdSDimitry Andric /// are not supported. 884824e7fdSDimitry Andric void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) { 894824e7fdSDimitry Andric assert(Capacity > 0 && "adding an edge of zero capacity"); 904824e7fdSDimitry Andric assert(Src != Dst && "loop edge are not supported"); 914824e7fdSDimitry Andric 924824e7fdSDimitry Andric Edge SrcEdge; 934824e7fdSDimitry Andric SrcEdge.Dst = Dst; 944824e7fdSDimitry Andric SrcEdge.Cost = Cost; 954824e7fdSDimitry Andric SrcEdge.Capacity = Capacity; 964824e7fdSDimitry Andric SrcEdge.Flow = 0; 974824e7fdSDimitry Andric SrcEdge.RevEdgeIndex = Edges[Dst].size(); 984824e7fdSDimitry Andric 994824e7fdSDimitry Andric Edge DstEdge; 1004824e7fdSDimitry Andric DstEdge.Dst = Src; 1014824e7fdSDimitry Andric DstEdge.Cost = -Cost; 1024824e7fdSDimitry Andric DstEdge.Capacity = 0; 1034824e7fdSDimitry Andric DstEdge.Flow = 0; 1044824e7fdSDimitry Andric DstEdge.RevEdgeIndex = Edges[Src].size(); 1054824e7fdSDimitry Andric 1064824e7fdSDimitry Andric Edges[Src].push_back(SrcEdge); 1074824e7fdSDimitry Andric Edges[Dst].push_back(DstEdge); 1084824e7fdSDimitry Andric } 1094824e7fdSDimitry Andric 1104824e7fdSDimitry Andric /// Adding an edge to the network of infinite capacity and a given cost. 1114824e7fdSDimitry Andric void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) { 1124824e7fdSDimitry Andric addEdge(Src, Dst, INF, Cost); 1134824e7fdSDimitry Andric } 1144824e7fdSDimitry Andric 1154824e7fdSDimitry Andric /// Get the total flow from a given source node. 1164824e7fdSDimitry Andric /// Returns a list of pairs (target node, amount of flow to the target). 1174824e7fdSDimitry Andric const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const { 1184824e7fdSDimitry Andric std::vector<std::pair<uint64_t, int64_t>> Flow; 1194824e7fdSDimitry Andric for (auto &Edge : Edges[Src]) { 1204824e7fdSDimitry Andric if (Edge.Flow > 0) 1214824e7fdSDimitry Andric Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow)); 1224824e7fdSDimitry Andric } 1234824e7fdSDimitry Andric return Flow; 1244824e7fdSDimitry Andric } 1254824e7fdSDimitry Andric 1264824e7fdSDimitry Andric /// Get the total flow between a pair of nodes. 1274824e7fdSDimitry Andric int64_t getFlow(uint64_t Src, uint64_t Dst) const { 1284824e7fdSDimitry Andric int64_t Flow = 0; 1294824e7fdSDimitry Andric for (auto &Edge : Edges[Src]) { 1304824e7fdSDimitry Andric if (Edge.Dst == Dst) { 1314824e7fdSDimitry Andric Flow += Edge.Flow; 1324824e7fdSDimitry Andric } 1334824e7fdSDimitry Andric } 1344824e7fdSDimitry Andric return Flow; 1354824e7fdSDimitry Andric } 1364824e7fdSDimitry Andric 1374824e7fdSDimitry Andric /// A cost of increasing a block's count by one. 1384824e7fdSDimitry Andric static constexpr int64_t AuxCostInc = 10; 1394824e7fdSDimitry Andric /// A cost of decreasing a block's count by one. 1404824e7fdSDimitry Andric static constexpr int64_t AuxCostDec = 20; 1414824e7fdSDimitry Andric /// A cost of increasing a count of zero-weight block by one. 1424824e7fdSDimitry Andric static constexpr int64_t AuxCostIncZero = 11; 1434824e7fdSDimitry Andric /// A cost of increasing the entry block's count by one. 1444824e7fdSDimitry Andric static constexpr int64_t AuxCostIncEntry = 40; 1454824e7fdSDimitry Andric /// A cost of decreasing the entry block's count by one. 1464824e7fdSDimitry Andric static constexpr int64_t AuxCostDecEntry = 10; 1474824e7fdSDimitry Andric /// A cost of taking an unlikely jump. 148*04eeddc0SDimitry Andric static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 30; 1494824e7fdSDimitry Andric 1504824e7fdSDimitry Andric private: 1514824e7fdSDimitry Andric /// Check for existence of an augmenting path with a positive capacity. 1524824e7fdSDimitry Andric bool findAugmentingPath() { 1534824e7fdSDimitry Andric // Initialize data structures 1544824e7fdSDimitry Andric for (auto &Node : Nodes) { 1554824e7fdSDimitry Andric Node.Distance = INF; 1564824e7fdSDimitry Andric Node.ParentNode = uint64_t(-1); 1574824e7fdSDimitry Andric Node.ParentEdgeIndex = uint64_t(-1); 1584824e7fdSDimitry Andric Node.Taken = false; 1594824e7fdSDimitry Andric } 1604824e7fdSDimitry Andric 1614824e7fdSDimitry Andric std::queue<uint64_t> Queue; 1624824e7fdSDimitry Andric Queue.push(Source); 1634824e7fdSDimitry Andric Nodes[Source].Distance = 0; 1644824e7fdSDimitry Andric Nodes[Source].Taken = true; 1654824e7fdSDimitry Andric while (!Queue.empty()) { 1664824e7fdSDimitry Andric uint64_t Src = Queue.front(); 1674824e7fdSDimitry Andric Queue.pop(); 1684824e7fdSDimitry Andric Nodes[Src].Taken = false; 1694824e7fdSDimitry Andric // Although the residual network contains edges with negative costs 1704824e7fdSDimitry Andric // (in particular, backward edges), it can be shown that there are no 1714824e7fdSDimitry Andric // negative-weight cycles and the following two invariants are maintained: 1724824e7fdSDimitry Andric // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V, 1734824e7fdSDimitry Andric // where Dist is the length of the shortest path between two nodes. This 1744824e7fdSDimitry Andric // allows to prune the search-space of the path-finding algorithm using 1754824e7fdSDimitry Andric // the following early-stop criteria: 1764824e7fdSDimitry Andric // -- If we find a path with zero-distance from Source to Target, stop the 1774824e7fdSDimitry Andric // search, as the path is the shortest since Dist[Source, Target] >= 0; 1784824e7fdSDimitry Andric // -- If we have Dist[Source, V] > Dist[Source, Target], then do not 1794824e7fdSDimitry Andric // process node V, as it is guaranteed _not_ to be on a shortest path 1804824e7fdSDimitry Andric // from Source to Target; it follows from inequalities 1814824e7fdSDimitry Andric // Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target] 1824824e7fdSDimitry Andric // >= Dist[Source, V] 1834824e7fdSDimitry Andric if (Nodes[Target].Distance == 0) 1844824e7fdSDimitry Andric break; 1854824e7fdSDimitry Andric if (Nodes[Src].Distance > Nodes[Target].Distance) 1864824e7fdSDimitry Andric continue; 1874824e7fdSDimitry Andric 1884824e7fdSDimitry Andric // Process adjacent edges 1894824e7fdSDimitry Andric for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) { 1904824e7fdSDimitry Andric auto &Edge = Edges[Src][EdgeIdx]; 1914824e7fdSDimitry Andric if (Edge.Flow < Edge.Capacity) { 1924824e7fdSDimitry Andric uint64_t Dst = Edge.Dst; 1934824e7fdSDimitry Andric int64_t NewDistance = Nodes[Src].Distance + Edge.Cost; 1944824e7fdSDimitry Andric if (Nodes[Dst].Distance > NewDistance) { 1954824e7fdSDimitry Andric // Update the distance and the parent node/edge 1964824e7fdSDimitry Andric Nodes[Dst].Distance = NewDistance; 1974824e7fdSDimitry Andric Nodes[Dst].ParentNode = Src; 1984824e7fdSDimitry Andric Nodes[Dst].ParentEdgeIndex = EdgeIdx; 1994824e7fdSDimitry Andric // Add the node to the queue, if it is not there yet 2004824e7fdSDimitry Andric if (!Nodes[Dst].Taken) { 2014824e7fdSDimitry Andric Queue.push(Dst); 2024824e7fdSDimitry Andric Nodes[Dst].Taken = true; 2034824e7fdSDimitry Andric } 2044824e7fdSDimitry Andric } 2054824e7fdSDimitry Andric } 2064824e7fdSDimitry Andric } 2074824e7fdSDimitry Andric } 2084824e7fdSDimitry Andric 2094824e7fdSDimitry Andric return Nodes[Target].Distance != INF; 2104824e7fdSDimitry Andric } 2114824e7fdSDimitry Andric 2124824e7fdSDimitry Andric /// Update the current flow along the augmenting path. 2134824e7fdSDimitry Andric void augmentFlowAlongPath() { 2144824e7fdSDimitry Andric // Find path capacity 2154824e7fdSDimitry Andric int64_t PathCapacity = INF; 2164824e7fdSDimitry Andric uint64_t Now = Target; 2174824e7fdSDimitry Andric while (Now != Source) { 2184824e7fdSDimitry Andric uint64_t Pred = Nodes[Now].ParentNode; 2194824e7fdSDimitry Andric auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; 2204824e7fdSDimitry Andric PathCapacity = std::min(PathCapacity, Edge.Capacity - Edge.Flow); 2214824e7fdSDimitry Andric Now = Pred; 2224824e7fdSDimitry Andric } 2234824e7fdSDimitry Andric 2240eae32dcSDimitry Andric assert(PathCapacity > 0 && "found an incorrect augmenting path"); 2254824e7fdSDimitry Andric 2264824e7fdSDimitry Andric // Update the flow along the path 2274824e7fdSDimitry Andric Now = Target; 2284824e7fdSDimitry Andric while (Now != Source) { 2294824e7fdSDimitry Andric uint64_t Pred = Nodes[Now].ParentNode; 2304824e7fdSDimitry Andric auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; 2314824e7fdSDimitry Andric auto &RevEdge = Edges[Now][Edge.RevEdgeIndex]; 2324824e7fdSDimitry Andric 2334824e7fdSDimitry Andric Edge.Flow += PathCapacity; 2344824e7fdSDimitry Andric RevEdge.Flow -= PathCapacity; 2354824e7fdSDimitry Andric 2364824e7fdSDimitry Andric Now = Pred; 2374824e7fdSDimitry Andric } 2384824e7fdSDimitry Andric } 2394824e7fdSDimitry Andric 240*04eeddc0SDimitry Andric /// A node in a flow network. 2414824e7fdSDimitry Andric struct Node { 2424824e7fdSDimitry Andric /// The cost of the cheapest path from the source to the current node. 2434824e7fdSDimitry Andric int64_t Distance; 2444824e7fdSDimitry Andric /// The node preceding the current one in the path. 2454824e7fdSDimitry Andric uint64_t ParentNode; 2464824e7fdSDimitry Andric /// The index of the edge between ParentNode and the current node. 2474824e7fdSDimitry Andric uint64_t ParentEdgeIndex; 2484824e7fdSDimitry Andric /// An indicator of whether the current node is in a queue. 2494824e7fdSDimitry Andric bool Taken; 2504824e7fdSDimitry Andric }; 2514824e7fdSDimitry Andric /// An edge in a flow network. 2524824e7fdSDimitry Andric struct Edge { 2534824e7fdSDimitry Andric /// The cost of the edge. 2544824e7fdSDimitry Andric int64_t Cost; 2554824e7fdSDimitry Andric /// The capacity of the edge. 2564824e7fdSDimitry Andric int64_t Capacity; 2574824e7fdSDimitry Andric /// The current flow on the edge. 2584824e7fdSDimitry Andric int64_t Flow; 2594824e7fdSDimitry Andric /// The destination node of the edge. 2604824e7fdSDimitry Andric uint64_t Dst; 2614824e7fdSDimitry Andric /// The index of the reverse edge between Dst and the current node. 2624824e7fdSDimitry Andric uint64_t RevEdgeIndex; 2634824e7fdSDimitry Andric }; 2644824e7fdSDimitry Andric 2654824e7fdSDimitry Andric /// The set of network nodes. 2664824e7fdSDimitry Andric std::vector<Node> Nodes; 2674824e7fdSDimitry Andric /// The set of network edges. 2684824e7fdSDimitry Andric std::vector<std::vector<Edge>> Edges; 2694824e7fdSDimitry Andric /// Source node of the flow. 2704824e7fdSDimitry Andric uint64_t Source; 2714824e7fdSDimitry Andric /// Target (sink) node of the flow. 2724824e7fdSDimitry Andric uint64_t Target; 2734824e7fdSDimitry Andric }; 2744824e7fdSDimitry Andric 2750eae32dcSDimitry Andric /// A post-processing adjustment of control flow. It applies two steps by 2760eae32dcSDimitry Andric /// rerouting some flow and making it more realistic: 2770eae32dcSDimitry Andric /// 2780eae32dcSDimitry Andric /// - First, it removes all isolated components ("islands") with a positive flow 2790eae32dcSDimitry Andric /// that are unreachable from the entry block. For every such component, we 2800eae32dcSDimitry Andric /// find the shortest from the entry to an exit passing through the component, 2810eae32dcSDimitry Andric /// and increase the flow by one unit along the path. 2820eae32dcSDimitry Andric /// 2830eae32dcSDimitry Andric /// - Second, it identifies all "unknown subgraphs" consisting of basic blocks 2840eae32dcSDimitry Andric /// with no sampled counts. Then it rebalnces the flow that goes through such 2850eae32dcSDimitry Andric /// a subgraph so that each branch is taken with probability 50%. 2860eae32dcSDimitry Andric /// An unknown subgraph is such that for every two nodes u and v: 2870eae32dcSDimitry Andric /// - u dominates v and u is not unknown; 2880eae32dcSDimitry Andric /// - v post-dominates u; and 2890eae32dcSDimitry Andric /// - all inner-nodes of all (u,v)-paths are unknown. 2900eae32dcSDimitry Andric /// 2910eae32dcSDimitry Andric class FlowAdjuster { 2920eae32dcSDimitry Andric public: 2930eae32dcSDimitry Andric FlowAdjuster(FlowFunction &Func) : Func(Func) { 2940eae32dcSDimitry Andric assert(Func.Blocks[Func.Entry].isEntry() && 2950eae32dcSDimitry Andric "incorrect index of the entry block"); 2960eae32dcSDimitry Andric } 2970eae32dcSDimitry Andric 2980eae32dcSDimitry Andric // Run the post-processing 2990eae32dcSDimitry Andric void run() { 3000eae32dcSDimitry Andric /// Adjust the flow to get rid of isolated components. 3010eae32dcSDimitry Andric joinIsolatedComponents(); 3020eae32dcSDimitry Andric 3030eae32dcSDimitry Andric /// Rebalance the flow inside unknown subgraphs. 3040eae32dcSDimitry Andric rebalanceUnknownSubgraphs(); 3050eae32dcSDimitry Andric } 3060eae32dcSDimitry Andric 3070eae32dcSDimitry Andric private: 3080eae32dcSDimitry Andric void joinIsolatedComponents() { 3090eae32dcSDimitry Andric // Find blocks that are reachable from the source 310*04eeddc0SDimitry Andric auto Visited = BitVector(NumBlocks(), false); 3110eae32dcSDimitry Andric findReachable(Func.Entry, Visited); 3120eae32dcSDimitry Andric 3130eae32dcSDimitry Andric // Iterate over all non-reachable blocks and adjust their weights 3140eae32dcSDimitry Andric for (uint64_t I = 0; I < NumBlocks(); I++) { 3150eae32dcSDimitry Andric auto &Block = Func.Blocks[I]; 3160eae32dcSDimitry Andric if (Block.Flow > 0 && !Visited[I]) { 3170eae32dcSDimitry Andric // Find a path from the entry to an exit passing through the block I 3180eae32dcSDimitry Andric auto Path = findShortestPath(I); 3190eae32dcSDimitry Andric // Increase the flow along the path 3200eae32dcSDimitry Andric assert(Path.size() > 0 && Path[0]->Source == Func.Entry && 3210eae32dcSDimitry Andric "incorrectly computed path adjusting control flow"); 3220eae32dcSDimitry Andric Func.Blocks[Func.Entry].Flow += 1; 3230eae32dcSDimitry Andric for (auto &Jump : Path) { 3240eae32dcSDimitry Andric Jump->Flow += 1; 3250eae32dcSDimitry Andric Func.Blocks[Jump->Target].Flow += 1; 3260eae32dcSDimitry Andric // Update reachability 3270eae32dcSDimitry Andric findReachable(Jump->Target, Visited); 3280eae32dcSDimitry Andric } 3290eae32dcSDimitry Andric } 3300eae32dcSDimitry Andric } 3310eae32dcSDimitry Andric } 3320eae32dcSDimitry Andric 3330eae32dcSDimitry Andric /// Run BFS from a given block along the jumps with a positive flow and mark 3340eae32dcSDimitry Andric /// all reachable blocks. 335*04eeddc0SDimitry Andric void findReachable(uint64_t Src, BitVector &Visited) { 3360eae32dcSDimitry Andric if (Visited[Src]) 3370eae32dcSDimitry Andric return; 3380eae32dcSDimitry Andric std::queue<uint64_t> Queue; 3390eae32dcSDimitry Andric Queue.push(Src); 3400eae32dcSDimitry Andric Visited[Src] = true; 3410eae32dcSDimitry Andric while (!Queue.empty()) { 3420eae32dcSDimitry Andric Src = Queue.front(); 3430eae32dcSDimitry Andric Queue.pop(); 3440eae32dcSDimitry Andric for (auto Jump : Func.Blocks[Src].SuccJumps) { 3450eae32dcSDimitry Andric uint64_t Dst = Jump->Target; 3460eae32dcSDimitry Andric if (Jump->Flow > 0 && !Visited[Dst]) { 3470eae32dcSDimitry Andric Queue.push(Dst); 3480eae32dcSDimitry Andric Visited[Dst] = true; 3490eae32dcSDimitry Andric } 3500eae32dcSDimitry Andric } 3510eae32dcSDimitry Andric } 3520eae32dcSDimitry Andric } 3530eae32dcSDimitry Andric 3540eae32dcSDimitry Andric /// Find the shortest path from the entry block to an exit block passing 3550eae32dcSDimitry Andric /// through a given block. 3560eae32dcSDimitry Andric std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) { 3570eae32dcSDimitry Andric // A path from the entry block to BlockIdx 3580eae32dcSDimitry Andric auto ForwardPath = findShortestPath(Func.Entry, BlockIdx); 3590eae32dcSDimitry Andric // A path from BlockIdx to an exit block 3600eae32dcSDimitry Andric auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock); 3610eae32dcSDimitry Andric 3620eae32dcSDimitry Andric // Concatenate the two paths 3630eae32dcSDimitry Andric std::vector<FlowJump *> Result; 3640eae32dcSDimitry Andric Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end()); 3650eae32dcSDimitry Andric Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end()); 3660eae32dcSDimitry Andric return Result; 3670eae32dcSDimitry Andric } 3680eae32dcSDimitry Andric 3690eae32dcSDimitry Andric /// Apply the Dijkstra algorithm to find the shortest path from a given 3700eae32dcSDimitry Andric /// Source to a given Target block. 3710eae32dcSDimitry Andric /// If Target == -1, then the path ends at an exit block. 3720eae32dcSDimitry Andric std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) { 3730eae32dcSDimitry Andric // Quit early, if possible 3740eae32dcSDimitry Andric if (Source == Target) 3750eae32dcSDimitry Andric return std::vector<FlowJump *>(); 3760eae32dcSDimitry Andric if (Func.Blocks[Source].isExit() && Target == AnyExitBlock) 3770eae32dcSDimitry Andric return std::vector<FlowJump *>(); 3780eae32dcSDimitry Andric 3790eae32dcSDimitry Andric // Initialize data structures 3800eae32dcSDimitry Andric auto Distance = std::vector<int64_t>(NumBlocks(), INF); 3810eae32dcSDimitry Andric auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr); 3820eae32dcSDimitry Andric Distance[Source] = 0; 3830eae32dcSDimitry Andric std::set<std::pair<uint64_t, uint64_t>> Queue; 3840eae32dcSDimitry Andric Queue.insert(std::make_pair(Distance[Source], Source)); 3850eae32dcSDimitry Andric 3860eae32dcSDimitry Andric // Run the Dijkstra algorithm 3870eae32dcSDimitry Andric while (!Queue.empty()) { 3880eae32dcSDimitry Andric uint64_t Src = Queue.begin()->second; 3890eae32dcSDimitry Andric Queue.erase(Queue.begin()); 3900eae32dcSDimitry Andric // If we found a solution, quit early 3910eae32dcSDimitry Andric if (Src == Target || 3920eae32dcSDimitry Andric (Func.Blocks[Src].isExit() && Target == AnyExitBlock)) 3930eae32dcSDimitry Andric break; 3940eae32dcSDimitry Andric 3950eae32dcSDimitry Andric for (auto Jump : Func.Blocks[Src].SuccJumps) { 3960eae32dcSDimitry Andric uint64_t Dst = Jump->Target; 3970eae32dcSDimitry Andric int64_t JumpDist = jumpDistance(Jump); 3980eae32dcSDimitry Andric if (Distance[Dst] > Distance[Src] + JumpDist) { 3990eae32dcSDimitry Andric Queue.erase(std::make_pair(Distance[Dst], Dst)); 4000eae32dcSDimitry Andric 4010eae32dcSDimitry Andric Distance[Dst] = Distance[Src] + JumpDist; 4020eae32dcSDimitry Andric Parent[Dst] = Jump; 4030eae32dcSDimitry Andric 4040eae32dcSDimitry Andric Queue.insert(std::make_pair(Distance[Dst], Dst)); 4050eae32dcSDimitry Andric } 4060eae32dcSDimitry Andric } 4070eae32dcSDimitry Andric } 4080eae32dcSDimitry Andric // If Target is not provided, find the closest exit block 4090eae32dcSDimitry Andric if (Target == AnyExitBlock) { 4100eae32dcSDimitry Andric for (uint64_t I = 0; I < NumBlocks(); I++) { 4110eae32dcSDimitry Andric if (Func.Blocks[I].isExit() && Parent[I] != nullptr) { 4120eae32dcSDimitry Andric if (Target == AnyExitBlock || Distance[Target] > Distance[I]) { 4130eae32dcSDimitry Andric Target = I; 4140eae32dcSDimitry Andric } 4150eae32dcSDimitry Andric } 4160eae32dcSDimitry Andric } 4170eae32dcSDimitry Andric } 4180eae32dcSDimitry Andric assert(Parent[Target] != nullptr && "a path does not exist"); 4190eae32dcSDimitry Andric 4200eae32dcSDimitry Andric // Extract the constructed path 4210eae32dcSDimitry Andric std::vector<FlowJump *> Result; 4220eae32dcSDimitry Andric uint64_t Now = Target; 4230eae32dcSDimitry Andric while (Now != Source) { 4240eae32dcSDimitry Andric assert(Now == Parent[Now]->Target && "incorrect parent jump"); 4250eae32dcSDimitry Andric Result.push_back(Parent[Now]); 4260eae32dcSDimitry Andric Now = Parent[Now]->Source; 4270eae32dcSDimitry Andric } 4280eae32dcSDimitry Andric // Reverse the path, since it is extracted from Target to Source 4290eae32dcSDimitry Andric std::reverse(Result.begin(), Result.end()); 4300eae32dcSDimitry Andric return Result; 4310eae32dcSDimitry Andric } 4320eae32dcSDimitry Andric 4330eae32dcSDimitry Andric /// A distance of a path for a given jump. 4340eae32dcSDimitry Andric /// In order to incite the path to use blocks/jumps with large positive flow, 4350eae32dcSDimitry Andric /// and avoid changing branch probability of outgoing edges drastically, 4360eae32dcSDimitry Andric /// set the distance as follows: 4370eae32dcSDimitry Andric /// if Jump.Flow > 0, then distance = max(100 - Jump->Flow, 0) 4380eae32dcSDimitry Andric /// if Block.Weight > 0, then distance = 1 4390eae32dcSDimitry Andric /// otherwise distance >> 1 4400eae32dcSDimitry Andric int64_t jumpDistance(FlowJump *Jump) const { 4410eae32dcSDimitry Andric int64_t BaseDistance = 100; 4420eae32dcSDimitry Andric if (Jump->IsUnlikely) 4430eae32dcSDimitry Andric return MinCostMaxFlow::AuxCostUnlikely; 4440eae32dcSDimitry Andric if (Jump->Flow > 0) 4450eae32dcSDimitry Andric return std::max(BaseDistance - (int64_t)Jump->Flow, (int64_t)0); 4460eae32dcSDimitry Andric if (Func.Blocks[Jump->Target].Weight > 0) 4470eae32dcSDimitry Andric return BaseDistance; 4480eae32dcSDimitry Andric return BaseDistance * (NumBlocks() + 1); 4490eae32dcSDimitry Andric }; 4500eae32dcSDimitry Andric 4510eae32dcSDimitry Andric uint64_t NumBlocks() const { return Func.Blocks.size(); } 4520eae32dcSDimitry Andric 453*04eeddc0SDimitry Andric /// Rebalance unknown subgraphs so that the flow is split evenly across the 454*04eeddc0SDimitry Andric /// outgoing branches of every block of the subgraph. The method iterates over 455*04eeddc0SDimitry Andric /// blocks with known weight and identifies unknown subgraphs rooted at the 456*04eeddc0SDimitry Andric /// blocks. Then it verifies if flow rebalancing is feasible and applies it. 4570eae32dcSDimitry Andric void rebalanceUnknownSubgraphs() { 458*04eeddc0SDimitry Andric // Try to find unknown subgraphs from each block 4590eae32dcSDimitry Andric for (uint64_t I = 0; I < Func.Blocks.size(); I++) { 4600eae32dcSDimitry Andric auto SrcBlock = &Func.Blocks[I]; 461*04eeddc0SDimitry Andric // Verify if rebalancing rooted at SrcBlock is feasible 462*04eeddc0SDimitry Andric if (!canRebalanceAtRoot(SrcBlock)) 4630eae32dcSDimitry Andric continue; 4640eae32dcSDimitry Andric 465*04eeddc0SDimitry Andric // Find an unknown subgraphs starting at SrcBlock. Along the way, 466*04eeddc0SDimitry Andric // fill in known destinations and intermediate unknown blocks. 467*04eeddc0SDimitry Andric std::vector<FlowBlock *> UnknownBlocks; 468*04eeddc0SDimitry Andric std::vector<FlowBlock *> KnownDstBlocks; 469*04eeddc0SDimitry Andric findUnknownSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks); 470*04eeddc0SDimitry Andric 471*04eeddc0SDimitry Andric // Verify if rebalancing of the subgraph is feasible. If the search is 472*04eeddc0SDimitry Andric // successful, find the unique destination block (which can be null) 4730eae32dcSDimitry Andric FlowBlock *DstBlock = nullptr; 474*04eeddc0SDimitry Andric if (!canRebalanceSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks, 475*04eeddc0SDimitry Andric DstBlock)) 4760eae32dcSDimitry Andric continue; 477*04eeddc0SDimitry Andric 478*04eeddc0SDimitry Andric // We cannot rebalance subgraphs containing cycles among unknown blocks 479*04eeddc0SDimitry Andric if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownBlocks)) 4800eae32dcSDimitry Andric continue; 4810eae32dcSDimitry Andric 4820eae32dcSDimitry Andric // Rebalance the flow 483*04eeddc0SDimitry Andric rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownBlocks); 4840eae32dcSDimitry Andric } 4850eae32dcSDimitry Andric } 4860eae32dcSDimitry Andric 487*04eeddc0SDimitry Andric /// Verify if rebalancing rooted at a given block is possible. 488*04eeddc0SDimitry Andric bool canRebalanceAtRoot(const FlowBlock *SrcBlock) { 489*04eeddc0SDimitry Andric // Do not attempt to find unknown subgraphs from an unknown or a 490*04eeddc0SDimitry Andric // zero-flow block 491*04eeddc0SDimitry Andric if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0) 492*04eeddc0SDimitry Andric return false; 493*04eeddc0SDimitry Andric 494*04eeddc0SDimitry Andric // Do not attempt to process subgraphs from a block w/o unknown sucessors 495*04eeddc0SDimitry Andric bool HasUnknownSuccs = false; 496*04eeddc0SDimitry Andric for (auto Jump : SrcBlock->SuccJumps) { 497*04eeddc0SDimitry Andric if (Func.Blocks[Jump->Target].UnknownWeight) { 498*04eeddc0SDimitry Andric HasUnknownSuccs = true; 499*04eeddc0SDimitry Andric break; 500*04eeddc0SDimitry Andric } 501*04eeddc0SDimitry Andric } 502*04eeddc0SDimitry Andric if (!HasUnknownSuccs) 503*04eeddc0SDimitry Andric return false; 504*04eeddc0SDimitry Andric 505*04eeddc0SDimitry Andric return true; 506*04eeddc0SDimitry Andric } 507*04eeddc0SDimitry Andric 508*04eeddc0SDimitry Andric /// Find an unknown subgraph starting at block SrcBlock. The method sets 509*04eeddc0SDimitry Andric /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks. 510*04eeddc0SDimitry Andric void findUnknownSubgraph(const FlowBlock *SrcBlock, 511*04eeddc0SDimitry Andric std::vector<FlowBlock *> &KnownDstBlocks, 512*04eeddc0SDimitry Andric std::vector<FlowBlock *> &UnknownBlocks) { 5130eae32dcSDimitry Andric // Run BFS from SrcBlock and make sure all paths are going through unknown 5140eae32dcSDimitry Andric // blocks and end at a non-unknown DstBlock 515*04eeddc0SDimitry Andric auto Visited = BitVector(NumBlocks(), false); 5160eae32dcSDimitry Andric std::queue<uint64_t> Queue; 5170eae32dcSDimitry Andric 5180eae32dcSDimitry Andric Queue.push(SrcBlock->Index); 5190eae32dcSDimitry Andric Visited[SrcBlock->Index] = true; 5200eae32dcSDimitry Andric while (!Queue.empty()) { 5210eae32dcSDimitry Andric auto &Block = Func.Blocks[Queue.front()]; 5220eae32dcSDimitry Andric Queue.pop(); 5230eae32dcSDimitry Andric // Process blocks reachable from Block 5240eae32dcSDimitry Andric for (auto Jump : Block.SuccJumps) { 525*04eeddc0SDimitry Andric // If Jump can be ignored, skip it 526*04eeddc0SDimitry Andric if (ignoreJump(SrcBlock, nullptr, Jump)) 527*04eeddc0SDimitry Andric continue; 528*04eeddc0SDimitry Andric 5290eae32dcSDimitry Andric uint64_t Dst = Jump->Target; 530*04eeddc0SDimitry Andric // If Dst has been visited, skip Jump 5310eae32dcSDimitry Andric if (Visited[Dst]) 5320eae32dcSDimitry Andric continue; 533*04eeddc0SDimitry Andric // Process block Dst 5340eae32dcSDimitry Andric Visited[Dst] = true; 5350eae32dcSDimitry Andric if (!Func.Blocks[Dst].UnknownWeight) { 536*04eeddc0SDimitry Andric KnownDstBlocks.push_back(&Func.Blocks[Dst]); 5370eae32dcSDimitry Andric } else { 5380eae32dcSDimitry Andric Queue.push(Dst); 539*04eeddc0SDimitry Andric UnknownBlocks.push_back(&Func.Blocks[Dst]); 540*04eeddc0SDimitry Andric } 5410eae32dcSDimitry Andric } 5420eae32dcSDimitry Andric } 5430eae32dcSDimitry Andric } 5440eae32dcSDimitry Andric 545*04eeddc0SDimitry Andric /// Verify if rebalancing of the subgraph is feasible. If the checks are 546*04eeddc0SDimitry Andric /// successful, set the unique destination block, DstBlock (can be null). 547*04eeddc0SDimitry Andric bool canRebalanceSubgraph(const FlowBlock *SrcBlock, 548*04eeddc0SDimitry Andric const std::vector<FlowBlock *> &KnownDstBlocks, 549*04eeddc0SDimitry Andric const std::vector<FlowBlock *> &UnknownBlocks, 550*04eeddc0SDimitry Andric FlowBlock *&DstBlock) { 5510eae32dcSDimitry Andric // If the list of unknown blocks is empty, we don't need rebalancing 552*04eeddc0SDimitry Andric if (UnknownBlocks.empty()) 5530eae32dcSDimitry Andric return false; 554*04eeddc0SDimitry Andric 555*04eeddc0SDimitry Andric // If there are multiple known sinks, we can't rebalance 556*04eeddc0SDimitry Andric if (KnownDstBlocks.size() > 1) 5570eae32dcSDimitry Andric return false; 558*04eeddc0SDimitry Andric DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front(); 559*04eeddc0SDimitry Andric 560*04eeddc0SDimitry Andric // Verify sinks of the subgraph 561*04eeddc0SDimitry Andric for (auto Block : UnknownBlocks) { 562*04eeddc0SDimitry Andric if (Block->SuccJumps.empty()) { 563*04eeddc0SDimitry Andric // If there are multiple (known and unknown) sinks, we can't rebalance 564*04eeddc0SDimitry Andric if (DstBlock != nullptr) 565*04eeddc0SDimitry Andric return false; 566*04eeddc0SDimitry Andric continue; 567*04eeddc0SDimitry Andric } 568*04eeddc0SDimitry Andric size_t NumIgnoredJumps = 0; 569*04eeddc0SDimitry Andric for (auto Jump : Block->SuccJumps) { 570*04eeddc0SDimitry Andric if (ignoreJump(SrcBlock, DstBlock, Jump)) 571*04eeddc0SDimitry Andric NumIgnoredJumps++; 572*04eeddc0SDimitry Andric } 573*04eeddc0SDimitry Andric // If there is a non-sink block in UnknownBlocks with all jumps ignored, 574*04eeddc0SDimitry Andric // then we can't rebalance 575*04eeddc0SDimitry Andric if (NumIgnoredJumps == Block->SuccJumps.size()) 5760eae32dcSDimitry Andric return false; 5770eae32dcSDimitry Andric } 5780eae32dcSDimitry Andric 5790eae32dcSDimitry Andric return true; 5800eae32dcSDimitry Andric } 5810eae32dcSDimitry Andric 582*04eeddc0SDimitry Andric /// Decide whether the Jump is ignored while processing an unknown subgraphs 583*04eeddc0SDimitry Andric /// rooted at basic block SrcBlock with the destination block, DstBlock. 584*04eeddc0SDimitry Andric bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock, 585*04eeddc0SDimitry Andric const FlowJump *Jump) { 586*04eeddc0SDimitry Andric // Ignore unlikely jumps with zero flow 587*04eeddc0SDimitry Andric if (Jump->IsUnlikely && Jump->Flow == 0) 588*04eeddc0SDimitry Andric return true; 589*04eeddc0SDimitry Andric 590*04eeddc0SDimitry Andric auto JumpSource = &Func.Blocks[Jump->Source]; 591*04eeddc0SDimitry Andric auto JumpTarget = &Func.Blocks[Jump->Target]; 592*04eeddc0SDimitry Andric 593*04eeddc0SDimitry Andric // Do not ignore jumps coming into DstBlock 594*04eeddc0SDimitry Andric if (DstBlock != nullptr && JumpTarget == DstBlock) 595*04eeddc0SDimitry Andric return false; 596*04eeddc0SDimitry Andric 597*04eeddc0SDimitry Andric // Ignore jumps out of SrcBlock to known blocks 598*04eeddc0SDimitry Andric if (!JumpTarget->UnknownWeight && JumpSource == SrcBlock) 599*04eeddc0SDimitry Andric return true; 600*04eeddc0SDimitry Andric 601*04eeddc0SDimitry Andric // Ignore jumps to known blocks with zero flow 602*04eeddc0SDimitry Andric if (!JumpTarget->UnknownWeight && JumpTarget->Flow == 0) 603*04eeddc0SDimitry Andric return true; 604*04eeddc0SDimitry Andric 605*04eeddc0SDimitry Andric return false; 606*04eeddc0SDimitry Andric } 607*04eeddc0SDimitry Andric 6080eae32dcSDimitry Andric /// Verify if the given unknown subgraph is acyclic, and if yes, reorder 609*04eeddc0SDimitry Andric /// UnknownBlocks in the topological order (so that all jumps are "forward"). 610*04eeddc0SDimitry Andric bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock, 611*04eeddc0SDimitry Andric std::vector<FlowBlock *> &UnknownBlocks) { 6120eae32dcSDimitry Andric // Extract local in-degrees in the considered subgraph 6130eae32dcSDimitry Andric auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0); 614*04eeddc0SDimitry Andric auto fillInDegree = [&](const FlowBlock *Block) { 615*04eeddc0SDimitry Andric for (auto Jump : Block->SuccJumps) { 616*04eeddc0SDimitry Andric if (ignoreJump(SrcBlock, DstBlock, Jump)) 617*04eeddc0SDimitry Andric continue; 6180eae32dcSDimitry Andric LocalInDegree[Jump->Target]++; 6190eae32dcSDimitry Andric } 620*04eeddc0SDimitry Andric }; 621*04eeddc0SDimitry Andric fillInDegree(SrcBlock); 622*04eeddc0SDimitry Andric for (auto Block : UnknownBlocks) { 623*04eeddc0SDimitry Andric fillInDegree(Block); 6240eae32dcSDimitry Andric } 6250eae32dcSDimitry Andric // A loop containing SrcBlock 6260eae32dcSDimitry Andric if (LocalInDegree[SrcBlock->Index] > 0) 6270eae32dcSDimitry Andric return false; 6280eae32dcSDimitry Andric 6290eae32dcSDimitry Andric std::vector<FlowBlock *> AcyclicOrder; 6300eae32dcSDimitry Andric std::queue<uint64_t> Queue; 6310eae32dcSDimitry Andric Queue.push(SrcBlock->Index); 6320eae32dcSDimitry Andric while (!Queue.empty()) { 633*04eeddc0SDimitry Andric FlowBlock *Block = &Func.Blocks[Queue.front()]; 6340eae32dcSDimitry Andric Queue.pop(); 635*04eeddc0SDimitry Andric // Stop propagation once we reach DstBlock, if any 636*04eeddc0SDimitry Andric if (DstBlock != nullptr && Block == DstBlock) 6370eae32dcSDimitry Andric break; 6380eae32dcSDimitry Andric 639*04eeddc0SDimitry Andric // Keep an acyclic order of unknown blocks 640*04eeddc0SDimitry Andric if (Block->UnknownWeight && Block != SrcBlock) 641*04eeddc0SDimitry Andric AcyclicOrder.push_back(Block); 642*04eeddc0SDimitry Andric 6430eae32dcSDimitry Andric // Add to the queue all successors with zero local in-degree 644*04eeddc0SDimitry Andric for (auto Jump : Block->SuccJumps) { 645*04eeddc0SDimitry Andric if (ignoreJump(SrcBlock, DstBlock, Jump)) 646*04eeddc0SDimitry Andric continue; 6470eae32dcSDimitry Andric uint64_t Dst = Jump->Target; 6480eae32dcSDimitry Andric LocalInDegree[Dst]--; 6490eae32dcSDimitry Andric if (LocalInDegree[Dst] == 0) { 6500eae32dcSDimitry Andric Queue.push(Dst); 6510eae32dcSDimitry Andric } 6520eae32dcSDimitry Andric } 6530eae32dcSDimitry Andric } 6540eae32dcSDimitry Andric 6550eae32dcSDimitry Andric // If there is a cycle in the subgraph, AcyclicOrder contains only a subset 6560eae32dcSDimitry Andric // of all blocks 657*04eeddc0SDimitry Andric if (UnknownBlocks.size() != AcyclicOrder.size()) 6580eae32dcSDimitry Andric return false; 659*04eeddc0SDimitry Andric UnknownBlocks = AcyclicOrder; 6600eae32dcSDimitry Andric return true; 6610eae32dcSDimitry Andric } 6620eae32dcSDimitry Andric 663*04eeddc0SDimitry Andric /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and 664*04eeddc0SDimitry Andric /// having UnknownBlocks intermediate blocks. 665*04eeddc0SDimitry Andric void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock, 666*04eeddc0SDimitry Andric const FlowBlock *DstBlock, 667*04eeddc0SDimitry Andric const std::vector<FlowBlock *> &UnknownBlocks) { 6680eae32dcSDimitry Andric assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph"); 6690eae32dcSDimitry Andric 670*04eeddc0SDimitry Andric // Ditribute flow from the source block 671*04eeddc0SDimitry Andric uint64_t BlockFlow = 0; 672*04eeddc0SDimitry Andric // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps 673*04eeddc0SDimitry Andric for (auto Jump : SrcBlock->SuccJumps) { 674*04eeddc0SDimitry Andric if (ignoreJump(SrcBlock, DstBlock, Jump)) 6750eae32dcSDimitry Andric continue; 676*04eeddc0SDimitry Andric BlockFlow += Jump->Flow; 6770eae32dcSDimitry Andric } 678*04eeddc0SDimitry Andric rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow); 679*04eeddc0SDimitry Andric 680*04eeddc0SDimitry Andric // Ditribute flow from the remaining blocks 681*04eeddc0SDimitry Andric for (auto Block : UnknownBlocks) { 682*04eeddc0SDimitry Andric assert(Block->UnknownWeight && "incorrect unknown subgraph"); 683*04eeddc0SDimitry Andric uint64_t BlockFlow = 0; 684*04eeddc0SDimitry Andric // Block's flow is the sum of incoming flows 685*04eeddc0SDimitry Andric for (auto Jump : Block->PredJumps) { 686*04eeddc0SDimitry Andric BlockFlow += Jump->Flow; 687*04eeddc0SDimitry Andric } 688*04eeddc0SDimitry Andric Block->Flow = BlockFlow; 689*04eeddc0SDimitry Andric rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow); 690*04eeddc0SDimitry Andric } 691*04eeddc0SDimitry Andric } 692*04eeddc0SDimitry Andric 693*04eeddc0SDimitry Andric /// Redistribute flow for a block in a subgraph rooted at SrcBlock, 694*04eeddc0SDimitry Andric /// and ending at DstBlock. 695*04eeddc0SDimitry Andric void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock, 696*04eeddc0SDimitry Andric const FlowBlock *Block, uint64_t BlockFlow) { 697*04eeddc0SDimitry Andric // Process all successor jumps and update corresponding flow values 698*04eeddc0SDimitry Andric size_t BlockDegree = 0; 699*04eeddc0SDimitry Andric for (auto Jump : Block->SuccJumps) { 700*04eeddc0SDimitry Andric if (ignoreJump(SrcBlock, DstBlock, Jump)) 701*04eeddc0SDimitry Andric continue; 702*04eeddc0SDimitry Andric BlockDegree++; 703*04eeddc0SDimitry Andric } 704*04eeddc0SDimitry Andric // If all successor jumps of the block are ignored, skip it 705*04eeddc0SDimitry Andric if (DstBlock == nullptr && BlockDegree == 0) 706*04eeddc0SDimitry Andric return; 707*04eeddc0SDimitry Andric assert(BlockDegree > 0 && "all outgoing jumps are ignored"); 708*04eeddc0SDimitry Andric 709*04eeddc0SDimitry Andric // Each of the Block's successors gets the following amount of flow. 710*04eeddc0SDimitry Andric // Rounding the value up so that all flow is propagated 711*04eeddc0SDimitry Andric uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree; 712*04eeddc0SDimitry Andric for (auto Jump : Block->SuccJumps) { 713*04eeddc0SDimitry Andric if (ignoreJump(SrcBlock, DstBlock, Jump)) 714*04eeddc0SDimitry Andric continue; 715*04eeddc0SDimitry Andric uint64_t Flow = std::min(SuccFlow, BlockFlow); 7160eae32dcSDimitry Andric Jump->Flow = Flow; 717*04eeddc0SDimitry Andric BlockFlow -= Flow; 7180eae32dcSDimitry Andric } 719*04eeddc0SDimitry Andric assert(BlockFlow == 0 && "not all flow is propagated"); 7200eae32dcSDimitry Andric } 7210eae32dcSDimitry Andric 7220eae32dcSDimitry Andric /// A constant indicating an arbitrary exit block of a function. 7230eae32dcSDimitry Andric static constexpr uint64_t AnyExitBlock = uint64_t(-1); 7240eae32dcSDimitry Andric 7250eae32dcSDimitry Andric /// The function. 7260eae32dcSDimitry Andric FlowFunction &Func; 7270eae32dcSDimitry Andric }; 7280eae32dcSDimitry Andric 7294824e7fdSDimitry Andric /// Initializing flow network for a given function. 7304824e7fdSDimitry Andric /// 7314824e7fdSDimitry Andric /// Every block is split into three nodes that are responsible for (i) an 7324824e7fdSDimitry Andric /// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or 7334824e7fdSDimitry Andric /// reduction of the block weight. 7344824e7fdSDimitry Andric void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) { 7354824e7fdSDimitry Andric uint64_t NumBlocks = Func.Blocks.size(); 7364824e7fdSDimitry Andric assert(NumBlocks > 1 && "Too few blocks in a function"); 7374824e7fdSDimitry Andric LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n"); 7384824e7fdSDimitry Andric 7394824e7fdSDimitry Andric // Pre-process data: make sure the entry weight is at least 1 7404824e7fdSDimitry Andric if (Func.Blocks[Func.Entry].Weight == 0) { 7414824e7fdSDimitry Andric Func.Blocks[Func.Entry].Weight = 1; 7424824e7fdSDimitry Andric } 7434824e7fdSDimitry Andric // Introducing dummy source/sink pairs to allow flow circulation. 7444824e7fdSDimitry Andric // The nodes corresponding to blocks of Func have indicies in the range 7454824e7fdSDimitry Andric // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values. 7464824e7fdSDimitry Andric uint64_t S = 3 * NumBlocks; 7474824e7fdSDimitry Andric uint64_t T = S + 1; 7484824e7fdSDimitry Andric uint64_t S1 = S + 2; 7494824e7fdSDimitry Andric uint64_t T1 = S + 3; 7504824e7fdSDimitry Andric 7514824e7fdSDimitry Andric Network.initialize(3 * NumBlocks + 4, S1, T1); 7524824e7fdSDimitry Andric 7534824e7fdSDimitry Andric // Create three nodes for every block of the function 7544824e7fdSDimitry Andric for (uint64_t B = 0; B < NumBlocks; B++) { 7554824e7fdSDimitry Andric auto &Block = Func.Blocks[B]; 7564824e7fdSDimitry Andric assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) && 7574824e7fdSDimitry Andric "non-zero weight of a block w/o weight except for an entry"); 7584824e7fdSDimitry Andric 7594824e7fdSDimitry Andric // Split every block into two nodes 7604824e7fdSDimitry Andric uint64_t Bin = 3 * B; 7614824e7fdSDimitry Andric uint64_t Bout = 3 * B + 1; 7624824e7fdSDimitry Andric uint64_t Baux = 3 * B + 2; 7634824e7fdSDimitry Andric if (Block.Weight > 0) { 7644824e7fdSDimitry Andric Network.addEdge(S1, Bout, Block.Weight, 0); 7654824e7fdSDimitry Andric Network.addEdge(Bin, T1, Block.Weight, 0); 7664824e7fdSDimitry Andric } 7674824e7fdSDimitry Andric 7684824e7fdSDimitry Andric // Edges from S and to T 7694824e7fdSDimitry Andric assert((!Block.isEntry() || !Block.isExit()) && 7704824e7fdSDimitry Andric "a block cannot be an entry and an exit"); 7714824e7fdSDimitry Andric if (Block.isEntry()) { 7724824e7fdSDimitry Andric Network.addEdge(S, Bin, 0); 7734824e7fdSDimitry Andric } else if (Block.isExit()) { 7744824e7fdSDimitry Andric Network.addEdge(Bout, T, 0); 7754824e7fdSDimitry Andric } 7764824e7fdSDimitry Andric 7774824e7fdSDimitry Andric // An auxiliary node to allow increase/reduction of block counts: 7784824e7fdSDimitry Andric // We assume that decreasing block counts is more expensive than increasing, 7794824e7fdSDimitry Andric // and thus, setting separate costs here. In the future we may want to tune 7804824e7fdSDimitry Andric // the relative costs so as to maximize the quality of generated profiles. 7814824e7fdSDimitry Andric int64_t AuxCostInc = MinCostMaxFlow::AuxCostInc; 7824824e7fdSDimitry Andric int64_t AuxCostDec = MinCostMaxFlow::AuxCostDec; 7834824e7fdSDimitry Andric if (Block.UnknownWeight) { 7844824e7fdSDimitry Andric // Do not penalize changing weights of blocks w/o known profile count 7854824e7fdSDimitry Andric AuxCostInc = 0; 7864824e7fdSDimitry Andric AuxCostDec = 0; 7874824e7fdSDimitry Andric } else { 7884824e7fdSDimitry Andric // Increasing the count for "cold" blocks with zero initial count is more 7894824e7fdSDimitry Andric // expensive than for "hot" ones 7904824e7fdSDimitry Andric if (Block.Weight == 0) { 7914824e7fdSDimitry Andric AuxCostInc = MinCostMaxFlow::AuxCostIncZero; 7924824e7fdSDimitry Andric } 7934824e7fdSDimitry Andric // Modifying the count of the entry block is expensive 7944824e7fdSDimitry Andric if (Block.isEntry()) { 7954824e7fdSDimitry Andric AuxCostInc = MinCostMaxFlow::AuxCostIncEntry; 7964824e7fdSDimitry Andric AuxCostDec = MinCostMaxFlow::AuxCostDecEntry; 7974824e7fdSDimitry Andric } 7984824e7fdSDimitry Andric } 7994824e7fdSDimitry Andric // For blocks with self-edges, do not penalize a reduction of the count, 8004824e7fdSDimitry Andric // as all of the increase can be attributed to the self-edge 8014824e7fdSDimitry Andric if (Block.HasSelfEdge) { 8024824e7fdSDimitry Andric AuxCostDec = 0; 8034824e7fdSDimitry Andric } 8044824e7fdSDimitry Andric 8054824e7fdSDimitry Andric Network.addEdge(Bin, Baux, AuxCostInc); 8064824e7fdSDimitry Andric Network.addEdge(Baux, Bout, AuxCostInc); 8074824e7fdSDimitry Andric if (Block.Weight > 0) { 8084824e7fdSDimitry Andric Network.addEdge(Bout, Baux, AuxCostDec); 8094824e7fdSDimitry Andric Network.addEdge(Baux, Bin, AuxCostDec); 8104824e7fdSDimitry Andric } 8114824e7fdSDimitry Andric } 8124824e7fdSDimitry Andric 8134824e7fdSDimitry Andric // Creating edges for every jump 8144824e7fdSDimitry Andric for (auto &Jump : Func.Jumps) { 8154824e7fdSDimitry Andric uint64_t Src = Jump.Source; 8164824e7fdSDimitry Andric uint64_t Dst = Jump.Target; 8174824e7fdSDimitry Andric if (Src != Dst) { 8184824e7fdSDimitry Andric uint64_t SrcOut = 3 * Src + 1; 8194824e7fdSDimitry Andric uint64_t DstIn = 3 * Dst; 8204824e7fdSDimitry Andric uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0; 8214824e7fdSDimitry Andric Network.addEdge(SrcOut, DstIn, Cost); 8224824e7fdSDimitry Andric } 8234824e7fdSDimitry Andric } 8244824e7fdSDimitry Andric 8254824e7fdSDimitry Andric // Make sure we have a valid flow circulation 8264824e7fdSDimitry Andric Network.addEdge(T, S, 0); 8274824e7fdSDimitry Andric } 8284824e7fdSDimitry Andric 8294824e7fdSDimitry Andric /// Extract resulting block and edge counts from the flow network. 8304824e7fdSDimitry Andric void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) { 8314824e7fdSDimitry Andric uint64_t NumBlocks = Func.Blocks.size(); 8324824e7fdSDimitry Andric 8334824e7fdSDimitry Andric // Extract resulting block counts 8344824e7fdSDimitry Andric for (uint64_t Src = 0; Src < NumBlocks; Src++) { 8354824e7fdSDimitry Andric auto &Block = Func.Blocks[Src]; 8364824e7fdSDimitry Andric uint64_t SrcOut = 3 * Src + 1; 8374824e7fdSDimitry Andric int64_t Flow = 0; 8384824e7fdSDimitry Andric for (auto &Adj : Network.getFlow(SrcOut)) { 8394824e7fdSDimitry Andric uint64_t DstIn = Adj.first; 8404824e7fdSDimitry Andric int64_t DstFlow = Adj.second; 8414824e7fdSDimitry Andric bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2); 8424824e7fdSDimitry Andric if (!IsAuxNode || Block.HasSelfEdge) { 8434824e7fdSDimitry Andric Flow += DstFlow; 8444824e7fdSDimitry Andric } 8454824e7fdSDimitry Andric } 8464824e7fdSDimitry Andric Block.Flow = Flow; 8474824e7fdSDimitry Andric assert(Flow >= 0 && "negative block flow"); 8484824e7fdSDimitry Andric } 8494824e7fdSDimitry Andric 8504824e7fdSDimitry Andric // Extract resulting jump counts 8514824e7fdSDimitry Andric for (auto &Jump : Func.Jumps) { 8524824e7fdSDimitry Andric uint64_t Src = Jump.Source; 8534824e7fdSDimitry Andric uint64_t Dst = Jump.Target; 8544824e7fdSDimitry Andric int64_t Flow = 0; 8554824e7fdSDimitry Andric if (Src != Dst) { 8564824e7fdSDimitry Andric uint64_t SrcOut = 3 * Src + 1; 8574824e7fdSDimitry Andric uint64_t DstIn = 3 * Dst; 8584824e7fdSDimitry Andric Flow = Network.getFlow(SrcOut, DstIn); 8594824e7fdSDimitry Andric } else { 8604824e7fdSDimitry Andric uint64_t SrcOut = 3 * Src + 1; 8614824e7fdSDimitry Andric uint64_t SrcAux = 3 * Src + 2; 8624824e7fdSDimitry Andric int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux); 8634824e7fdSDimitry Andric if (AuxFlow > 0) 8644824e7fdSDimitry Andric Flow = AuxFlow; 8654824e7fdSDimitry Andric } 8664824e7fdSDimitry Andric Jump.Flow = Flow; 8674824e7fdSDimitry Andric assert(Flow >= 0 && "negative jump flow"); 8684824e7fdSDimitry Andric } 8694824e7fdSDimitry Andric } 8704824e7fdSDimitry Andric 8714824e7fdSDimitry Andric #ifndef NDEBUG 8724824e7fdSDimitry Andric /// Verify that the computed flow values satisfy flow conservation rules 8734824e7fdSDimitry Andric void verifyWeights(const FlowFunction &Func) { 8744824e7fdSDimitry Andric const uint64_t NumBlocks = Func.Blocks.size(); 8754824e7fdSDimitry Andric auto InFlow = std::vector<uint64_t>(NumBlocks, 0); 8764824e7fdSDimitry Andric auto OutFlow = std::vector<uint64_t>(NumBlocks, 0); 8774824e7fdSDimitry Andric for (auto &Jump : Func.Jumps) { 8784824e7fdSDimitry Andric InFlow[Jump.Target] += Jump.Flow; 8794824e7fdSDimitry Andric OutFlow[Jump.Source] += Jump.Flow; 8804824e7fdSDimitry Andric } 8814824e7fdSDimitry Andric 8824824e7fdSDimitry Andric uint64_t TotalInFlow = 0; 8834824e7fdSDimitry Andric uint64_t TotalOutFlow = 0; 8844824e7fdSDimitry Andric for (uint64_t I = 0; I < NumBlocks; I++) { 8854824e7fdSDimitry Andric auto &Block = Func.Blocks[I]; 8864824e7fdSDimitry Andric if (Block.isEntry()) { 8874824e7fdSDimitry Andric TotalInFlow += Block.Flow; 8884824e7fdSDimitry Andric assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow"); 8894824e7fdSDimitry Andric } else if (Block.isExit()) { 8904824e7fdSDimitry Andric TotalOutFlow += Block.Flow; 8914824e7fdSDimitry Andric assert(Block.Flow == InFlow[I] && "incorrectly computed control flow"); 8924824e7fdSDimitry Andric } else { 8934824e7fdSDimitry Andric assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow"); 8944824e7fdSDimitry Andric assert(Block.Flow == InFlow[I] && "incorrectly computed control flow"); 8954824e7fdSDimitry Andric } 8964824e7fdSDimitry Andric } 8974824e7fdSDimitry Andric assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow"); 8980eae32dcSDimitry Andric 8990eae32dcSDimitry Andric // Verify that there are no isolated flow components 9000eae32dcSDimitry Andric // One could modify FlowFunction to hold edges indexed by the sources, which 9010eae32dcSDimitry Andric // will avoid a creation of the object 9020eae32dcSDimitry Andric auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks); 9030eae32dcSDimitry Andric for (auto &Jump : Func.Jumps) { 9040eae32dcSDimitry Andric if (Jump.Flow > 0) { 9050eae32dcSDimitry Andric PositiveFlowEdges[Jump.Source].push_back(Jump.Target); 9060eae32dcSDimitry Andric } 9070eae32dcSDimitry Andric } 9080eae32dcSDimitry Andric 9090eae32dcSDimitry Andric // Run BFS from the source along edges with positive flow 9100eae32dcSDimitry Andric std::queue<uint64_t> Queue; 911*04eeddc0SDimitry Andric auto Visited = BitVector(NumBlocks, false); 9120eae32dcSDimitry Andric Queue.push(Func.Entry); 9130eae32dcSDimitry Andric Visited[Func.Entry] = true; 9140eae32dcSDimitry Andric while (!Queue.empty()) { 9150eae32dcSDimitry Andric uint64_t Src = Queue.front(); 9160eae32dcSDimitry Andric Queue.pop(); 9170eae32dcSDimitry Andric for (uint64_t Dst : PositiveFlowEdges[Src]) { 9180eae32dcSDimitry Andric if (!Visited[Dst]) { 9190eae32dcSDimitry Andric Queue.push(Dst); 9200eae32dcSDimitry Andric Visited[Dst] = true; 9210eae32dcSDimitry Andric } 9220eae32dcSDimitry Andric } 9230eae32dcSDimitry Andric } 9240eae32dcSDimitry Andric 9250eae32dcSDimitry Andric // Verify that every block that has a positive flow is reached from the source 9260eae32dcSDimitry Andric // along edges with a positive flow 9270eae32dcSDimitry Andric for (uint64_t I = 0; I < NumBlocks; I++) { 9280eae32dcSDimitry Andric auto &Block = Func.Blocks[I]; 9290eae32dcSDimitry Andric assert((Visited[I] || Block.Flow == 0) && "an isolated flow component"); 9300eae32dcSDimitry Andric } 9314824e7fdSDimitry Andric } 9324824e7fdSDimitry Andric #endif 9334824e7fdSDimitry Andric 9344824e7fdSDimitry Andric } // end of anonymous namespace 9354824e7fdSDimitry Andric 9364824e7fdSDimitry Andric /// Apply the profile inference algorithm for a given flow function 9374824e7fdSDimitry Andric void llvm::applyFlowInference(FlowFunction &Func) { 9384824e7fdSDimitry Andric // Create and apply an inference network model 9394824e7fdSDimitry Andric auto InferenceNetwork = MinCostMaxFlow(); 9404824e7fdSDimitry Andric initializeNetwork(InferenceNetwork, Func); 9414824e7fdSDimitry Andric InferenceNetwork.run(); 9424824e7fdSDimitry Andric 9434824e7fdSDimitry Andric // Extract flow values for every block and every edge 9444824e7fdSDimitry Andric extractWeights(InferenceNetwork, Func); 9454824e7fdSDimitry Andric 9460eae32dcSDimitry Andric // Post-processing adjustments to the flow 9470eae32dcSDimitry Andric auto Adjuster = FlowAdjuster(Func); 9480eae32dcSDimitry Andric Adjuster.run(); 9490eae32dcSDimitry Andric 9504824e7fdSDimitry Andric #ifndef NDEBUG 9514824e7fdSDimitry Andric // Verify the result 9524824e7fdSDimitry Andric verifyWeights(Func); 9534824e7fdSDimitry Andric #endif 9544824e7fdSDimitry Andric } 955