14824e7fdSDimitry Andric //===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===// 24824e7fdSDimitry Andric // 34824e7fdSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 44824e7fdSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 54824e7fdSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 64824e7fdSDimitry Andric // 74824e7fdSDimitry Andric //===----------------------------------------------------------------------===// 84824e7fdSDimitry Andric // 94824e7fdSDimitry Andric // This file implements a profile inference algorithm. Given an incomplete and 104824e7fdSDimitry Andric // possibly imprecise block counts, the algorithm reconstructs realistic block 114824e7fdSDimitry Andric // and edge counts that satisfy flow conservation rules, while minimally modify 124824e7fdSDimitry Andric // input block counts. 134824e7fdSDimitry Andric // 144824e7fdSDimitry Andric //===----------------------------------------------------------------------===// 154824e7fdSDimitry Andric 164824e7fdSDimitry Andric #include "llvm/Transforms/Utils/SampleProfileInference.h" 1704eeddc0SDimitry Andric #include "llvm/ADT/BitVector.h" 18*81ad6265SDimitry Andric #include "llvm/Support/CommandLine.h" 194824e7fdSDimitry Andric #include "llvm/Support/Debug.h" 204824e7fdSDimitry Andric #include <queue> 214824e7fdSDimitry Andric #include <set> 22*81ad6265SDimitry Andric #include <stack> 234824e7fdSDimitry Andric 244824e7fdSDimitry Andric using namespace llvm; 254824e7fdSDimitry Andric #define DEBUG_TYPE "sample-profile-inference" 264824e7fdSDimitry Andric 274824e7fdSDimitry Andric namespace { 284824e7fdSDimitry Andric 29*81ad6265SDimitry Andric static cl::opt<bool> SampleProfileEvenCountDistribution( 30*81ad6265SDimitry Andric "sample-profile-even-count-distribution", cl::init(true), cl::Hidden, 31*81ad6265SDimitry Andric cl::desc("Try to evenly distribute counts when there are multiple equally " 32*81ad6265SDimitry Andric "likely options.")); 33*81ad6265SDimitry Andric 34*81ad6265SDimitry Andric static cl::opt<unsigned> SampleProfileMaxDfsCalls( 35*81ad6265SDimitry Andric "sample-profile-max-dfs-calls", cl::init(10), cl::Hidden, 36*81ad6265SDimitry Andric cl::desc("Maximum number of dfs iterations for even count distribution.")); 37*81ad6265SDimitry Andric 38*81ad6265SDimitry Andric static cl::opt<unsigned> SampleProfileProfiCostInc( 39*81ad6265SDimitry Andric "sample-profile-profi-cost-inc", cl::init(10), cl::Hidden, 40*81ad6265SDimitry Andric cl::desc("A cost of increasing a block's count by one.")); 41*81ad6265SDimitry Andric 42*81ad6265SDimitry Andric static cl::opt<unsigned> SampleProfileProfiCostDec( 43*81ad6265SDimitry Andric "sample-profile-profi-cost-dec", cl::init(20), cl::Hidden, 44*81ad6265SDimitry Andric cl::desc("A cost of decreasing a block's count by one.")); 45*81ad6265SDimitry Andric 46*81ad6265SDimitry Andric static cl::opt<unsigned> SampleProfileProfiCostIncZero( 47*81ad6265SDimitry Andric "sample-profile-profi-cost-inc-zero", cl::init(11), cl::Hidden, 48*81ad6265SDimitry Andric cl::desc("A cost of increasing a count of zero-weight block by one.")); 49*81ad6265SDimitry Andric 50*81ad6265SDimitry Andric static cl::opt<unsigned> SampleProfileProfiCostIncEntry( 51*81ad6265SDimitry Andric "sample-profile-profi-cost-inc-entry", cl::init(40), cl::Hidden, 52*81ad6265SDimitry Andric cl::desc("A cost of increasing the entry block's count by one.")); 53*81ad6265SDimitry Andric 54*81ad6265SDimitry Andric static cl::opt<unsigned> SampleProfileProfiCostDecEntry( 55*81ad6265SDimitry Andric "sample-profile-profi-cost-dec-entry", cl::init(10), cl::Hidden, 56*81ad6265SDimitry Andric cl::desc("A cost of decreasing the entry block's count by one.")); 57*81ad6265SDimitry Andric 584824e7fdSDimitry Andric /// A value indicating an infinite flow/capacity/weight of a block/edge. 594824e7fdSDimitry Andric /// Not using numeric_limits<int64_t>::max(), as the values can be summed up 604824e7fdSDimitry Andric /// during the execution. 614824e7fdSDimitry Andric static constexpr int64_t INF = ((int64_t)1) << 50; 624824e7fdSDimitry Andric 634824e7fdSDimitry Andric /// The minimum-cost maximum flow algorithm. 644824e7fdSDimitry Andric /// 654824e7fdSDimitry Andric /// The algorithm finds the maximum flow of minimum cost on a given (directed) 664824e7fdSDimitry Andric /// network using a modified version of the classical Moore-Bellman-Ford 674824e7fdSDimitry Andric /// approach. The algorithm applies a number of augmentation iterations in which 684824e7fdSDimitry Andric /// flow is sent along paths of positive capacity from the source to the sink. 694824e7fdSDimitry Andric /// The worst-case time complexity of the implementation is O(v(f)*m*n), where 704824e7fdSDimitry Andric /// where m is the number of edges, n is the number of vertices, and v(f) is the 714824e7fdSDimitry Andric /// value of the maximum flow. However, the observed running time on typical 724824e7fdSDimitry Andric /// instances is sub-quadratic, that is, o(n^2). 734824e7fdSDimitry Andric /// 744824e7fdSDimitry Andric /// The input is a set of edges with specified costs and capacities, and a pair 754824e7fdSDimitry Andric /// of nodes (source and sink). The output is the flow along each edge of the 764824e7fdSDimitry Andric /// minimum total cost respecting the given edge capacities. 774824e7fdSDimitry Andric class MinCostMaxFlow { 784824e7fdSDimitry Andric public: 794824e7fdSDimitry Andric // Initialize algorithm's data structures for a network of a given size. 804824e7fdSDimitry Andric void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) { 814824e7fdSDimitry Andric Source = SourceNode; 824824e7fdSDimitry Andric Target = SinkNode; 834824e7fdSDimitry Andric 844824e7fdSDimitry Andric Nodes = std::vector<Node>(NodeCount); 854824e7fdSDimitry Andric Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>()); 86*81ad6265SDimitry Andric if (SampleProfileEvenCountDistribution) 87*81ad6265SDimitry Andric AugmentingEdges = 88*81ad6265SDimitry Andric std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>()); 894824e7fdSDimitry Andric } 904824e7fdSDimitry Andric 914824e7fdSDimitry Andric // Run the algorithm. 924824e7fdSDimitry Andric int64_t run() { 93*81ad6265SDimitry Andric // Iteratively find an augmentation path/dag in the network and send the 94*81ad6265SDimitry Andric // flow along its edges 95*81ad6265SDimitry Andric size_t AugmentationIters = applyFlowAugmentation(); 964824e7fdSDimitry Andric 974824e7fdSDimitry Andric // Compute the total flow and its cost 984824e7fdSDimitry Andric int64_t TotalCost = 0; 994824e7fdSDimitry Andric int64_t TotalFlow = 0; 1004824e7fdSDimitry Andric for (uint64_t Src = 0; Src < Nodes.size(); Src++) { 1014824e7fdSDimitry Andric for (auto &Edge : Edges[Src]) { 1024824e7fdSDimitry Andric if (Edge.Flow > 0) { 1034824e7fdSDimitry Andric TotalCost += Edge.Cost * Edge.Flow; 1044824e7fdSDimitry Andric if (Src == Source) 1054824e7fdSDimitry Andric TotalFlow += Edge.Flow; 1064824e7fdSDimitry Andric } 1074824e7fdSDimitry Andric } 1084824e7fdSDimitry Andric } 1094824e7fdSDimitry Andric LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters 1104824e7fdSDimitry Andric << " iterations with " << TotalFlow << " total flow" 1114824e7fdSDimitry Andric << " of " << TotalCost << " cost\n"); 1124824e7fdSDimitry Andric (void)TotalFlow; 113*81ad6265SDimitry Andric (void)AugmentationIters; 1144824e7fdSDimitry Andric return TotalCost; 1154824e7fdSDimitry Andric } 1164824e7fdSDimitry Andric 1174824e7fdSDimitry Andric /// Adding an edge to the network with a specified capacity and a cost. 1184824e7fdSDimitry Andric /// Multiple edges between a pair of nodes are allowed but self-edges 1194824e7fdSDimitry Andric /// are not supported. 1204824e7fdSDimitry Andric void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) { 1214824e7fdSDimitry Andric assert(Capacity > 0 && "adding an edge of zero capacity"); 1224824e7fdSDimitry Andric assert(Src != Dst && "loop edge are not supported"); 1234824e7fdSDimitry Andric 1244824e7fdSDimitry Andric Edge SrcEdge; 1254824e7fdSDimitry Andric SrcEdge.Dst = Dst; 1264824e7fdSDimitry Andric SrcEdge.Cost = Cost; 1274824e7fdSDimitry Andric SrcEdge.Capacity = Capacity; 1284824e7fdSDimitry Andric SrcEdge.Flow = 0; 1294824e7fdSDimitry Andric SrcEdge.RevEdgeIndex = Edges[Dst].size(); 1304824e7fdSDimitry Andric 1314824e7fdSDimitry Andric Edge DstEdge; 1324824e7fdSDimitry Andric DstEdge.Dst = Src; 1334824e7fdSDimitry Andric DstEdge.Cost = -Cost; 1344824e7fdSDimitry Andric DstEdge.Capacity = 0; 1354824e7fdSDimitry Andric DstEdge.Flow = 0; 1364824e7fdSDimitry Andric DstEdge.RevEdgeIndex = Edges[Src].size(); 1374824e7fdSDimitry Andric 1384824e7fdSDimitry Andric Edges[Src].push_back(SrcEdge); 1394824e7fdSDimitry Andric Edges[Dst].push_back(DstEdge); 1404824e7fdSDimitry Andric } 1414824e7fdSDimitry Andric 1424824e7fdSDimitry Andric /// Adding an edge to the network of infinite capacity and a given cost. 1434824e7fdSDimitry Andric void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) { 1444824e7fdSDimitry Andric addEdge(Src, Dst, INF, Cost); 1454824e7fdSDimitry Andric } 1464824e7fdSDimitry Andric 1474824e7fdSDimitry Andric /// Get the total flow from a given source node. 1484824e7fdSDimitry Andric /// Returns a list of pairs (target node, amount of flow to the target). 1494824e7fdSDimitry Andric const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const { 1504824e7fdSDimitry Andric std::vector<std::pair<uint64_t, int64_t>> Flow; 1514824e7fdSDimitry Andric for (auto &Edge : Edges[Src]) { 1524824e7fdSDimitry Andric if (Edge.Flow > 0) 1534824e7fdSDimitry Andric Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow)); 1544824e7fdSDimitry Andric } 1554824e7fdSDimitry Andric return Flow; 1564824e7fdSDimitry Andric } 1574824e7fdSDimitry Andric 1584824e7fdSDimitry Andric /// Get the total flow between a pair of nodes. 1594824e7fdSDimitry Andric int64_t getFlow(uint64_t Src, uint64_t Dst) const { 1604824e7fdSDimitry Andric int64_t Flow = 0; 1614824e7fdSDimitry Andric for (auto &Edge : Edges[Src]) { 1624824e7fdSDimitry Andric if (Edge.Dst == Dst) { 1634824e7fdSDimitry Andric Flow += Edge.Flow; 1644824e7fdSDimitry Andric } 1654824e7fdSDimitry Andric } 1664824e7fdSDimitry Andric return Flow; 1674824e7fdSDimitry Andric } 1684824e7fdSDimitry Andric 1694824e7fdSDimitry Andric /// A cost of taking an unlikely jump. 17004eeddc0SDimitry Andric static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 30; 171*81ad6265SDimitry Andric /// Minimum BaseDistance for the jump distance values in island joining. 172*81ad6265SDimitry Andric static constexpr uint64_t MinBaseDistance = 10000; 1734824e7fdSDimitry Andric 1744824e7fdSDimitry Andric private: 175*81ad6265SDimitry Andric /// Iteratively find an augmentation path/dag in the network and send the 176*81ad6265SDimitry Andric /// flow along its edges. The method returns the number of applied iterations. 177*81ad6265SDimitry Andric size_t applyFlowAugmentation() { 178*81ad6265SDimitry Andric size_t AugmentationIters = 0; 179*81ad6265SDimitry Andric while (findAugmentingPath()) { 180*81ad6265SDimitry Andric uint64_t PathCapacity = computeAugmentingPathCapacity(); 181*81ad6265SDimitry Andric while (PathCapacity > 0) { 182*81ad6265SDimitry Andric bool Progress = false; 183*81ad6265SDimitry Andric if (SampleProfileEvenCountDistribution) { 184*81ad6265SDimitry Andric // Identify node/edge candidates for augmentation 185*81ad6265SDimitry Andric identifyShortestEdges(PathCapacity); 186*81ad6265SDimitry Andric 187*81ad6265SDimitry Andric // Find an augmenting DAG 188*81ad6265SDimitry Andric auto AugmentingOrder = findAugmentingDAG(); 189*81ad6265SDimitry Andric 190*81ad6265SDimitry Andric // Apply the DAG augmentation 191*81ad6265SDimitry Andric Progress = augmentFlowAlongDAG(AugmentingOrder); 192*81ad6265SDimitry Andric PathCapacity = computeAugmentingPathCapacity(); 193*81ad6265SDimitry Andric } 194*81ad6265SDimitry Andric 195*81ad6265SDimitry Andric if (!Progress) { 196*81ad6265SDimitry Andric augmentFlowAlongPath(PathCapacity); 197*81ad6265SDimitry Andric PathCapacity = 0; 198*81ad6265SDimitry Andric } 199*81ad6265SDimitry Andric 200*81ad6265SDimitry Andric AugmentationIters++; 201*81ad6265SDimitry Andric } 202*81ad6265SDimitry Andric } 203*81ad6265SDimitry Andric return AugmentationIters; 204*81ad6265SDimitry Andric } 205*81ad6265SDimitry Andric 206*81ad6265SDimitry Andric /// Compute the capacity of the cannonical augmenting path. If the path is 207*81ad6265SDimitry Andric /// saturated (that is, no flow can be sent along the path), then return 0. 208*81ad6265SDimitry Andric uint64_t computeAugmentingPathCapacity() { 209*81ad6265SDimitry Andric uint64_t PathCapacity = INF; 210*81ad6265SDimitry Andric uint64_t Now = Target; 211*81ad6265SDimitry Andric while (Now != Source) { 212*81ad6265SDimitry Andric uint64_t Pred = Nodes[Now].ParentNode; 213*81ad6265SDimitry Andric auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; 214*81ad6265SDimitry Andric 215*81ad6265SDimitry Andric assert(Edge.Capacity >= Edge.Flow && "incorrect edge flow"); 216*81ad6265SDimitry Andric uint64_t EdgeCapacity = uint64_t(Edge.Capacity - Edge.Flow); 217*81ad6265SDimitry Andric PathCapacity = std::min(PathCapacity, EdgeCapacity); 218*81ad6265SDimitry Andric 219*81ad6265SDimitry Andric Now = Pred; 220*81ad6265SDimitry Andric } 221*81ad6265SDimitry Andric return PathCapacity; 222*81ad6265SDimitry Andric } 223*81ad6265SDimitry Andric 2244824e7fdSDimitry Andric /// Check for existence of an augmenting path with a positive capacity. 2254824e7fdSDimitry Andric bool findAugmentingPath() { 2264824e7fdSDimitry Andric // Initialize data structures 2274824e7fdSDimitry Andric for (auto &Node : Nodes) { 2284824e7fdSDimitry Andric Node.Distance = INF; 2294824e7fdSDimitry Andric Node.ParentNode = uint64_t(-1); 2304824e7fdSDimitry Andric Node.ParentEdgeIndex = uint64_t(-1); 2314824e7fdSDimitry Andric Node.Taken = false; 2324824e7fdSDimitry Andric } 2334824e7fdSDimitry Andric 2344824e7fdSDimitry Andric std::queue<uint64_t> Queue; 2354824e7fdSDimitry Andric Queue.push(Source); 2364824e7fdSDimitry Andric Nodes[Source].Distance = 0; 2374824e7fdSDimitry Andric Nodes[Source].Taken = true; 2384824e7fdSDimitry Andric while (!Queue.empty()) { 2394824e7fdSDimitry Andric uint64_t Src = Queue.front(); 2404824e7fdSDimitry Andric Queue.pop(); 2414824e7fdSDimitry Andric Nodes[Src].Taken = false; 2424824e7fdSDimitry Andric // Although the residual network contains edges with negative costs 2434824e7fdSDimitry Andric // (in particular, backward edges), it can be shown that there are no 2444824e7fdSDimitry Andric // negative-weight cycles and the following two invariants are maintained: 2454824e7fdSDimitry Andric // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V, 2464824e7fdSDimitry Andric // where Dist is the length of the shortest path between two nodes. This 2474824e7fdSDimitry Andric // allows to prune the search-space of the path-finding algorithm using 2484824e7fdSDimitry Andric // the following early-stop criteria: 2494824e7fdSDimitry Andric // -- If we find a path with zero-distance from Source to Target, stop the 2504824e7fdSDimitry Andric // search, as the path is the shortest since Dist[Source, Target] >= 0; 2514824e7fdSDimitry Andric // -- If we have Dist[Source, V] > Dist[Source, Target], then do not 2524824e7fdSDimitry Andric // process node V, as it is guaranteed _not_ to be on a shortest path 2534824e7fdSDimitry Andric // from Source to Target; it follows from inequalities 2544824e7fdSDimitry Andric // Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target] 2554824e7fdSDimitry Andric // >= Dist[Source, V] 256*81ad6265SDimitry Andric if (!SampleProfileEvenCountDistribution && Nodes[Target].Distance == 0) 2574824e7fdSDimitry Andric break; 2584824e7fdSDimitry Andric if (Nodes[Src].Distance > Nodes[Target].Distance) 2594824e7fdSDimitry Andric continue; 2604824e7fdSDimitry Andric 2614824e7fdSDimitry Andric // Process adjacent edges 2624824e7fdSDimitry Andric for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) { 2634824e7fdSDimitry Andric auto &Edge = Edges[Src][EdgeIdx]; 2644824e7fdSDimitry Andric if (Edge.Flow < Edge.Capacity) { 2654824e7fdSDimitry Andric uint64_t Dst = Edge.Dst; 2664824e7fdSDimitry Andric int64_t NewDistance = Nodes[Src].Distance + Edge.Cost; 2674824e7fdSDimitry Andric if (Nodes[Dst].Distance > NewDistance) { 2684824e7fdSDimitry Andric // Update the distance and the parent node/edge 2694824e7fdSDimitry Andric Nodes[Dst].Distance = NewDistance; 2704824e7fdSDimitry Andric Nodes[Dst].ParentNode = Src; 2714824e7fdSDimitry Andric Nodes[Dst].ParentEdgeIndex = EdgeIdx; 2724824e7fdSDimitry Andric // Add the node to the queue, if it is not there yet 2734824e7fdSDimitry Andric if (!Nodes[Dst].Taken) { 2744824e7fdSDimitry Andric Queue.push(Dst); 2754824e7fdSDimitry Andric Nodes[Dst].Taken = true; 2764824e7fdSDimitry Andric } 2774824e7fdSDimitry Andric } 2784824e7fdSDimitry Andric } 2794824e7fdSDimitry Andric } 2804824e7fdSDimitry Andric } 2814824e7fdSDimitry Andric 2824824e7fdSDimitry Andric return Nodes[Target].Distance != INF; 2834824e7fdSDimitry Andric } 2844824e7fdSDimitry Andric 2854824e7fdSDimitry Andric /// Update the current flow along the augmenting path. 286*81ad6265SDimitry Andric void augmentFlowAlongPath(uint64_t PathCapacity) { 2870eae32dcSDimitry Andric assert(PathCapacity > 0 && "found an incorrect augmenting path"); 288*81ad6265SDimitry Andric uint64_t Now = Target; 2894824e7fdSDimitry Andric while (Now != Source) { 2904824e7fdSDimitry Andric uint64_t Pred = Nodes[Now].ParentNode; 2914824e7fdSDimitry Andric auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; 2924824e7fdSDimitry Andric auto &RevEdge = Edges[Now][Edge.RevEdgeIndex]; 2934824e7fdSDimitry Andric 2944824e7fdSDimitry Andric Edge.Flow += PathCapacity; 2954824e7fdSDimitry Andric RevEdge.Flow -= PathCapacity; 2964824e7fdSDimitry Andric 2974824e7fdSDimitry Andric Now = Pred; 2984824e7fdSDimitry Andric } 2994824e7fdSDimitry Andric } 3004824e7fdSDimitry Andric 301*81ad6265SDimitry Andric /// Find an Augmenting DAG order using a modified version of DFS in which we 302*81ad6265SDimitry Andric /// can visit a node multiple times. In the DFS search, when scanning each 303*81ad6265SDimitry Andric /// edge out of a node, continue search at Edge.Dst endpoint if it has not 304*81ad6265SDimitry Andric /// been discovered yet and its NumCalls < MaxDfsCalls. The algorithm 305*81ad6265SDimitry Andric /// runs in O(MaxDfsCalls * |Edges| + |Nodes|) time. 306*81ad6265SDimitry Andric /// It returns an Augmenting Order (Taken nodes in decreasing Finish time) 307*81ad6265SDimitry Andric /// that starts with Source and ends with Target. 308*81ad6265SDimitry Andric std::vector<uint64_t> findAugmentingDAG() { 309*81ad6265SDimitry Andric // We use a stack based implemenation of DFS to avoid recursion. 310*81ad6265SDimitry Andric // Defining DFS data structures: 311*81ad6265SDimitry Andric // A pair (NodeIdx, EdgeIdx) at the top of the Stack denotes that 312*81ad6265SDimitry Andric // - we are currently visiting Nodes[NodeIdx] and 313*81ad6265SDimitry Andric // - the next edge to scan is Edges[NodeIdx][EdgeIdx] 314*81ad6265SDimitry Andric typedef std::pair<uint64_t, uint64_t> StackItemType; 315*81ad6265SDimitry Andric std::stack<StackItemType> Stack; 316*81ad6265SDimitry Andric std::vector<uint64_t> AugmentingOrder; 317*81ad6265SDimitry Andric 318*81ad6265SDimitry Andric // Phase 0: Initialize Node attributes and Time for DFS run 319*81ad6265SDimitry Andric for (auto &Node : Nodes) { 320*81ad6265SDimitry Andric Node.Discovery = 0; 321*81ad6265SDimitry Andric Node.Finish = 0; 322*81ad6265SDimitry Andric Node.NumCalls = 0; 323*81ad6265SDimitry Andric Node.Taken = false; 324*81ad6265SDimitry Andric } 325*81ad6265SDimitry Andric uint64_t Time = 0; 326*81ad6265SDimitry Andric // Mark Target as Taken 327*81ad6265SDimitry Andric // Taken attribute will be propagated backwards from Target towards Source 328*81ad6265SDimitry Andric Nodes[Target].Taken = true; 329*81ad6265SDimitry Andric 330*81ad6265SDimitry Andric // Phase 1: Start DFS traversal from Source 331*81ad6265SDimitry Andric Stack.emplace(Source, 0); 332*81ad6265SDimitry Andric Nodes[Source].Discovery = ++Time; 333*81ad6265SDimitry Andric while (!Stack.empty()) { 334*81ad6265SDimitry Andric auto NodeIdx = Stack.top().first; 335*81ad6265SDimitry Andric auto EdgeIdx = Stack.top().second; 336*81ad6265SDimitry Andric 337*81ad6265SDimitry Andric // If we haven't scanned all edges out of NodeIdx, continue scanning 338*81ad6265SDimitry Andric if (EdgeIdx < Edges[NodeIdx].size()) { 339*81ad6265SDimitry Andric auto &Edge = Edges[NodeIdx][EdgeIdx]; 340*81ad6265SDimitry Andric auto &Dst = Nodes[Edge.Dst]; 341*81ad6265SDimitry Andric Stack.top().second++; 342*81ad6265SDimitry Andric 343*81ad6265SDimitry Andric if (Edge.OnShortestPath) { 344*81ad6265SDimitry Andric // If we haven't seen Edge.Dst so far, continue DFS search there 345*81ad6265SDimitry Andric if (Dst.Discovery == 0 && Dst.NumCalls < SampleProfileMaxDfsCalls) { 346*81ad6265SDimitry Andric Dst.Discovery = ++Time; 347*81ad6265SDimitry Andric Stack.emplace(Edge.Dst, 0); 348*81ad6265SDimitry Andric Dst.NumCalls++; 349*81ad6265SDimitry Andric } else if (Dst.Taken && Dst.Finish != 0) { 350*81ad6265SDimitry Andric // Else, if Edge.Dst already have a path to Target, so that NodeIdx 351*81ad6265SDimitry Andric Nodes[NodeIdx].Taken = true; 352*81ad6265SDimitry Andric } 353*81ad6265SDimitry Andric } 354*81ad6265SDimitry Andric } else { 355*81ad6265SDimitry Andric // If we are done scanning all edge out of NodeIdx 356*81ad6265SDimitry Andric Stack.pop(); 357*81ad6265SDimitry Andric // If we haven't found a path from NodeIdx to Target, forget about it 358*81ad6265SDimitry Andric if (!Nodes[NodeIdx].Taken) { 359*81ad6265SDimitry Andric Nodes[NodeIdx].Discovery = 0; 360*81ad6265SDimitry Andric } else { 361*81ad6265SDimitry Andric // If we have found a path from NodeIdx to Target, then finish NodeIdx 362*81ad6265SDimitry Andric // and propagate Taken flag to DFS parent unless at the Source 363*81ad6265SDimitry Andric Nodes[NodeIdx].Finish = ++Time; 364*81ad6265SDimitry Andric // NodeIdx == Source if and only if the stack is empty 365*81ad6265SDimitry Andric if (NodeIdx != Source) { 366*81ad6265SDimitry Andric assert(!Stack.empty() && "empty stack while running dfs"); 367*81ad6265SDimitry Andric Nodes[Stack.top().first].Taken = true; 368*81ad6265SDimitry Andric } 369*81ad6265SDimitry Andric AugmentingOrder.push_back(NodeIdx); 370*81ad6265SDimitry Andric } 371*81ad6265SDimitry Andric } 372*81ad6265SDimitry Andric } 373*81ad6265SDimitry Andric // Nodes are collected decreasing Finish time, so the order is reversed 374*81ad6265SDimitry Andric std::reverse(AugmentingOrder.begin(), AugmentingOrder.end()); 375*81ad6265SDimitry Andric 376*81ad6265SDimitry Andric // Phase 2: Extract all forward (DAG) edges and fill in AugmentingEdges 377*81ad6265SDimitry Andric for (size_t Src : AugmentingOrder) { 378*81ad6265SDimitry Andric AugmentingEdges[Src].clear(); 379*81ad6265SDimitry Andric for (auto &Edge : Edges[Src]) { 380*81ad6265SDimitry Andric uint64_t Dst = Edge.Dst; 381*81ad6265SDimitry Andric if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken && 382*81ad6265SDimitry Andric Nodes[Dst].Finish < Nodes[Src].Finish) { 383*81ad6265SDimitry Andric AugmentingEdges[Src].push_back(&Edge); 384*81ad6265SDimitry Andric } 385*81ad6265SDimitry Andric } 386*81ad6265SDimitry Andric assert((Src == Target || !AugmentingEdges[Src].empty()) && 387*81ad6265SDimitry Andric "incorrectly constructed augmenting edges"); 388*81ad6265SDimitry Andric } 389*81ad6265SDimitry Andric 390*81ad6265SDimitry Andric return AugmentingOrder; 391*81ad6265SDimitry Andric } 392*81ad6265SDimitry Andric 393*81ad6265SDimitry Andric /// Update the current flow along the given (acyclic) subgraph specified by 394*81ad6265SDimitry Andric /// the vertex order, AugmentingOrder. The objective is to send as much flow 395*81ad6265SDimitry Andric /// as possible while evenly distributing flow among successors of each node. 396*81ad6265SDimitry Andric /// After the update at least one edge is saturated. 397*81ad6265SDimitry Andric bool augmentFlowAlongDAG(const std::vector<uint64_t> &AugmentingOrder) { 398*81ad6265SDimitry Andric // Phase 0: Initialization 399*81ad6265SDimitry Andric for (uint64_t Src : AugmentingOrder) { 400*81ad6265SDimitry Andric Nodes[Src].FracFlow = 0; 401*81ad6265SDimitry Andric Nodes[Src].IntFlow = 0; 402*81ad6265SDimitry Andric for (auto &Edge : AugmentingEdges[Src]) { 403*81ad6265SDimitry Andric Edge->AugmentedFlow = 0; 404*81ad6265SDimitry Andric } 405*81ad6265SDimitry Andric } 406*81ad6265SDimitry Andric 407*81ad6265SDimitry Andric // Phase 1: Send a unit of fractional flow along the DAG 408*81ad6265SDimitry Andric uint64_t MaxFlowAmount = INF; 409*81ad6265SDimitry Andric Nodes[Source].FracFlow = 1.0; 410*81ad6265SDimitry Andric for (uint64_t Src : AugmentingOrder) { 411*81ad6265SDimitry Andric assert((Src == Target || Nodes[Src].FracFlow > 0.0) && 412*81ad6265SDimitry Andric "incorrectly computed fractional flow"); 413*81ad6265SDimitry Andric // Distribute flow evenly among successors of Src 414*81ad6265SDimitry Andric uint64_t Degree = AugmentingEdges[Src].size(); 415*81ad6265SDimitry Andric for (auto &Edge : AugmentingEdges[Src]) { 416*81ad6265SDimitry Andric double EdgeFlow = Nodes[Src].FracFlow / Degree; 417*81ad6265SDimitry Andric Nodes[Edge->Dst].FracFlow += EdgeFlow; 418*81ad6265SDimitry Andric if (Edge->Capacity == INF) 419*81ad6265SDimitry Andric continue; 420*81ad6265SDimitry Andric uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow; 421*81ad6265SDimitry Andric MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow); 422*81ad6265SDimitry Andric } 423*81ad6265SDimitry Andric } 424*81ad6265SDimitry Andric // Stop early if we cannot send any (integral) flow from Source to Target 425*81ad6265SDimitry Andric if (MaxFlowAmount == 0) 426*81ad6265SDimitry Andric return false; 427*81ad6265SDimitry Andric 428*81ad6265SDimitry Andric // Phase 2: Send an integral flow of MaxFlowAmount 429*81ad6265SDimitry Andric Nodes[Source].IntFlow = MaxFlowAmount; 430*81ad6265SDimitry Andric for (uint64_t Src : AugmentingOrder) { 431*81ad6265SDimitry Andric if (Src == Target) 432*81ad6265SDimitry Andric break; 433*81ad6265SDimitry Andric // Distribute flow evenly among successors of Src, rounding up to make 434*81ad6265SDimitry Andric // sure all flow is sent 435*81ad6265SDimitry Andric uint64_t Degree = AugmentingEdges[Src].size(); 436*81ad6265SDimitry Andric // We are guaranteeed that Node[Src].IntFlow <= SuccFlow * Degree 437*81ad6265SDimitry Andric uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree; 438*81ad6265SDimitry Andric for (auto &Edge : AugmentingEdges[Src]) { 439*81ad6265SDimitry Andric uint64_t Dst = Edge->Dst; 440*81ad6265SDimitry Andric uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow); 441*81ad6265SDimitry Andric EdgeFlow = std::min(EdgeFlow, uint64_t(Edge->Capacity - Edge->Flow)); 442*81ad6265SDimitry Andric Nodes[Dst].IntFlow += EdgeFlow; 443*81ad6265SDimitry Andric Nodes[Src].IntFlow -= EdgeFlow; 444*81ad6265SDimitry Andric Edge->AugmentedFlow += EdgeFlow; 445*81ad6265SDimitry Andric } 446*81ad6265SDimitry Andric } 447*81ad6265SDimitry Andric assert(Nodes[Target].IntFlow <= MaxFlowAmount); 448*81ad6265SDimitry Andric Nodes[Target].IntFlow = 0; 449*81ad6265SDimitry Andric 450*81ad6265SDimitry Andric // Phase 3: Send excess flow back traversing the nodes backwards. 451*81ad6265SDimitry Andric // Because of rounding, not all flow can be sent along the edges of Src. 452*81ad6265SDimitry Andric // Hence, sending the remaining flow back to maintain flow conservation 453*81ad6265SDimitry Andric for (size_t Idx = AugmentingOrder.size() - 1; Idx > 0; Idx--) { 454*81ad6265SDimitry Andric uint64_t Src = AugmentingOrder[Idx - 1]; 455*81ad6265SDimitry Andric // Try to send excess flow back along each edge. 456*81ad6265SDimitry Andric // Make sure we only send back flow we just augmented (AugmentedFlow). 457*81ad6265SDimitry Andric for (auto &Edge : AugmentingEdges[Src]) { 458*81ad6265SDimitry Andric uint64_t Dst = Edge->Dst; 459*81ad6265SDimitry Andric if (Nodes[Dst].IntFlow == 0) 460*81ad6265SDimitry Andric continue; 461*81ad6265SDimitry Andric uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow); 462*81ad6265SDimitry Andric Nodes[Dst].IntFlow -= EdgeFlow; 463*81ad6265SDimitry Andric Nodes[Src].IntFlow += EdgeFlow; 464*81ad6265SDimitry Andric Edge->AugmentedFlow -= EdgeFlow; 465*81ad6265SDimitry Andric } 466*81ad6265SDimitry Andric } 467*81ad6265SDimitry Andric 468*81ad6265SDimitry Andric // Phase 4: Update flow values along all edges 469*81ad6265SDimitry Andric bool HasSaturatedEdges = false; 470*81ad6265SDimitry Andric for (uint64_t Src : AugmentingOrder) { 471*81ad6265SDimitry Andric // Verify that we have sent all the excess flow from the node 472*81ad6265SDimitry Andric assert(Src == Source || Nodes[Src].IntFlow == 0); 473*81ad6265SDimitry Andric for (auto &Edge : AugmentingEdges[Src]) { 474*81ad6265SDimitry Andric assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow); 475*81ad6265SDimitry Andric // Update flow values along the edge and its reverse copy 476*81ad6265SDimitry Andric auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex]; 477*81ad6265SDimitry Andric Edge->Flow += Edge->AugmentedFlow; 478*81ad6265SDimitry Andric RevEdge.Flow -= Edge->AugmentedFlow; 479*81ad6265SDimitry Andric if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0) 480*81ad6265SDimitry Andric HasSaturatedEdges = true; 481*81ad6265SDimitry Andric } 482*81ad6265SDimitry Andric } 483*81ad6265SDimitry Andric 484*81ad6265SDimitry Andric // The augmentation is successful iff at least one edge becomes saturated 485*81ad6265SDimitry Andric return HasSaturatedEdges; 486*81ad6265SDimitry Andric } 487*81ad6265SDimitry Andric 488*81ad6265SDimitry Andric /// Identify candidate (shortest) edges for augmentation. 489*81ad6265SDimitry Andric void identifyShortestEdges(uint64_t PathCapacity) { 490*81ad6265SDimitry Andric assert(PathCapacity > 0 && "found an incorrect augmenting DAG"); 491*81ad6265SDimitry Andric // To make sure the augmentation DAG contains only edges with large residual 492*81ad6265SDimitry Andric // capacity, we prune all edges whose capacity is below a fraction of 493*81ad6265SDimitry Andric // the capacity of the augmented path. 494*81ad6265SDimitry Andric // (All edges of the path itself are always in the DAG) 495*81ad6265SDimitry Andric uint64_t MinCapacity = std::max(PathCapacity / 2, uint64_t(1)); 496*81ad6265SDimitry Andric 497*81ad6265SDimitry Andric // Decide which edges are on a shortest path from Source to Target 498*81ad6265SDimitry Andric for (size_t Src = 0; Src < Nodes.size(); Src++) { 499*81ad6265SDimitry Andric // An edge cannot be augmenting if the endpoint has large distance 500*81ad6265SDimitry Andric if (Nodes[Src].Distance > Nodes[Target].Distance) 501*81ad6265SDimitry Andric continue; 502*81ad6265SDimitry Andric 503*81ad6265SDimitry Andric for (auto &Edge : Edges[Src]) { 504*81ad6265SDimitry Andric uint64_t Dst = Edge.Dst; 505*81ad6265SDimitry Andric Edge.OnShortestPath = 506*81ad6265SDimitry Andric Src != Target && Dst != Source && 507*81ad6265SDimitry Andric Nodes[Dst].Distance <= Nodes[Target].Distance && 508*81ad6265SDimitry Andric Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost && 509*81ad6265SDimitry Andric Edge.Capacity > Edge.Flow && 510*81ad6265SDimitry Andric uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity; 511*81ad6265SDimitry Andric } 512*81ad6265SDimitry Andric } 513*81ad6265SDimitry Andric } 514*81ad6265SDimitry Andric 51504eeddc0SDimitry Andric /// A node in a flow network. 5164824e7fdSDimitry Andric struct Node { 5174824e7fdSDimitry Andric /// The cost of the cheapest path from the source to the current node. 5184824e7fdSDimitry Andric int64_t Distance; 5194824e7fdSDimitry Andric /// The node preceding the current one in the path. 5204824e7fdSDimitry Andric uint64_t ParentNode; 5214824e7fdSDimitry Andric /// The index of the edge between ParentNode and the current node. 5224824e7fdSDimitry Andric uint64_t ParentEdgeIndex; 5234824e7fdSDimitry Andric /// An indicator of whether the current node is in a queue. 5244824e7fdSDimitry Andric bool Taken; 525*81ad6265SDimitry Andric 526*81ad6265SDimitry Andric /// Data fields utilized in DAG-augmentation: 527*81ad6265SDimitry Andric /// Fractional flow. 528*81ad6265SDimitry Andric double FracFlow; 529*81ad6265SDimitry Andric /// Integral flow. 530*81ad6265SDimitry Andric uint64_t IntFlow; 531*81ad6265SDimitry Andric /// Discovery time. 532*81ad6265SDimitry Andric uint64_t Discovery; 533*81ad6265SDimitry Andric /// Finish time. 534*81ad6265SDimitry Andric uint64_t Finish; 535*81ad6265SDimitry Andric /// NumCalls. 536*81ad6265SDimitry Andric uint64_t NumCalls; 5374824e7fdSDimitry Andric }; 538*81ad6265SDimitry Andric 5394824e7fdSDimitry Andric /// An edge in a flow network. 5404824e7fdSDimitry Andric struct Edge { 5414824e7fdSDimitry Andric /// The cost of the edge. 5424824e7fdSDimitry Andric int64_t Cost; 5434824e7fdSDimitry Andric /// The capacity of the edge. 5444824e7fdSDimitry Andric int64_t Capacity; 5454824e7fdSDimitry Andric /// The current flow on the edge. 5464824e7fdSDimitry Andric int64_t Flow; 5474824e7fdSDimitry Andric /// The destination node of the edge. 5484824e7fdSDimitry Andric uint64_t Dst; 5494824e7fdSDimitry Andric /// The index of the reverse edge between Dst and the current node. 5504824e7fdSDimitry Andric uint64_t RevEdgeIndex; 551*81ad6265SDimitry Andric 552*81ad6265SDimitry Andric /// Data fields utilized in DAG-augmentation: 553*81ad6265SDimitry Andric /// Whether the edge is currently on a shortest path from Source to Target. 554*81ad6265SDimitry Andric bool OnShortestPath; 555*81ad6265SDimitry Andric /// Extra flow along the edge. 556*81ad6265SDimitry Andric uint64_t AugmentedFlow; 5574824e7fdSDimitry Andric }; 5584824e7fdSDimitry Andric 5594824e7fdSDimitry Andric /// The set of network nodes. 5604824e7fdSDimitry Andric std::vector<Node> Nodes; 5614824e7fdSDimitry Andric /// The set of network edges. 5624824e7fdSDimitry Andric std::vector<std::vector<Edge>> Edges; 5634824e7fdSDimitry Andric /// Source node of the flow. 5644824e7fdSDimitry Andric uint64_t Source; 5654824e7fdSDimitry Andric /// Target (sink) node of the flow. 5664824e7fdSDimitry Andric uint64_t Target; 567*81ad6265SDimitry Andric /// Augmenting edges. 568*81ad6265SDimitry Andric std::vector<std::vector<Edge *>> AugmentingEdges; 5694824e7fdSDimitry Andric }; 5704824e7fdSDimitry Andric 571*81ad6265SDimitry Andric constexpr int64_t MinCostMaxFlow::AuxCostUnlikely; 572*81ad6265SDimitry Andric constexpr uint64_t MinCostMaxFlow::MinBaseDistance; 573*81ad6265SDimitry Andric 5740eae32dcSDimitry Andric /// A post-processing adjustment of control flow. It applies two steps by 5750eae32dcSDimitry Andric /// rerouting some flow and making it more realistic: 5760eae32dcSDimitry Andric /// 5770eae32dcSDimitry Andric /// - First, it removes all isolated components ("islands") with a positive flow 5780eae32dcSDimitry Andric /// that are unreachable from the entry block. For every such component, we 5790eae32dcSDimitry Andric /// find the shortest from the entry to an exit passing through the component, 5800eae32dcSDimitry Andric /// and increase the flow by one unit along the path. 5810eae32dcSDimitry Andric /// 5820eae32dcSDimitry Andric /// - Second, it identifies all "unknown subgraphs" consisting of basic blocks 5830eae32dcSDimitry Andric /// with no sampled counts. Then it rebalnces the flow that goes through such 5840eae32dcSDimitry Andric /// a subgraph so that each branch is taken with probability 50%. 5850eae32dcSDimitry Andric /// An unknown subgraph is such that for every two nodes u and v: 5860eae32dcSDimitry Andric /// - u dominates v and u is not unknown; 5870eae32dcSDimitry Andric /// - v post-dominates u; and 5880eae32dcSDimitry Andric /// - all inner-nodes of all (u,v)-paths are unknown. 5890eae32dcSDimitry Andric /// 5900eae32dcSDimitry Andric class FlowAdjuster { 5910eae32dcSDimitry Andric public: 5920eae32dcSDimitry Andric FlowAdjuster(FlowFunction &Func) : Func(Func) { 5930eae32dcSDimitry Andric assert(Func.Blocks[Func.Entry].isEntry() && 5940eae32dcSDimitry Andric "incorrect index of the entry block"); 5950eae32dcSDimitry Andric } 5960eae32dcSDimitry Andric 5970eae32dcSDimitry Andric // Run the post-processing 5980eae32dcSDimitry Andric void run() { 5990eae32dcSDimitry Andric /// Adjust the flow to get rid of isolated components. 6000eae32dcSDimitry Andric joinIsolatedComponents(); 6010eae32dcSDimitry Andric 6020eae32dcSDimitry Andric /// Rebalance the flow inside unknown subgraphs. 6030eae32dcSDimitry Andric rebalanceUnknownSubgraphs(); 6040eae32dcSDimitry Andric } 6050eae32dcSDimitry Andric 6060eae32dcSDimitry Andric private: 6070eae32dcSDimitry Andric void joinIsolatedComponents() { 6080eae32dcSDimitry Andric // Find blocks that are reachable from the source 60904eeddc0SDimitry Andric auto Visited = BitVector(NumBlocks(), false); 6100eae32dcSDimitry Andric findReachable(Func.Entry, Visited); 6110eae32dcSDimitry Andric 6120eae32dcSDimitry Andric // Iterate over all non-reachable blocks and adjust their weights 6130eae32dcSDimitry Andric for (uint64_t I = 0; I < NumBlocks(); I++) { 6140eae32dcSDimitry Andric auto &Block = Func.Blocks[I]; 6150eae32dcSDimitry Andric if (Block.Flow > 0 && !Visited[I]) { 6160eae32dcSDimitry Andric // Find a path from the entry to an exit passing through the block I 6170eae32dcSDimitry Andric auto Path = findShortestPath(I); 6180eae32dcSDimitry Andric // Increase the flow along the path 6190eae32dcSDimitry Andric assert(Path.size() > 0 && Path[0]->Source == Func.Entry && 6200eae32dcSDimitry Andric "incorrectly computed path adjusting control flow"); 6210eae32dcSDimitry Andric Func.Blocks[Func.Entry].Flow += 1; 6220eae32dcSDimitry Andric for (auto &Jump : Path) { 6230eae32dcSDimitry Andric Jump->Flow += 1; 6240eae32dcSDimitry Andric Func.Blocks[Jump->Target].Flow += 1; 6250eae32dcSDimitry Andric // Update reachability 6260eae32dcSDimitry Andric findReachable(Jump->Target, Visited); 6270eae32dcSDimitry Andric } 6280eae32dcSDimitry Andric } 6290eae32dcSDimitry Andric } 6300eae32dcSDimitry Andric } 6310eae32dcSDimitry Andric 6320eae32dcSDimitry Andric /// Run BFS from a given block along the jumps with a positive flow and mark 6330eae32dcSDimitry Andric /// all reachable blocks. 63404eeddc0SDimitry Andric void findReachable(uint64_t Src, BitVector &Visited) { 6350eae32dcSDimitry Andric if (Visited[Src]) 6360eae32dcSDimitry Andric return; 6370eae32dcSDimitry Andric std::queue<uint64_t> Queue; 6380eae32dcSDimitry Andric Queue.push(Src); 6390eae32dcSDimitry Andric Visited[Src] = true; 6400eae32dcSDimitry Andric while (!Queue.empty()) { 6410eae32dcSDimitry Andric Src = Queue.front(); 6420eae32dcSDimitry Andric Queue.pop(); 6430eae32dcSDimitry Andric for (auto Jump : Func.Blocks[Src].SuccJumps) { 6440eae32dcSDimitry Andric uint64_t Dst = Jump->Target; 6450eae32dcSDimitry Andric if (Jump->Flow > 0 && !Visited[Dst]) { 6460eae32dcSDimitry Andric Queue.push(Dst); 6470eae32dcSDimitry Andric Visited[Dst] = true; 6480eae32dcSDimitry Andric } 6490eae32dcSDimitry Andric } 6500eae32dcSDimitry Andric } 6510eae32dcSDimitry Andric } 6520eae32dcSDimitry Andric 6530eae32dcSDimitry Andric /// Find the shortest path from the entry block to an exit block passing 6540eae32dcSDimitry Andric /// through a given block. 6550eae32dcSDimitry Andric std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) { 6560eae32dcSDimitry Andric // A path from the entry block to BlockIdx 6570eae32dcSDimitry Andric auto ForwardPath = findShortestPath(Func.Entry, BlockIdx); 6580eae32dcSDimitry Andric // A path from BlockIdx to an exit block 6590eae32dcSDimitry Andric auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock); 6600eae32dcSDimitry Andric 6610eae32dcSDimitry Andric // Concatenate the two paths 6620eae32dcSDimitry Andric std::vector<FlowJump *> Result; 6630eae32dcSDimitry Andric Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end()); 6640eae32dcSDimitry Andric Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end()); 6650eae32dcSDimitry Andric return Result; 6660eae32dcSDimitry Andric } 6670eae32dcSDimitry Andric 6680eae32dcSDimitry Andric /// Apply the Dijkstra algorithm to find the shortest path from a given 6690eae32dcSDimitry Andric /// Source to a given Target block. 6700eae32dcSDimitry Andric /// If Target == -1, then the path ends at an exit block. 6710eae32dcSDimitry Andric std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) { 6720eae32dcSDimitry Andric // Quit early, if possible 6730eae32dcSDimitry Andric if (Source == Target) 6740eae32dcSDimitry Andric return std::vector<FlowJump *>(); 6750eae32dcSDimitry Andric if (Func.Blocks[Source].isExit() && Target == AnyExitBlock) 6760eae32dcSDimitry Andric return std::vector<FlowJump *>(); 6770eae32dcSDimitry Andric 6780eae32dcSDimitry Andric // Initialize data structures 6790eae32dcSDimitry Andric auto Distance = std::vector<int64_t>(NumBlocks(), INF); 6800eae32dcSDimitry Andric auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr); 6810eae32dcSDimitry Andric Distance[Source] = 0; 6820eae32dcSDimitry Andric std::set<std::pair<uint64_t, uint64_t>> Queue; 6830eae32dcSDimitry Andric Queue.insert(std::make_pair(Distance[Source], Source)); 6840eae32dcSDimitry Andric 6850eae32dcSDimitry Andric // Run the Dijkstra algorithm 6860eae32dcSDimitry Andric while (!Queue.empty()) { 6870eae32dcSDimitry Andric uint64_t Src = Queue.begin()->second; 6880eae32dcSDimitry Andric Queue.erase(Queue.begin()); 6890eae32dcSDimitry Andric // If we found a solution, quit early 6900eae32dcSDimitry Andric if (Src == Target || 6910eae32dcSDimitry Andric (Func.Blocks[Src].isExit() && Target == AnyExitBlock)) 6920eae32dcSDimitry Andric break; 6930eae32dcSDimitry Andric 6940eae32dcSDimitry Andric for (auto Jump : Func.Blocks[Src].SuccJumps) { 6950eae32dcSDimitry Andric uint64_t Dst = Jump->Target; 6960eae32dcSDimitry Andric int64_t JumpDist = jumpDistance(Jump); 6970eae32dcSDimitry Andric if (Distance[Dst] > Distance[Src] + JumpDist) { 6980eae32dcSDimitry Andric Queue.erase(std::make_pair(Distance[Dst], Dst)); 6990eae32dcSDimitry Andric 7000eae32dcSDimitry Andric Distance[Dst] = Distance[Src] + JumpDist; 7010eae32dcSDimitry Andric Parent[Dst] = Jump; 7020eae32dcSDimitry Andric 7030eae32dcSDimitry Andric Queue.insert(std::make_pair(Distance[Dst], Dst)); 7040eae32dcSDimitry Andric } 7050eae32dcSDimitry Andric } 7060eae32dcSDimitry Andric } 7070eae32dcSDimitry Andric // If Target is not provided, find the closest exit block 7080eae32dcSDimitry Andric if (Target == AnyExitBlock) { 7090eae32dcSDimitry Andric for (uint64_t I = 0; I < NumBlocks(); I++) { 7100eae32dcSDimitry Andric if (Func.Blocks[I].isExit() && Parent[I] != nullptr) { 7110eae32dcSDimitry Andric if (Target == AnyExitBlock || Distance[Target] > Distance[I]) { 7120eae32dcSDimitry Andric Target = I; 7130eae32dcSDimitry Andric } 7140eae32dcSDimitry Andric } 7150eae32dcSDimitry Andric } 7160eae32dcSDimitry Andric } 7170eae32dcSDimitry Andric assert(Parent[Target] != nullptr && "a path does not exist"); 7180eae32dcSDimitry Andric 7190eae32dcSDimitry Andric // Extract the constructed path 7200eae32dcSDimitry Andric std::vector<FlowJump *> Result; 7210eae32dcSDimitry Andric uint64_t Now = Target; 7220eae32dcSDimitry Andric while (Now != Source) { 7230eae32dcSDimitry Andric assert(Now == Parent[Now]->Target && "incorrect parent jump"); 7240eae32dcSDimitry Andric Result.push_back(Parent[Now]); 7250eae32dcSDimitry Andric Now = Parent[Now]->Source; 7260eae32dcSDimitry Andric } 7270eae32dcSDimitry Andric // Reverse the path, since it is extracted from Target to Source 7280eae32dcSDimitry Andric std::reverse(Result.begin(), Result.end()); 7290eae32dcSDimitry Andric return Result; 7300eae32dcSDimitry Andric } 7310eae32dcSDimitry Andric 7320eae32dcSDimitry Andric /// A distance of a path for a given jump. 7330eae32dcSDimitry Andric /// In order to incite the path to use blocks/jumps with large positive flow, 7340eae32dcSDimitry Andric /// and avoid changing branch probability of outgoing edges drastically, 735*81ad6265SDimitry Andric /// set the jump distance so as: 736*81ad6265SDimitry Andric /// - to minimize the number of unlikely jumps used and subject to that, 737*81ad6265SDimitry Andric /// - to minimize the number of Flow == 0 jumps used and subject to that, 738*81ad6265SDimitry Andric /// - minimizes total multiplicative Flow increase for the remaining edges. 739*81ad6265SDimitry Andric /// To capture this objective with integer distances, we round off fractional 740*81ad6265SDimitry Andric /// parts to a multiple of 1 / BaseDistance. 7410eae32dcSDimitry Andric int64_t jumpDistance(FlowJump *Jump) const { 742*81ad6265SDimitry Andric uint64_t BaseDistance = 743*81ad6265SDimitry Andric std::max(static_cast<uint64_t>(MinCostMaxFlow::MinBaseDistance), 744*81ad6265SDimitry Andric std::min(Func.Blocks[Func.Entry].Flow, 745*81ad6265SDimitry Andric MinCostMaxFlow::AuxCostUnlikely / NumBlocks())); 7460eae32dcSDimitry Andric if (Jump->IsUnlikely) 7470eae32dcSDimitry Andric return MinCostMaxFlow::AuxCostUnlikely; 7480eae32dcSDimitry Andric if (Jump->Flow > 0) 749*81ad6265SDimitry Andric return BaseDistance + BaseDistance / Jump->Flow; 750*81ad6265SDimitry Andric return BaseDistance * NumBlocks(); 7510eae32dcSDimitry Andric }; 7520eae32dcSDimitry Andric 7530eae32dcSDimitry Andric uint64_t NumBlocks() const { return Func.Blocks.size(); } 7540eae32dcSDimitry Andric 75504eeddc0SDimitry Andric /// Rebalance unknown subgraphs so that the flow is split evenly across the 75604eeddc0SDimitry Andric /// outgoing branches of every block of the subgraph. The method iterates over 75704eeddc0SDimitry Andric /// blocks with known weight and identifies unknown subgraphs rooted at the 75804eeddc0SDimitry Andric /// blocks. Then it verifies if flow rebalancing is feasible and applies it. 7590eae32dcSDimitry Andric void rebalanceUnknownSubgraphs() { 76004eeddc0SDimitry Andric // Try to find unknown subgraphs from each block 7610eae32dcSDimitry Andric for (uint64_t I = 0; I < Func.Blocks.size(); I++) { 7620eae32dcSDimitry Andric auto SrcBlock = &Func.Blocks[I]; 76304eeddc0SDimitry Andric // Verify if rebalancing rooted at SrcBlock is feasible 76404eeddc0SDimitry Andric if (!canRebalanceAtRoot(SrcBlock)) 7650eae32dcSDimitry Andric continue; 7660eae32dcSDimitry Andric 76704eeddc0SDimitry Andric // Find an unknown subgraphs starting at SrcBlock. Along the way, 76804eeddc0SDimitry Andric // fill in known destinations and intermediate unknown blocks. 76904eeddc0SDimitry Andric std::vector<FlowBlock *> UnknownBlocks; 77004eeddc0SDimitry Andric std::vector<FlowBlock *> KnownDstBlocks; 77104eeddc0SDimitry Andric findUnknownSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks); 77204eeddc0SDimitry Andric 77304eeddc0SDimitry Andric // Verify if rebalancing of the subgraph is feasible. If the search is 77404eeddc0SDimitry Andric // successful, find the unique destination block (which can be null) 7750eae32dcSDimitry Andric FlowBlock *DstBlock = nullptr; 77604eeddc0SDimitry Andric if (!canRebalanceSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks, 77704eeddc0SDimitry Andric DstBlock)) 7780eae32dcSDimitry Andric continue; 77904eeddc0SDimitry Andric 78004eeddc0SDimitry Andric // We cannot rebalance subgraphs containing cycles among unknown blocks 78104eeddc0SDimitry Andric if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownBlocks)) 7820eae32dcSDimitry Andric continue; 7830eae32dcSDimitry Andric 7840eae32dcSDimitry Andric // Rebalance the flow 78504eeddc0SDimitry Andric rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownBlocks); 7860eae32dcSDimitry Andric } 7870eae32dcSDimitry Andric } 7880eae32dcSDimitry Andric 78904eeddc0SDimitry Andric /// Verify if rebalancing rooted at a given block is possible. 79004eeddc0SDimitry Andric bool canRebalanceAtRoot(const FlowBlock *SrcBlock) { 79104eeddc0SDimitry Andric // Do not attempt to find unknown subgraphs from an unknown or a 79204eeddc0SDimitry Andric // zero-flow block 79304eeddc0SDimitry Andric if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0) 79404eeddc0SDimitry Andric return false; 79504eeddc0SDimitry Andric 79604eeddc0SDimitry Andric // Do not attempt to process subgraphs from a block w/o unknown sucessors 79704eeddc0SDimitry Andric bool HasUnknownSuccs = false; 79804eeddc0SDimitry Andric for (auto Jump : SrcBlock->SuccJumps) { 79904eeddc0SDimitry Andric if (Func.Blocks[Jump->Target].UnknownWeight) { 80004eeddc0SDimitry Andric HasUnknownSuccs = true; 80104eeddc0SDimitry Andric break; 80204eeddc0SDimitry Andric } 80304eeddc0SDimitry Andric } 80404eeddc0SDimitry Andric if (!HasUnknownSuccs) 80504eeddc0SDimitry Andric return false; 80604eeddc0SDimitry Andric 80704eeddc0SDimitry Andric return true; 80804eeddc0SDimitry Andric } 80904eeddc0SDimitry Andric 81004eeddc0SDimitry Andric /// Find an unknown subgraph starting at block SrcBlock. The method sets 81104eeddc0SDimitry Andric /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks. 81204eeddc0SDimitry Andric void findUnknownSubgraph(const FlowBlock *SrcBlock, 81304eeddc0SDimitry Andric std::vector<FlowBlock *> &KnownDstBlocks, 81404eeddc0SDimitry Andric std::vector<FlowBlock *> &UnknownBlocks) { 8150eae32dcSDimitry Andric // Run BFS from SrcBlock and make sure all paths are going through unknown 816*81ad6265SDimitry Andric // blocks and end at a known DstBlock 81704eeddc0SDimitry Andric auto Visited = BitVector(NumBlocks(), false); 8180eae32dcSDimitry Andric std::queue<uint64_t> Queue; 8190eae32dcSDimitry Andric 8200eae32dcSDimitry Andric Queue.push(SrcBlock->Index); 8210eae32dcSDimitry Andric Visited[SrcBlock->Index] = true; 8220eae32dcSDimitry Andric while (!Queue.empty()) { 8230eae32dcSDimitry Andric auto &Block = Func.Blocks[Queue.front()]; 8240eae32dcSDimitry Andric Queue.pop(); 8250eae32dcSDimitry Andric // Process blocks reachable from Block 8260eae32dcSDimitry Andric for (auto Jump : Block.SuccJumps) { 82704eeddc0SDimitry Andric // If Jump can be ignored, skip it 82804eeddc0SDimitry Andric if (ignoreJump(SrcBlock, nullptr, Jump)) 82904eeddc0SDimitry Andric continue; 83004eeddc0SDimitry Andric 8310eae32dcSDimitry Andric uint64_t Dst = Jump->Target; 83204eeddc0SDimitry Andric // If Dst has been visited, skip Jump 8330eae32dcSDimitry Andric if (Visited[Dst]) 8340eae32dcSDimitry Andric continue; 83504eeddc0SDimitry Andric // Process block Dst 8360eae32dcSDimitry Andric Visited[Dst] = true; 8370eae32dcSDimitry Andric if (!Func.Blocks[Dst].UnknownWeight) { 83804eeddc0SDimitry Andric KnownDstBlocks.push_back(&Func.Blocks[Dst]); 8390eae32dcSDimitry Andric } else { 8400eae32dcSDimitry Andric Queue.push(Dst); 84104eeddc0SDimitry Andric UnknownBlocks.push_back(&Func.Blocks[Dst]); 84204eeddc0SDimitry Andric } 8430eae32dcSDimitry Andric } 8440eae32dcSDimitry Andric } 8450eae32dcSDimitry Andric } 8460eae32dcSDimitry Andric 84704eeddc0SDimitry Andric /// Verify if rebalancing of the subgraph is feasible. If the checks are 84804eeddc0SDimitry Andric /// successful, set the unique destination block, DstBlock (can be null). 84904eeddc0SDimitry Andric bool canRebalanceSubgraph(const FlowBlock *SrcBlock, 85004eeddc0SDimitry Andric const std::vector<FlowBlock *> &KnownDstBlocks, 85104eeddc0SDimitry Andric const std::vector<FlowBlock *> &UnknownBlocks, 85204eeddc0SDimitry Andric FlowBlock *&DstBlock) { 8530eae32dcSDimitry Andric // If the list of unknown blocks is empty, we don't need rebalancing 85404eeddc0SDimitry Andric if (UnknownBlocks.empty()) 8550eae32dcSDimitry Andric return false; 85604eeddc0SDimitry Andric 85704eeddc0SDimitry Andric // If there are multiple known sinks, we can't rebalance 85804eeddc0SDimitry Andric if (KnownDstBlocks.size() > 1) 8590eae32dcSDimitry Andric return false; 86004eeddc0SDimitry Andric DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front(); 86104eeddc0SDimitry Andric 86204eeddc0SDimitry Andric // Verify sinks of the subgraph 86304eeddc0SDimitry Andric for (auto Block : UnknownBlocks) { 86404eeddc0SDimitry Andric if (Block->SuccJumps.empty()) { 86504eeddc0SDimitry Andric // If there are multiple (known and unknown) sinks, we can't rebalance 86604eeddc0SDimitry Andric if (DstBlock != nullptr) 86704eeddc0SDimitry Andric return false; 86804eeddc0SDimitry Andric continue; 86904eeddc0SDimitry Andric } 87004eeddc0SDimitry Andric size_t NumIgnoredJumps = 0; 87104eeddc0SDimitry Andric for (auto Jump : Block->SuccJumps) { 87204eeddc0SDimitry Andric if (ignoreJump(SrcBlock, DstBlock, Jump)) 87304eeddc0SDimitry Andric NumIgnoredJumps++; 87404eeddc0SDimitry Andric } 87504eeddc0SDimitry Andric // If there is a non-sink block in UnknownBlocks with all jumps ignored, 87604eeddc0SDimitry Andric // then we can't rebalance 87704eeddc0SDimitry Andric if (NumIgnoredJumps == Block->SuccJumps.size()) 8780eae32dcSDimitry Andric return false; 8790eae32dcSDimitry Andric } 8800eae32dcSDimitry Andric 8810eae32dcSDimitry Andric return true; 8820eae32dcSDimitry Andric } 8830eae32dcSDimitry Andric 88404eeddc0SDimitry Andric /// Decide whether the Jump is ignored while processing an unknown subgraphs 88504eeddc0SDimitry Andric /// rooted at basic block SrcBlock with the destination block, DstBlock. 88604eeddc0SDimitry Andric bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock, 88704eeddc0SDimitry Andric const FlowJump *Jump) { 88804eeddc0SDimitry Andric // Ignore unlikely jumps with zero flow 88904eeddc0SDimitry Andric if (Jump->IsUnlikely && Jump->Flow == 0) 89004eeddc0SDimitry Andric return true; 89104eeddc0SDimitry Andric 89204eeddc0SDimitry Andric auto JumpSource = &Func.Blocks[Jump->Source]; 89304eeddc0SDimitry Andric auto JumpTarget = &Func.Blocks[Jump->Target]; 89404eeddc0SDimitry Andric 89504eeddc0SDimitry Andric // Do not ignore jumps coming into DstBlock 89604eeddc0SDimitry Andric if (DstBlock != nullptr && JumpTarget == DstBlock) 89704eeddc0SDimitry Andric return false; 89804eeddc0SDimitry Andric 89904eeddc0SDimitry Andric // Ignore jumps out of SrcBlock to known blocks 90004eeddc0SDimitry Andric if (!JumpTarget->UnknownWeight && JumpSource == SrcBlock) 90104eeddc0SDimitry Andric return true; 90204eeddc0SDimitry Andric 90304eeddc0SDimitry Andric // Ignore jumps to known blocks with zero flow 90404eeddc0SDimitry Andric if (!JumpTarget->UnknownWeight && JumpTarget->Flow == 0) 90504eeddc0SDimitry Andric return true; 90604eeddc0SDimitry Andric 90704eeddc0SDimitry Andric return false; 90804eeddc0SDimitry Andric } 90904eeddc0SDimitry Andric 9100eae32dcSDimitry Andric /// Verify if the given unknown subgraph is acyclic, and if yes, reorder 91104eeddc0SDimitry Andric /// UnknownBlocks in the topological order (so that all jumps are "forward"). 91204eeddc0SDimitry Andric bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock, 91304eeddc0SDimitry Andric std::vector<FlowBlock *> &UnknownBlocks) { 9140eae32dcSDimitry Andric // Extract local in-degrees in the considered subgraph 9150eae32dcSDimitry Andric auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0); 91604eeddc0SDimitry Andric auto fillInDegree = [&](const FlowBlock *Block) { 91704eeddc0SDimitry Andric for (auto Jump : Block->SuccJumps) { 91804eeddc0SDimitry Andric if (ignoreJump(SrcBlock, DstBlock, Jump)) 91904eeddc0SDimitry Andric continue; 9200eae32dcSDimitry Andric LocalInDegree[Jump->Target]++; 9210eae32dcSDimitry Andric } 92204eeddc0SDimitry Andric }; 92304eeddc0SDimitry Andric fillInDegree(SrcBlock); 92404eeddc0SDimitry Andric for (auto Block : UnknownBlocks) { 92504eeddc0SDimitry Andric fillInDegree(Block); 9260eae32dcSDimitry Andric } 9270eae32dcSDimitry Andric // A loop containing SrcBlock 9280eae32dcSDimitry Andric if (LocalInDegree[SrcBlock->Index] > 0) 9290eae32dcSDimitry Andric return false; 9300eae32dcSDimitry Andric 9310eae32dcSDimitry Andric std::vector<FlowBlock *> AcyclicOrder; 9320eae32dcSDimitry Andric std::queue<uint64_t> Queue; 9330eae32dcSDimitry Andric Queue.push(SrcBlock->Index); 9340eae32dcSDimitry Andric while (!Queue.empty()) { 93504eeddc0SDimitry Andric FlowBlock *Block = &Func.Blocks[Queue.front()]; 9360eae32dcSDimitry Andric Queue.pop(); 93704eeddc0SDimitry Andric // Stop propagation once we reach DstBlock, if any 93804eeddc0SDimitry Andric if (DstBlock != nullptr && Block == DstBlock) 9390eae32dcSDimitry Andric break; 9400eae32dcSDimitry Andric 94104eeddc0SDimitry Andric // Keep an acyclic order of unknown blocks 94204eeddc0SDimitry Andric if (Block->UnknownWeight && Block != SrcBlock) 94304eeddc0SDimitry Andric AcyclicOrder.push_back(Block); 94404eeddc0SDimitry Andric 9450eae32dcSDimitry Andric // Add to the queue all successors with zero local in-degree 94604eeddc0SDimitry Andric for (auto Jump : Block->SuccJumps) { 94704eeddc0SDimitry Andric if (ignoreJump(SrcBlock, DstBlock, Jump)) 94804eeddc0SDimitry Andric continue; 9490eae32dcSDimitry Andric uint64_t Dst = Jump->Target; 9500eae32dcSDimitry Andric LocalInDegree[Dst]--; 9510eae32dcSDimitry Andric if (LocalInDegree[Dst] == 0) { 9520eae32dcSDimitry Andric Queue.push(Dst); 9530eae32dcSDimitry Andric } 9540eae32dcSDimitry Andric } 9550eae32dcSDimitry Andric } 9560eae32dcSDimitry Andric 9570eae32dcSDimitry Andric // If there is a cycle in the subgraph, AcyclicOrder contains only a subset 9580eae32dcSDimitry Andric // of all blocks 95904eeddc0SDimitry Andric if (UnknownBlocks.size() != AcyclicOrder.size()) 9600eae32dcSDimitry Andric return false; 96104eeddc0SDimitry Andric UnknownBlocks = AcyclicOrder; 9620eae32dcSDimitry Andric return true; 9630eae32dcSDimitry Andric } 9640eae32dcSDimitry Andric 96504eeddc0SDimitry Andric /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and 96604eeddc0SDimitry Andric /// having UnknownBlocks intermediate blocks. 96704eeddc0SDimitry Andric void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock, 96804eeddc0SDimitry Andric const FlowBlock *DstBlock, 96904eeddc0SDimitry Andric const std::vector<FlowBlock *> &UnknownBlocks) { 9700eae32dcSDimitry Andric assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph"); 9710eae32dcSDimitry Andric 97204eeddc0SDimitry Andric // Ditribute flow from the source block 97304eeddc0SDimitry Andric uint64_t BlockFlow = 0; 97404eeddc0SDimitry Andric // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps 97504eeddc0SDimitry Andric for (auto Jump : SrcBlock->SuccJumps) { 97604eeddc0SDimitry Andric if (ignoreJump(SrcBlock, DstBlock, Jump)) 9770eae32dcSDimitry Andric continue; 97804eeddc0SDimitry Andric BlockFlow += Jump->Flow; 9790eae32dcSDimitry Andric } 98004eeddc0SDimitry Andric rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow); 98104eeddc0SDimitry Andric 98204eeddc0SDimitry Andric // Ditribute flow from the remaining blocks 98304eeddc0SDimitry Andric for (auto Block : UnknownBlocks) { 98404eeddc0SDimitry Andric assert(Block->UnknownWeight && "incorrect unknown subgraph"); 98504eeddc0SDimitry Andric uint64_t BlockFlow = 0; 98604eeddc0SDimitry Andric // Block's flow is the sum of incoming flows 98704eeddc0SDimitry Andric for (auto Jump : Block->PredJumps) { 98804eeddc0SDimitry Andric BlockFlow += Jump->Flow; 98904eeddc0SDimitry Andric } 99004eeddc0SDimitry Andric Block->Flow = BlockFlow; 99104eeddc0SDimitry Andric rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow); 99204eeddc0SDimitry Andric } 99304eeddc0SDimitry Andric } 99404eeddc0SDimitry Andric 99504eeddc0SDimitry Andric /// Redistribute flow for a block in a subgraph rooted at SrcBlock, 99604eeddc0SDimitry Andric /// and ending at DstBlock. 99704eeddc0SDimitry Andric void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock, 99804eeddc0SDimitry Andric const FlowBlock *Block, uint64_t BlockFlow) { 99904eeddc0SDimitry Andric // Process all successor jumps and update corresponding flow values 100004eeddc0SDimitry Andric size_t BlockDegree = 0; 100104eeddc0SDimitry Andric for (auto Jump : Block->SuccJumps) { 100204eeddc0SDimitry Andric if (ignoreJump(SrcBlock, DstBlock, Jump)) 100304eeddc0SDimitry Andric continue; 100404eeddc0SDimitry Andric BlockDegree++; 100504eeddc0SDimitry Andric } 100604eeddc0SDimitry Andric // If all successor jumps of the block are ignored, skip it 100704eeddc0SDimitry Andric if (DstBlock == nullptr && BlockDegree == 0) 100804eeddc0SDimitry Andric return; 100904eeddc0SDimitry Andric assert(BlockDegree > 0 && "all outgoing jumps are ignored"); 101004eeddc0SDimitry Andric 101104eeddc0SDimitry Andric // Each of the Block's successors gets the following amount of flow. 101204eeddc0SDimitry Andric // Rounding the value up so that all flow is propagated 101304eeddc0SDimitry Andric uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree; 101404eeddc0SDimitry Andric for (auto Jump : Block->SuccJumps) { 101504eeddc0SDimitry Andric if (ignoreJump(SrcBlock, DstBlock, Jump)) 101604eeddc0SDimitry Andric continue; 101704eeddc0SDimitry Andric uint64_t Flow = std::min(SuccFlow, BlockFlow); 10180eae32dcSDimitry Andric Jump->Flow = Flow; 101904eeddc0SDimitry Andric BlockFlow -= Flow; 10200eae32dcSDimitry Andric } 102104eeddc0SDimitry Andric assert(BlockFlow == 0 && "not all flow is propagated"); 10220eae32dcSDimitry Andric } 10230eae32dcSDimitry Andric 10240eae32dcSDimitry Andric /// A constant indicating an arbitrary exit block of a function. 10250eae32dcSDimitry Andric static constexpr uint64_t AnyExitBlock = uint64_t(-1); 10260eae32dcSDimitry Andric 10270eae32dcSDimitry Andric /// The function. 10280eae32dcSDimitry Andric FlowFunction &Func; 10290eae32dcSDimitry Andric }; 10300eae32dcSDimitry Andric 10314824e7fdSDimitry Andric /// Initializing flow network for a given function. 10324824e7fdSDimitry Andric /// 10334824e7fdSDimitry Andric /// Every block is split into three nodes that are responsible for (i) an 10344824e7fdSDimitry Andric /// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or 10354824e7fdSDimitry Andric /// reduction of the block weight. 10364824e7fdSDimitry Andric void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) { 10374824e7fdSDimitry Andric uint64_t NumBlocks = Func.Blocks.size(); 10384824e7fdSDimitry Andric assert(NumBlocks > 1 && "Too few blocks in a function"); 10394824e7fdSDimitry Andric LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n"); 10404824e7fdSDimitry Andric 10414824e7fdSDimitry Andric // Pre-process data: make sure the entry weight is at least 1 10424824e7fdSDimitry Andric if (Func.Blocks[Func.Entry].Weight == 0) { 10434824e7fdSDimitry Andric Func.Blocks[Func.Entry].Weight = 1; 10444824e7fdSDimitry Andric } 10454824e7fdSDimitry Andric // Introducing dummy source/sink pairs to allow flow circulation. 10464824e7fdSDimitry Andric // The nodes corresponding to blocks of Func have indicies in the range 10474824e7fdSDimitry Andric // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values. 10484824e7fdSDimitry Andric uint64_t S = 3 * NumBlocks; 10494824e7fdSDimitry Andric uint64_t T = S + 1; 10504824e7fdSDimitry Andric uint64_t S1 = S + 2; 10514824e7fdSDimitry Andric uint64_t T1 = S + 3; 10524824e7fdSDimitry Andric 10534824e7fdSDimitry Andric Network.initialize(3 * NumBlocks + 4, S1, T1); 10544824e7fdSDimitry Andric 10554824e7fdSDimitry Andric // Create three nodes for every block of the function 10564824e7fdSDimitry Andric for (uint64_t B = 0; B < NumBlocks; B++) { 10574824e7fdSDimitry Andric auto &Block = Func.Blocks[B]; 10584824e7fdSDimitry Andric assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) && 10594824e7fdSDimitry Andric "non-zero weight of a block w/o weight except for an entry"); 10604824e7fdSDimitry Andric 10614824e7fdSDimitry Andric // Split every block into two nodes 10624824e7fdSDimitry Andric uint64_t Bin = 3 * B; 10634824e7fdSDimitry Andric uint64_t Bout = 3 * B + 1; 10644824e7fdSDimitry Andric uint64_t Baux = 3 * B + 2; 10654824e7fdSDimitry Andric if (Block.Weight > 0) { 10664824e7fdSDimitry Andric Network.addEdge(S1, Bout, Block.Weight, 0); 10674824e7fdSDimitry Andric Network.addEdge(Bin, T1, Block.Weight, 0); 10684824e7fdSDimitry Andric } 10694824e7fdSDimitry Andric 10704824e7fdSDimitry Andric // Edges from S and to T 10714824e7fdSDimitry Andric assert((!Block.isEntry() || !Block.isExit()) && 10724824e7fdSDimitry Andric "a block cannot be an entry and an exit"); 10734824e7fdSDimitry Andric if (Block.isEntry()) { 10744824e7fdSDimitry Andric Network.addEdge(S, Bin, 0); 10754824e7fdSDimitry Andric } else if (Block.isExit()) { 10764824e7fdSDimitry Andric Network.addEdge(Bout, T, 0); 10774824e7fdSDimitry Andric } 10784824e7fdSDimitry Andric 10794824e7fdSDimitry Andric // An auxiliary node to allow increase/reduction of block counts: 10804824e7fdSDimitry Andric // We assume that decreasing block counts is more expensive than increasing, 10814824e7fdSDimitry Andric // and thus, setting separate costs here. In the future we may want to tune 10824824e7fdSDimitry Andric // the relative costs so as to maximize the quality of generated profiles. 1083*81ad6265SDimitry Andric int64_t AuxCostInc = SampleProfileProfiCostInc; 1084*81ad6265SDimitry Andric int64_t AuxCostDec = SampleProfileProfiCostDec; 10854824e7fdSDimitry Andric if (Block.UnknownWeight) { 10864824e7fdSDimitry Andric // Do not penalize changing weights of blocks w/o known profile count 10874824e7fdSDimitry Andric AuxCostInc = 0; 10884824e7fdSDimitry Andric AuxCostDec = 0; 10894824e7fdSDimitry Andric } else { 10904824e7fdSDimitry Andric // Increasing the count for "cold" blocks with zero initial count is more 10914824e7fdSDimitry Andric // expensive than for "hot" ones 10924824e7fdSDimitry Andric if (Block.Weight == 0) { 1093*81ad6265SDimitry Andric AuxCostInc = SampleProfileProfiCostIncZero; 10944824e7fdSDimitry Andric } 10954824e7fdSDimitry Andric // Modifying the count of the entry block is expensive 10964824e7fdSDimitry Andric if (Block.isEntry()) { 1097*81ad6265SDimitry Andric AuxCostInc = SampleProfileProfiCostIncEntry; 1098*81ad6265SDimitry Andric AuxCostDec = SampleProfileProfiCostDecEntry; 10994824e7fdSDimitry Andric } 11004824e7fdSDimitry Andric } 11014824e7fdSDimitry Andric // For blocks with self-edges, do not penalize a reduction of the count, 11024824e7fdSDimitry Andric // as all of the increase can be attributed to the self-edge 11034824e7fdSDimitry Andric if (Block.HasSelfEdge) { 11044824e7fdSDimitry Andric AuxCostDec = 0; 11054824e7fdSDimitry Andric } 11064824e7fdSDimitry Andric 11074824e7fdSDimitry Andric Network.addEdge(Bin, Baux, AuxCostInc); 11084824e7fdSDimitry Andric Network.addEdge(Baux, Bout, AuxCostInc); 11094824e7fdSDimitry Andric if (Block.Weight > 0) { 11104824e7fdSDimitry Andric Network.addEdge(Bout, Baux, AuxCostDec); 11114824e7fdSDimitry Andric Network.addEdge(Baux, Bin, AuxCostDec); 11124824e7fdSDimitry Andric } 11134824e7fdSDimitry Andric } 11144824e7fdSDimitry Andric 11154824e7fdSDimitry Andric // Creating edges for every jump 11164824e7fdSDimitry Andric for (auto &Jump : Func.Jumps) { 11174824e7fdSDimitry Andric uint64_t Src = Jump.Source; 11184824e7fdSDimitry Andric uint64_t Dst = Jump.Target; 11194824e7fdSDimitry Andric if (Src != Dst) { 11204824e7fdSDimitry Andric uint64_t SrcOut = 3 * Src + 1; 11214824e7fdSDimitry Andric uint64_t DstIn = 3 * Dst; 11224824e7fdSDimitry Andric uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0; 11234824e7fdSDimitry Andric Network.addEdge(SrcOut, DstIn, Cost); 11244824e7fdSDimitry Andric } 11254824e7fdSDimitry Andric } 11264824e7fdSDimitry Andric 11274824e7fdSDimitry Andric // Make sure we have a valid flow circulation 11284824e7fdSDimitry Andric Network.addEdge(T, S, 0); 11294824e7fdSDimitry Andric } 11304824e7fdSDimitry Andric 11314824e7fdSDimitry Andric /// Extract resulting block and edge counts from the flow network. 11324824e7fdSDimitry Andric void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) { 11334824e7fdSDimitry Andric uint64_t NumBlocks = Func.Blocks.size(); 11344824e7fdSDimitry Andric 11354824e7fdSDimitry Andric // Extract resulting block counts 11364824e7fdSDimitry Andric for (uint64_t Src = 0; Src < NumBlocks; Src++) { 11374824e7fdSDimitry Andric auto &Block = Func.Blocks[Src]; 11384824e7fdSDimitry Andric uint64_t SrcOut = 3 * Src + 1; 11394824e7fdSDimitry Andric int64_t Flow = 0; 11404824e7fdSDimitry Andric for (auto &Adj : Network.getFlow(SrcOut)) { 11414824e7fdSDimitry Andric uint64_t DstIn = Adj.first; 11424824e7fdSDimitry Andric int64_t DstFlow = Adj.second; 11434824e7fdSDimitry Andric bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2); 11444824e7fdSDimitry Andric if (!IsAuxNode || Block.HasSelfEdge) { 11454824e7fdSDimitry Andric Flow += DstFlow; 11464824e7fdSDimitry Andric } 11474824e7fdSDimitry Andric } 11484824e7fdSDimitry Andric Block.Flow = Flow; 11494824e7fdSDimitry Andric assert(Flow >= 0 && "negative block flow"); 11504824e7fdSDimitry Andric } 11514824e7fdSDimitry Andric 11524824e7fdSDimitry Andric // Extract resulting jump counts 11534824e7fdSDimitry Andric for (auto &Jump : Func.Jumps) { 11544824e7fdSDimitry Andric uint64_t Src = Jump.Source; 11554824e7fdSDimitry Andric uint64_t Dst = Jump.Target; 11564824e7fdSDimitry Andric int64_t Flow = 0; 11574824e7fdSDimitry Andric if (Src != Dst) { 11584824e7fdSDimitry Andric uint64_t SrcOut = 3 * Src + 1; 11594824e7fdSDimitry Andric uint64_t DstIn = 3 * Dst; 11604824e7fdSDimitry Andric Flow = Network.getFlow(SrcOut, DstIn); 11614824e7fdSDimitry Andric } else { 11624824e7fdSDimitry Andric uint64_t SrcOut = 3 * Src + 1; 11634824e7fdSDimitry Andric uint64_t SrcAux = 3 * Src + 2; 11644824e7fdSDimitry Andric int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux); 11654824e7fdSDimitry Andric if (AuxFlow > 0) 11664824e7fdSDimitry Andric Flow = AuxFlow; 11674824e7fdSDimitry Andric } 11684824e7fdSDimitry Andric Jump.Flow = Flow; 11694824e7fdSDimitry Andric assert(Flow >= 0 && "negative jump flow"); 11704824e7fdSDimitry Andric } 11714824e7fdSDimitry Andric } 11724824e7fdSDimitry Andric 11734824e7fdSDimitry Andric #ifndef NDEBUG 11744824e7fdSDimitry Andric /// Verify that the computed flow values satisfy flow conservation rules 11754824e7fdSDimitry Andric void verifyWeights(const FlowFunction &Func) { 11764824e7fdSDimitry Andric const uint64_t NumBlocks = Func.Blocks.size(); 11774824e7fdSDimitry Andric auto InFlow = std::vector<uint64_t>(NumBlocks, 0); 11784824e7fdSDimitry Andric auto OutFlow = std::vector<uint64_t>(NumBlocks, 0); 11794824e7fdSDimitry Andric for (auto &Jump : Func.Jumps) { 11804824e7fdSDimitry Andric InFlow[Jump.Target] += Jump.Flow; 11814824e7fdSDimitry Andric OutFlow[Jump.Source] += Jump.Flow; 11824824e7fdSDimitry Andric } 11834824e7fdSDimitry Andric 11844824e7fdSDimitry Andric uint64_t TotalInFlow = 0; 11854824e7fdSDimitry Andric uint64_t TotalOutFlow = 0; 11864824e7fdSDimitry Andric for (uint64_t I = 0; I < NumBlocks; I++) { 11874824e7fdSDimitry Andric auto &Block = Func.Blocks[I]; 11884824e7fdSDimitry Andric if (Block.isEntry()) { 11894824e7fdSDimitry Andric TotalInFlow += Block.Flow; 11904824e7fdSDimitry Andric assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow"); 11914824e7fdSDimitry Andric } else if (Block.isExit()) { 11924824e7fdSDimitry Andric TotalOutFlow += Block.Flow; 11934824e7fdSDimitry Andric assert(Block.Flow == InFlow[I] && "incorrectly computed control flow"); 11944824e7fdSDimitry Andric } else { 11954824e7fdSDimitry Andric assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow"); 11964824e7fdSDimitry Andric assert(Block.Flow == InFlow[I] && "incorrectly computed control flow"); 11974824e7fdSDimitry Andric } 11984824e7fdSDimitry Andric } 11994824e7fdSDimitry Andric assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow"); 12000eae32dcSDimitry Andric 12010eae32dcSDimitry Andric // Verify that there are no isolated flow components 12020eae32dcSDimitry Andric // One could modify FlowFunction to hold edges indexed by the sources, which 12030eae32dcSDimitry Andric // will avoid a creation of the object 12040eae32dcSDimitry Andric auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks); 12050eae32dcSDimitry Andric for (auto &Jump : Func.Jumps) { 12060eae32dcSDimitry Andric if (Jump.Flow > 0) { 12070eae32dcSDimitry Andric PositiveFlowEdges[Jump.Source].push_back(Jump.Target); 12080eae32dcSDimitry Andric } 12090eae32dcSDimitry Andric } 12100eae32dcSDimitry Andric 12110eae32dcSDimitry Andric // Run BFS from the source along edges with positive flow 12120eae32dcSDimitry Andric std::queue<uint64_t> Queue; 121304eeddc0SDimitry Andric auto Visited = BitVector(NumBlocks, false); 12140eae32dcSDimitry Andric Queue.push(Func.Entry); 12150eae32dcSDimitry Andric Visited[Func.Entry] = true; 12160eae32dcSDimitry Andric while (!Queue.empty()) { 12170eae32dcSDimitry Andric uint64_t Src = Queue.front(); 12180eae32dcSDimitry Andric Queue.pop(); 12190eae32dcSDimitry Andric for (uint64_t Dst : PositiveFlowEdges[Src]) { 12200eae32dcSDimitry Andric if (!Visited[Dst]) { 12210eae32dcSDimitry Andric Queue.push(Dst); 12220eae32dcSDimitry Andric Visited[Dst] = true; 12230eae32dcSDimitry Andric } 12240eae32dcSDimitry Andric } 12250eae32dcSDimitry Andric } 12260eae32dcSDimitry Andric 12270eae32dcSDimitry Andric // Verify that every block that has a positive flow is reached from the source 12280eae32dcSDimitry Andric // along edges with a positive flow 12290eae32dcSDimitry Andric for (uint64_t I = 0; I < NumBlocks; I++) { 12300eae32dcSDimitry Andric auto &Block = Func.Blocks[I]; 12310eae32dcSDimitry Andric assert((Visited[I] || Block.Flow == 0) && "an isolated flow component"); 12320eae32dcSDimitry Andric } 12334824e7fdSDimitry Andric } 12344824e7fdSDimitry Andric #endif 12354824e7fdSDimitry Andric 12364824e7fdSDimitry Andric } // end of anonymous namespace 12374824e7fdSDimitry Andric 12384824e7fdSDimitry Andric /// Apply the profile inference algorithm for a given flow function 12394824e7fdSDimitry Andric void llvm::applyFlowInference(FlowFunction &Func) { 12404824e7fdSDimitry Andric // Create and apply an inference network model 12414824e7fdSDimitry Andric auto InferenceNetwork = MinCostMaxFlow(); 12424824e7fdSDimitry Andric initializeNetwork(InferenceNetwork, Func); 12434824e7fdSDimitry Andric InferenceNetwork.run(); 12444824e7fdSDimitry Andric 12454824e7fdSDimitry Andric // Extract flow values for every block and every edge 12464824e7fdSDimitry Andric extractWeights(InferenceNetwork, Func); 12474824e7fdSDimitry Andric 12480eae32dcSDimitry Andric // Post-processing adjustments to the flow 12490eae32dcSDimitry Andric auto Adjuster = FlowAdjuster(Func); 12500eae32dcSDimitry Andric Adjuster.run(); 12510eae32dcSDimitry Andric 12524824e7fdSDimitry Andric #ifndef NDEBUG 12534824e7fdSDimitry Andric // Verify the result 12544824e7fdSDimitry Andric verifyWeights(Func); 12554824e7fdSDimitry Andric #endif 12564824e7fdSDimitry Andric } 1257