xref: /llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp (revision 1650f1b3d7f97ca95eb930984e74bdfd91b02b4e)
17cc2493dSspupyrev //===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
27cc2493dSspupyrev //
37cc2493dSspupyrev // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
47cc2493dSspupyrev // See https://llvm.org/LICENSE.txt for license information.
57cc2493dSspupyrev // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
67cc2493dSspupyrev //
77cc2493dSspupyrev //===----------------------------------------------------------------------===//
87cc2493dSspupyrev //
97cc2493dSspupyrev // This file implements a profile inference algorithm. Given an incomplete and
107cc2493dSspupyrev // possibly imprecise block counts, the algorithm reconstructs realistic block
117cc2493dSspupyrev // and edge counts that satisfy flow conservation rules, while minimally modify
127cc2493dSspupyrev // input block counts.
137cc2493dSspupyrev //
147cc2493dSspupyrev //===----------------------------------------------------------------------===//
157cc2493dSspupyrev 
167cc2493dSspupyrev #include "llvm/Transforms/Utils/SampleProfileInference.h"
175f4ae564SJan Svoboda #include "llvm/ADT/BitVector.h"
18f2ade65fSspupyrev #include "llvm/Support/CommandLine.h"
197cc2493dSspupyrev #include "llvm/Support/Debug.h"
207cc2493dSspupyrev #include <queue>
217cc2493dSspupyrev #include <set>
22f2ade65fSspupyrev #include <stack>
232ee4ddaeSspupyrev #include <unordered_set>
247cc2493dSspupyrev 
257cc2493dSspupyrev using namespace llvm;
267cc2493dSspupyrev #define DEBUG_TYPE "sample-profile-inference"
277cc2493dSspupyrev 
287cc2493dSspupyrev namespace {
297cc2493dSspupyrev 
3061eb12e1Sspupyrev static cl::opt<bool> SampleProfileEvenFlowDistribution(
3161eb12e1Sspupyrev     "sample-profile-even-flow-distribution", cl::init(true), cl::Hidden,
3261eb12e1Sspupyrev     cl::desc("Try to evenly distribute flow when there are multiple equally "
33f2ade65fSspupyrev              "likely options."));
34f2ade65fSspupyrev 
3561eb12e1Sspupyrev static cl::opt<bool> SampleProfileRebalanceUnknown(
3661eb12e1Sspupyrev     "sample-profile-rebalance-unknown", cl::init(true), cl::Hidden,
3761eb12e1Sspupyrev     cl::desc("Evenly re-distribute flow among unknown subgraphs."));
38f2ade65fSspupyrev 
3961eb12e1Sspupyrev static cl::opt<bool> SampleProfileJoinIslands(
4061eb12e1Sspupyrev     "sample-profile-join-islands", cl::init(true), cl::Hidden,
4161eb12e1Sspupyrev     cl::desc("Join isolated components having positive flow."));
4281aedab7Sspupyrev 
4361eb12e1Sspupyrev static cl::opt<unsigned> SampleProfileProfiCostBlockInc(
4461eb12e1Sspupyrev     "sample-profile-profi-cost-block-inc", cl::init(10), cl::Hidden,
4561eb12e1Sspupyrev     cl::desc("The cost of increasing a block's count by one."));
4681aedab7Sspupyrev 
4761eb12e1Sspupyrev static cl::opt<unsigned> SampleProfileProfiCostBlockDec(
4861eb12e1Sspupyrev     "sample-profile-profi-cost-block-dec", cl::init(20), cl::Hidden,
4961eb12e1Sspupyrev     cl::desc("The cost of decreasing a block's count by one."));
5081aedab7Sspupyrev 
5161eb12e1Sspupyrev static cl::opt<unsigned> SampleProfileProfiCostBlockEntryInc(
5261eb12e1Sspupyrev     "sample-profile-profi-cost-block-entry-inc", cl::init(40), cl::Hidden,
5361eb12e1Sspupyrev     cl::desc("The cost of increasing the entry block's count by one."));
5481aedab7Sspupyrev 
5561eb12e1Sspupyrev static cl::opt<unsigned> SampleProfileProfiCostBlockEntryDec(
5661eb12e1Sspupyrev     "sample-profile-profi-cost-block-entry-dec", cl::init(10), cl::Hidden,
5761eb12e1Sspupyrev     cl::desc("The cost of decreasing the entry block's count by one."));
5861eb12e1Sspupyrev 
5961eb12e1Sspupyrev static cl::opt<unsigned> SampleProfileProfiCostBlockZeroInc(
6061eb12e1Sspupyrev     "sample-profile-profi-cost-block-zero-inc", cl::init(11), cl::Hidden,
6161eb12e1Sspupyrev     cl::desc("The cost of increasing a count of zero-weight block by one."));
6261eb12e1Sspupyrev 
6361eb12e1Sspupyrev static cl::opt<unsigned> SampleProfileProfiCostBlockUnknownInc(
6461eb12e1Sspupyrev     "sample-profile-profi-cost-block-unknown-inc", cl::init(0), cl::Hidden,
6561eb12e1Sspupyrev     cl::desc("The cost of increasing an unknown block's count by one."));
6681aedab7Sspupyrev 
677cc2493dSspupyrev /// A value indicating an infinite flow/capacity/weight of a block/edge.
687cc2493dSspupyrev /// Not using numeric_limits<int64_t>::max(), as the values can be summed up
697cc2493dSspupyrev /// during the execution.
707cc2493dSspupyrev static constexpr int64_t INF = ((int64_t)1) << 50;
717cc2493dSspupyrev 
727cc2493dSspupyrev /// The minimum-cost maximum flow algorithm.
737cc2493dSspupyrev ///
747cc2493dSspupyrev /// The algorithm finds the maximum flow of minimum cost on a given (directed)
757cc2493dSspupyrev /// network using a modified version of the classical Moore-Bellman-Ford
767cc2493dSspupyrev /// approach. The algorithm applies a number of augmentation iterations in which
777cc2493dSspupyrev /// flow is sent along paths of positive capacity from the source to the sink.
787cc2493dSspupyrev /// The worst-case time complexity of the implementation is O(v(f)*m*n), where
797cc2493dSspupyrev /// where m is the number of edges, n is the number of vertices, and v(f) is the
807cc2493dSspupyrev /// value of the maximum flow. However, the observed running time on typical
817cc2493dSspupyrev /// instances is sub-quadratic, that is, o(n^2).
827cc2493dSspupyrev ///
837cc2493dSspupyrev /// The input is a set of edges with specified costs and capacities, and a pair
847cc2493dSspupyrev /// of nodes (source and sink). The output is the flow along each edge of the
857cc2493dSspupyrev /// minimum total cost respecting the given edge capacities.
867cc2493dSspupyrev class MinCostMaxFlow {
877cc2493dSspupyrev public:
MinCostMaxFlow(const ProfiParams & Params)8861eb12e1Sspupyrev   MinCostMaxFlow(const ProfiParams &Params) : Params(Params) {}
8961eb12e1Sspupyrev 
907cc2493dSspupyrev   // Initialize algorithm's data structures for a network of a given size.
initialize(uint64_t NodeCount,uint64_t SourceNode,uint64_t SinkNode)917cc2493dSspupyrev   void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
927cc2493dSspupyrev     Source = SourceNode;
937cc2493dSspupyrev     Target = SinkNode;
947cc2493dSspupyrev 
957cc2493dSspupyrev     Nodes = std::vector<Node>(NodeCount);
967cc2493dSspupyrev     Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
9761eb12e1Sspupyrev     if (Params.EvenFlowDistribution)
98f2ade65fSspupyrev       AugmentingEdges =
99f2ade65fSspupyrev           std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>());
1007cc2493dSspupyrev   }
1017cc2493dSspupyrev 
1027cc2493dSspupyrev   // Run the algorithm.
run()1037cc2493dSspupyrev   int64_t run() {
10445b15592Sspupyrev     LLVM_DEBUG(dbgs() << "Starting profi for " << Nodes.size() << " nodes\n");
10545b15592Sspupyrev 
106f2ade65fSspupyrev     // Iteratively find an augmentation path/dag in the network and send the
107f2ade65fSspupyrev     // flow along its edges
108f2ade65fSspupyrev     size_t AugmentationIters = applyFlowAugmentation();
1097cc2493dSspupyrev 
1107cc2493dSspupyrev     // Compute the total flow and its cost
1117cc2493dSspupyrev     int64_t TotalCost = 0;
1127cc2493dSspupyrev     int64_t TotalFlow = 0;
1137cc2493dSspupyrev     for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
1147cc2493dSspupyrev       for (auto &Edge : Edges[Src]) {
1157cc2493dSspupyrev         if (Edge.Flow > 0) {
1167cc2493dSspupyrev           TotalCost += Edge.Cost * Edge.Flow;
1177cc2493dSspupyrev           if (Src == Source)
1187cc2493dSspupyrev             TotalFlow += Edge.Flow;
1197cc2493dSspupyrev         }
1207cc2493dSspupyrev       }
1217cc2493dSspupyrev     }
1227cc2493dSspupyrev     LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
1237cc2493dSspupyrev                       << " iterations with " << TotalFlow << " total flow"
1247cc2493dSspupyrev                       << " of " << TotalCost << " cost\n");
12522d82949SKazu Hirata     (void)TotalFlow;
126f2ade65fSspupyrev     (void)AugmentationIters;
1277cc2493dSspupyrev     return TotalCost;
1287cc2493dSspupyrev   }
1297cc2493dSspupyrev 
1307cc2493dSspupyrev   /// Adding an edge to the network with a specified capacity and a cost.
1317cc2493dSspupyrev   /// Multiple edges between a pair of nodes are allowed but self-edges
1327cc2493dSspupyrev   /// are not supported.
addEdge(uint64_t Src,uint64_t Dst,int64_t Capacity,int64_t Cost)1337cc2493dSspupyrev   void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) {
1347cc2493dSspupyrev     assert(Capacity > 0 && "adding an edge of zero capacity");
1357cc2493dSspupyrev     assert(Src != Dst && "loop edge are not supported");
1367cc2493dSspupyrev 
1377cc2493dSspupyrev     Edge SrcEdge;
1387cc2493dSspupyrev     SrcEdge.Dst = Dst;
1397cc2493dSspupyrev     SrcEdge.Cost = Cost;
1407cc2493dSspupyrev     SrcEdge.Capacity = Capacity;
1417cc2493dSspupyrev     SrcEdge.Flow = 0;
1427cc2493dSspupyrev     SrcEdge.RevEdgeIndex = Edges[Dst].size();
1437cc2493dSspupyrev 
1447cc2493dSspupyrev     Edge DstEdge;
1457cc2493dSspupyrev     DstEdge.Dst = Src;
1467cc2493dSspupyrev     DstEdge.Cost = -Cost;
1477cc2493dSspupyrev     DstEdge.Capacity = 0;
1487cc2493dSspupyrev     DstEdge.Flow = 0;
1497cc2493dSspupyrev     DstEdge.RevEdgeIndex = Edges[Src].size();
1507cc2493dSspupyrev 
1517cc2493dSspupyrev     Edges[Src].push_back(SrcEdge);
1527cc2493dSspupyrev     Edges[Dst].push_back(DstEdge);
1537cc2493dSspupyrev   }
1547cc2493dSspupyrev 
1557cc2493dSspupyrev   /// Adding an edge to the network of infinite capacity and a given cost.
addEdge(uint64_t Src,uint64_t Dst,int64_t Cost)1567cc2493dSspupyrev   void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) {
1577cc2493dSspupyrev     addEdge(Src, Dst, INF, Cost);
1587cc2493dSspupyrev   }
1597cc2493dSspupyrev 
1607cc2493dSspupyrev   /// Get the total flow from a given source node.
1617cc2493dSspupyrev   /// Returns a list of pairs (target node, amount of flow to the target).
getFlow(uint64_t Src) const1625675f44cSKazu Hirata   std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
1637cc2493dSspupyrev     std::vector<std::pair<uint64_t, int64_t>> Flow;
16450724716SKazu Hirata     for (const auto &Edge : Edges[Src]) {
1657cc2493dSspupyrev       if (Edge.Flow > 0)
1667cc2493dSspupyrev         Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
1677cc2493dSspupyrev     }
1687cc2493dSspupyrev     return Flow;
1697cc2493dSspupyrev   }
1707cc2493dSspupyrev 
1717cc2493dSspupyrev   /// Get the total flow between a pair of nodes.
getFlow(uint64_t Src,uint64_t Dst) const1727cc2493dSspupyrev   int64_t getFlow(uint64_t Src, uint64_t Dst) const {
1737cc2493dSspupyrev     int64_t Flow = 0;
17450724716SKazu Hirata     for (const auto &Edge : Edges[Src]) {
1757cc2493dSspupyrev       if (Edge.Dst == Dst) {
1767cc2493dSspupyrev         Flow += Edge.Flow;
1777cc2493dSspupyrev       }
1787cc2493dSspupyrev     }
1797cc2493dSspupyrev     return Flow;
1807cc2493dSspupyrev   }
1817cc2493dSspupyrev 
1827cc2493dSspupyrev private:
183f2ade65fSspupyrev   /// Iteratively find an augmentation path/dag in the network and send the
184f2ade65fSspupyrev   /// flow along its edges. The method returns the number of applied iterations.
applyFlowAugmentation()185f2ade65fSspupyrev   size_t applyFlowAugmentation() {
186f2ade65fSspupyrev     size_t AugmentationIters = 0;
187f2ade65fSspupyrev     while (findAugmentingPath()) {
188f2ade65fSspupyrev       uint64_t PathCapacity = computeAugmentingPathCapacity();
189f2ade65fSspupyrev       while (PathCapacity > 0) {
190f2ade65fSspupyrev         bool Progress = false;
19161eb12e1Sspupyrev         if (Params.EvenFlowDistribution) {
192f2ade65fSspupyrev           // Identify node/edge candidates for augmentation
193f2ade65fSspupyrev           identifyShortestEdges(PathCapacity);
194f2ade65fSspupyrev 
195f2ade65fSspupyrev           // Find an augmenting DAG
196f2ade65fSspupyrev           auto AugmentingOrder = findAugmentingDAG();
197f2ade65fSspupyrev 
198f2ade65fSspupyrev           // Apply the DAG augmentation
199f2ade65fSspupyrev           Progress = augmentFlowAlongDAG(AugmentingOrder);
200f2ade65fSspupyrev           PathCapacity = computeAugmentingPathCapacity();
201f2ade65fSspupyrev         }
202f2ade65fSspupyrev 
203f2ade65fSspupyrev         if (!Progress) {
204f2ade65fSspupyrev           augmentFlowAlongPath(PathCapacity);
205f2ade65fSspupyrev           PathCapacity = 0;
206f2ade65fSspupyrev         }
207f2ade65fSspupyrev 
208f2ade65fSspupyrev         AugmentationIters++;
209f2ade65fSspupyrev       }
210f2ade65fSspupyrev     }
211f2ade65fSspupyrev     return AugmentationIters;
212f2ade65fSspupyrev   }
213f2ade65fSspupyrev 
214f2ade65fSspupyrev   /// Compute the capacity of the cannonical augmenting path. If the path is
215f2ade65fSspupyrev   /// saturated (that is, no flow can be sent along the path), then return 0.
computeAugmentingPathCapacity()216f2ade65fSspupyrev   uint64_t computeAugmentingPathCapacity() {
217f2ade65fSspupyrev     uint64_t PathCapacity = INF;
218f2ade65fSspupyrev     uint64_t Now = Target;
219f2ade65fSspupyrev     while (Now != Source) {
220f2ade65fSspupyrev       uint64_t Pred = Nodes[Now].ParentNode;
221f2ade65fSspupyrev       auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
222f2ade65fSspupyrev 
223f2ade65fSspupyrev       assert(Edge.Capacity >= Edge.Flow && "incorrect edge flow");
224f2ade65fSspupyrev       uint64_t EdgeCapacity = uint64_t(Edge.Capacity - Edge.Flow);
225f2ade65fSspupyrev       PathCapacity = std::min(PathCapacity, EdgeCapacity);
226f2ade65fSspupyrev 
227f2ade65fSspupyrev       Now = Pred;
228f2ade65fSspupyrev     }
229f2ade65fSspupyrev     return PathCapacity;
230f2ade65fSspupyrev   }
231f2ade65fSspupyrev 
2327cc2493dSspupyrev   /// Check for existence of an augmenting path with a positive capacity.
findAugmentingPath()2337cc2493dSspupyrev   bool findAugmentingPath() {
2347cc2493dSspupyrev     // Initialize data structures
2357cc2493dSspupyrev     for (auto &Node : Nodes) {
2367cc2493dSspupyrev       Node.Distance = INF;
2377cc2493dSspupyrev       Node.ParentNode = uint64_t(-1);
2387cc2493dSspupyrev       Node.ParentEdgeIndex = uint64_t(-1);
2397cc2493dSspupyrev       Node.Taken = false;
2407cc2493dSspupyrev     }
2417cc2493dSspupyrev 
2427cc2493dSspupyrev     std::queue<uint64_t> Queue;
2437cc2493dSspupyrev     Queue.push(Source);
2447cc2493dSspupyrev     Nodes[Source].Distance = 0;
2457cc2493dSspupyrev     Nodes[Source].Taken = true;
2467cc2493dSspupyrev     while (!Queue.empty()) {
2477cc2493dSspupyrev       uint64_t Src = Queue.front();
2487cc2493dSspupyrev       Queue.pop();
2497cc2493dSspupyrev       Nodes[Src].Taken = false;
2507cc2493dSspupyrev       // Although the residual network contains edges with negative costs
2517cc2493dSspupyrev       // (in particular, backward edges), it can be shown that there are no
2527cc2493dSspupyrev       // negative-weight cycles and the following two invariants are maintained:
2537cc2493dSspupyrev       // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V,
2547cc2493dSspupyrev       // where Dist is the length of the shortest path between two nodes. This
2557cc2493dSspupyrev       // allows to prune the search-space of the path-finding algorithm using
2567cc2493dSspupyrev       // the following early-stop criteria:
2577cc2493dSspupyrev       // -- If we find a path with zero-distance from Source to Target, stop the
2587cc2493dSspupyrev       //    search, as the path is the shortest since Dist[Source, Target] >= 0;
2597cc2493dSspupyrev       // -- If we have Dist[Source, V] > Dist[Source, Target], then do not
2607cc2493dSspupyrev       //    process node V, as it is guaranteed _not_ to be on a shortest path
2617cc2493dSspupyrev       //    from Source to Target; it follows from inequalities
2627cc2493dSspupyrev       //    Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
2637cc2493dSspupyrev       //                         >= Dist[Source, V]
26461eb12e1Sspupyrev       if (!Params.EvenFlowDistribution && Nodes[Target].Distance == 0)
2657cc2493dSspupyrev         break;
2667cc2493dSspupyrev       if (Nodes[Src].Distance > Nodes[Target].Distance)
2677cc2493dSspupyrev         continue;
2687cc2493dSspupyrev 
2697cc2493dSspupyrev       // Process adjacent edges
2707cc2493dSspupyrev       for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
2717cc2493dSspupyrev         auto &Edge = Edges[Src][EdgeIdx];
2727cc2493dSspupyrev         if (Edge.Flow < Edge.Capacity) {
2737cc2493dSspupyrev           uint64_t Dst = Edge.Dst;
2747cc2493dSspupyrev           int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
2757cc2493dSspupyrev           if (Nodes[Dst].Distance > NewDistance) {
2767cc2493dSspupyrev             // Update the distance and the parent node/edge
2777cc2493dSspupyrev             Nodes[Dst].Distance = NewDistance;
2787cc2493dSspupyrev             Nodes[Dst].ParentNode = Src;
2797cc2493dSspupyrev             Nodes[Dst].ParentEdgeIndex = EdgeIdx;
2807cc2493dSspupyrev             // Add the node to the queue, if it is not there yet
2817cc2493dSspupyrev             if (!Nodes[Dst].Taken) {
2827cc2493dSspupyrev               Queue.push(Dst);
2837cc2493dSspupyrev               Nodes[Dst].Taken = true;
2847cc2493dSspupyrev             }
2857cc2493dSspupyrev           }
2867cc2493dSspupyrev         }
2877cc2493dSspupyrev       }
2887cc2493dSspupyrev     }
2897cc2493dSspupyrev 
2907cc2493dSspupyrev     return Nodes[Target].Distance != INF;
2917cc2493dSspupyrev   }
2927cc2493dSspupyrev 
2937cc2493dSspupyrev   /// Update the current flow along the augmenting path.
augmentFlowAlongPath(uint64_t PathCapacity)294f2ade65fSspupyrev   void augmentFlowAlongPath(uint64_t PathCapacity) {
29593a2c291Sspupyrev     assert(PathCapacity > 0 && "found an incorrect augmenting path");
296f2ade65fSspupyrev     uint64_t Now = Target;
2977cc2493dSspupyrev     while (Now != Source) {
2987cc2493dSspupyrev       uint64_t Pred = Nodes[Now].ParentNode;
2997cc2493dSspupyrev       auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
3007cc2493dSspupyrev       auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];
3017cc2493dSspupyrev 
3027cc2493dSspupyrev       Edge.Flow += PathCapacity;
3037cc2493dSspupyrev       RevEdge.Flow -= PathCapacity;
3047cc2493dSspupyrev 
3057cc2493dSspupyrev       Now = Pred;
3067cc2493dSspupyrev     }
3077cc2493dSspupyrev   }
3087cc2493dSspupyrev 
309f2ade65fSspupyrev   /// Find an Augmenting DAG order using a modified version of DFS in which we
310f2ade65fSspupyrev   /// can visit a node multiple times. In the DFS search, when scanning each
311f2ade65fSspupyrev   /// edge out of a node, continue search at Edge.Dst endpoint if it has not
312f2ade65fSspupyrev   /// been discovered yet and its NumCalls < MaxDfsCalls. The algorithm
313f2ade65fSspupyrev   /// runs in O(MaxDfsCalls * |Edges| + |Nodes|) time.
314f2ade65fSspupyrev   /// It returns an Augmenting Order (Taken nodes in decreasing Finish time)
315f2ade65fSspupyrev   /// that starts with Source and ends with Target.
findAugmentingDAG()316f2ade65fSspupyrev   std::vector<uint64_t> findAugmentingDAG() {
317f2ade65fSspupyrev     // We use a stack based implemenation of DFS to avoid recursion.
318f2ade65fSspupyrev     // Defining DFS data structures:
319f2ade65fSspupyrev     // A pair (NodeIdx, EdgeIdx) at the top of the Stack denotes that
320f2ade65fSspupyrev     //  - we are currently visiting Nodes[NodeIdx] and
321f2ade65fSspupyrev     //  - the next edge to scan is Edges[NodeIdx][EdgeIdx]
322f2ade65fSspupyrev     typedef std::pair<uint64_t, uint64_t> StackItemType;
323f2ade65fSspupyrev     std::stack<StackItemType> Stack;
324f2ade65fSspupyrev     std::vector<uint64_t> AugmentingOrder;
325f2ade65fSspupyrev 
326f2ade65fSspupyrev     // Phase 0: Initialize Node attributes and Time for DFS run
327f2ade65fSspupyrev     for (auto &Node : Nodes) {
328f2ade65fSspupyrev       Node.Discovery = 0;
329f2ade65fSspupyrev       Node.Finish = 0;
330f2ade65fSspupyrev       Node.NumCalls = 0;
331f2ade65fSspupyrev       Node.Taken = false;
332f2ade65fSspupyrev     }
333f2ade65fSspupyrev     uint64_t Time = 0;
334f2ade65fSspupyrev     // Mark Target as Taken
335f2ade65fSspupyrev     // Taken attribute will be propagated backwards from Target towards Source
336f2ade65fSspupyrev     Nodes[Target].Taken = true;
337f2ade65fSspupyrev 
338f2ade65fSspupyrev     // Phase 1: Start DFS traversal from Source
339f2ade65fSspupyrev     Stack.emplace(Source, 0);
340f2ade65fSspupyrev     Nodes[Source].Discovery = ++Time;
341f2ade65fSspupyrev     while (!Stack.empty()) {
342f2ade65fSspupyrev       auto NodeIdx = Stack.top().first;
343f2ade65fSspupyrev       auto EdgeIdx = Stack.top().second;
344f2ade65fSspupyrev 
345f2ade65fSspupyrev       // If we haven't scanned all edges out of NodeIdx, continue scanning
346f2ade65fSspupyrev       if (EdgeIdx < Edges[NodeIdx].size()) {
347f2ade65fSspupyrev         auto &Edge = Edges[NodeIdx][EdgeIdx];
348f2ade65fSspupyrev         auto &Dst = Nodes[Edge.Dst];
349f2ade65fSspupyrev         Stack.top().second++;
350f2ade65fSspupyrev 
351f2ade65fSspupyrev         if (Edge.OnShortestPath) {
352f2ade65fSspupyrev           // If we haven't seen Edge.Dst so far, continue DFS search there
35361eb12e1Sspupyrev           if (Dst.Discovery == 0 && Dst.NumCalls < MaxDfsCalls) {
354f2ade65fSspupyrev             Dst.Discovery = ++Time;
355f2ade65fSspupyrev             Stack.emplace(Edge.Dst, 0);
356f2ade65fSspupyrev             Dst.NumCalls++;
357f2ade65fSspupyrev           } else if (Dst.Taken && Dst.Finish != 0) {
358f2ade65fSspupyrev             // Else, if Edge.Dst already have a path to Target, so that NodeIdx
359f2ade65fSspupyrev             Nodes[NodeIdx].Taken = true;
360f2ade65fSspupyrev           }
361f2ade65fSspupyrev         }
362f2ade65fSspupyrev       } else {
363f2ade65fSspupyrev         // If we are done scanning all edge out of NodeIdx
364f2ade65fSspupyrev         Stack.pop();
365f2ade65fSspupyrev         // If we haven't found a path from NodeIdx to Target, forget about it
366f2ade65fSspupyrev         if (!Nodes[NodeIdx].Taken) {
367f2ade65fSspupyrev           Nodes[NodeIdx].Discovery = 0;
368f2ade65fSspupyrev         } else {
369f2ade65fSspupyrev           // If we have found a path from NodeIdx to Target, then finish NodeIdx
370f2ade65fSspupyrev           // and propagate Taken flag to DFS parent unless at the Source
371f2ade65fSspupyrev           Nodes[NodeIdx].Finish = ++Time;
372f2ade65fSspupyrev           // NodeIdx == Source if and only if the stack is empty
373f2ade65fSspupyrev           if (NodeIdx != Source) {
374f2ade65fSspupyrev             assert(!Stack.empty() && "empty stack while running dfs");
375f2ade65fSspupyrev             Nodes[Stack.top().first].Taken = true;
376f2ade65fSspupyrev           }
377f2ade65fSspupyrev           AugmentingOrder.push_back(NodeIdx);
378f2ade65fSspupyrev         }
379f2ade65fSspupyrev       }
380f2ade65fSspupyrev     }
381f2ade65fSspupyrev     // Nodes are collected decreasing Finish time, so the order is reversed
382f2ade65fSspupyrev     std::reverse(AugmentingOrder.begin(), AugmentingOrder.end());
383f2ade65fSspupyrev 
384f2ade65fSspupyrev     // Phase 2: Extract all forward (DAG) edges and fill in AugmentingEdges
385f2ade65fSspupyrev     for (size_t Src : AugmentingOrder) {
386f2ade65fSspupyrev       AugmentingEdges[Src].clear();
387f2ade65fSspupyrev       for (auto &Edge : Edges[Src]) {
388f2ade65fSspupyrev         uint64_t Dst = Edge.Dst;
389f2ade65fSspupyrev         if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken &&
390f2ade65fSspupyrev             Nodes[Dst].Finish < Nodes[Src].Finish) {
391f2ade65fSspupyrev           AugmentingEdges[Src].push_back(&Edge);
392f2ade65fSspupyrev         }
393f2ade65fSspupyrev       }
394f2ade65fSspupyrev       assert((Src == Target || !AugmentingEdges[Src].empty()) &&
395f2ade65fSspupyrev              "incorrectly constructed augmenting edges");
396f2ade65fSspupyrev     }
397f2ade65fSspupyrev 
398f2ade65fSspupyrev     return AugmentingOrder;
399f2ade65fSspupyrev   }
400f2ade65fSspupyrev 
401f2ade65fSspupyrev   /// Update the current flow along the given (acyclic) subgraph specified by
402f2ade65fSspupyrev   /// the vertex order, AugmentingOrder. The objective is to send as much flow
403f2ade65fSspupyrev   /// as possible while evenly distributing flow among successors of each node.
404f2ade65fSspupyrev   /// After the update at least one edge is saturated.
augmentFlowAlongDAG(const std::vector<uint64_t> & AugmentingOrder)405f2ade65fSspupyrev   bool augmentFlowAlongDAG(const std::vector<uint64_t> &AugmentingOrder) {
406f2ade65fSspupyrev     // Phase 0: Initialization
407f2ade65fSspupyrev     for (uint64_t Src : AugmentingOrder) {
408f2ade65fSspupyrev       Nodes[Src].FracFlow = 0;
409f2ade65fSspupyrev       Nodes[Src].IntFlow = 0;
410f2ade65fSspupyrev       for (auto &Edge : AugmentingEdges[Src]) {
411f2ade65fSspupyrev         Edge->AugmentedFlow = 0;
412f2ade65fSspupyrev       }
413f2ade65fSspupyrev     }
414f2ade65fSspupyrev 
415f2ade65fSspupyrev     // Phase 1: Send a unit of fractional flow along the DAG
416f2ade65fSspupyrev     uint64_t MaxFlowAmount = INF;
417f2ade65fSspupyrev     Nodes[Source].FracFlow = 1.0;
418f2ade65fSspupyrev     for (uint64_t Src : AugmentingOrder) {
419f2ade65fSspupyrev       assert((Src == Target || Nodes[Src].FracFlow > 0.0) &&
420f2ade65fSspupyrev              "incorrectly computed fractional flow");
421f2ade65fSspupyrev       // Distribute flow evenly among successors of Src
422f2ade65fSspupyrev       uint64_t Degree = AugmentingEdges[Src].size();
423f2ade65fSspupyrev       for (auto &Edge : AugmentingEdges[Src]) {
424f2ade65fSspupyrev         double EdgeFlow = Nodes[Src].FracFlow / Degree;
425f2ade65fSspupyrev         Nodes[Edge->Dst].FracFlow += EdgeFlow;
426f2ade65fSspupyrev         if (Edge->Capacity == INF)
427f2ade65fSspupyrev           continue;
428f2ade65fSspupyrev         uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow;
429f2ade65fSspupyrev         MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow);
430f2ade65fSspupyrev       }
431f2ade65fSspupyrev     }
432f2ade65fSspupyrev     // Stop early if we cannot send any (integral) flow from Source to Target
433f2ade65fSspupyrev     if (MaxFlowAmount == 0)
434f2ade65fSspupyrev       return false;
435f2ade65fSspupyrev 
436f2ade65fSspupyrev     // Phase 2: Send an integral flow of MaxFlowAmount
437f2ade65fSspupyrev     Nodes[Source].IntFlow = MaxFlowAmount;
438f2ade65fSspupyrev     for (uint64_t Src : AugmentingOrder) {
439f2ade65fSspupyrev       if (Src == Target)
440f2ade65fSspupyrev         break;
441f2ade65fSspupyrev       // Distribute flow evenly among successors of Src, rounding up to make
442f2ade65fSspupyrev       // sure all flow is sent
443f2ade65fSspupyrev       uint64_t Degree = AugmentingEdges[Src].size();
444f2ade65fSspupyrev       // We are guaranteeed that Node[Src].IntFlow <= SuccFlow * Degree
445f2ade65fSspupyrev       uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree;
446f2ade65fSspupyrev       for (auto &Edge : AugmentingEdges[Src]) {
447f2ade65fSspupyrev         uint64_t Dst = Edge->Dst;
448f2ade65fSspupyrev         uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow);
449f2ade65fSspupyrev         EdgeFlow = std::min(EdgeFlow, uint64_t(Edge->Capacity - Edge->Flow));
450f2ade65fSspupyrev         Nodes[Dst].IntFlow += EdgeFlow;
451f2ade65fSspupyrev         Nodes[Src].IntFlow -= EdgeFlow;
452f2ade65fSspupyrev         Edge->AugmentedFlow += EdgeFlow;
453f2ade65fSspupyrev       }
454f2ade65fSspupyrev     }
455f2ade65fSspupyrev     assert(Nodes[Target].IntFlow <= MaxFlowAmount);
456f2ade65fSspupyrev     Nodes[Target].IntFlow = 0;
457f2ade65fSspupyrev 
458f2ade65fSspupyrev     // Phase 3: Send excess flow back traversing the nodes backwards.
459f2ade65fSspupyrev     // Because of rounding, not all flow can be sent along the edges of Src.
460f2ade65fSspupyrev     // Hence, sending the remaining flow back to maintain flow conservation
461f2ade65fSspupyrev     for (size_t Idx = AugmentingOrder.size() - 1; Idx > 0; Idx--) {
462f2ade65fSspupyrev       uint64_t Src = AugmentingOrder[Idx - 1];
463f2ade65fSspupyrev       // Try to send excess flow back along each edge.
464f2ade65fSspupyrev       // Make sure we only send back flow we just augmented (AugmentedFlow).
465f2ade65fSspupyrev       for (auto &Edge : AugmentingEdges[Src]) {
466f2ade65fSspupyrev         uint64_t Dst = Edge->Dst;
467f2ade65fSspupyrev         if (Nodes[Dst].IntFlow == 0)
468f2ade65fSspupyrev           continue;
469f2ade65fSspupyrev         uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow);
470f2ade65fSspupyrev         Nodes[Dst].IntFlow -= EdgeFlow;
471f2ade65fSspupyrev         Nodes[Src].IntFlow += EdgeFlow;
472f2ade65fSspupyrev         Edge->AugmentedFlow -= EdgeFlow;
473f2ade65fSspupyrev       }
474f2ade65fSspupyrev     }
475f2ade65fSspupyrev 
476f2ade65fSspupyrev     // Phase 4: Update flow values along all edges
477f2ade65fSspupyrev     bool HasSaturatedEdges = false;
478f2ade65fSspupyrev     for (uint64_t Src : AugmentingOrder) {
479f2ade65fSspupyrev       // Verify that we have sent all the excess flow from the node
480f2ade65fSspupyrev       assert(Src == Source || Nodes[Src].IntFlow == 0);
481f2ade65fSspupyrev       for (auto &Edge : AugmentingEdges[Src]) {
482f2ade65fSspupyrev         assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow);
483f2ade65fSspupyrev         // Update flow values along the edge and its reverse copy
484f2ade65fSspupyrev         auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex];
485f2ade65fSspupyrev         Edge->Flow += Edge->AugmentedFlow;
486f2ade65fSspupyrev         RevEdge.Flow -= Edge->AugmentedFlow;
487f2ade65fSspupyrev         if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0)
488f2ade65fSspupyrev           HasSaturatedEdges = true;
489f2ade65fSspupyrev       }
490f2ade65fSspupyrev     }
491f2ade65fSspupyrev 
492f2ade65fSspupyrev     // The augmentation is successful iff at least one edge becomes saturated
493f2ade65fSspupyrev     return HasSaturatedEdges;
494f2ade65fSspupyrev   }
495f2ade65fSspupyrev 
496f2ade65fSspupyrev   /// Identify candidate (shortest) edges for augmentation.
identifyShortestEdges(uint64_t PathCapacity)497f2ade65fSspupyrev   void identifyShortestEdges(uint64_t PathCapacity) {
498f2ade65fSspupyrev     assert(PathCapacity > 0 && "found an incorrect augmenting DAG");
499f2ade65fSspupyrev     // To make sure the augmentation DAG contains only edges with large residual
500f2ade65fSspupyrev     // capacity, we prune all edges whose capacity is below a fraction of
501f2ade65fSspupyrev     // the capacity of the augmented path.
502f2ade65fSspupyrev     // (All edges of the path itself are always in the DAG)
503f2ade65fSspupyrev     uint64_t MinCapacity = std::max(PathCapacity / 2, uint64_t(1));
504f2ade65fSspupyrev 
505f2ade65fSspupyrev     // Decide which edges are on a shortest path from Source to Target
506f2ade65fSspupyrev     for (size_t Src = 0; Src < Nodes.size(); Src++) {
507f2ade65fSspupyrev       // An edge cannot be augmenting if the endpoint has large distance
508f2ade65fSspupyrev       if (Nodes[Src].Distance > Nodes[Target].Distance)
509f2ade65fSspupyrev         continue;
510f2ade65fSspupyrev 
511f2ade65fSspupyrev       for (auto &Edge : Edges[Src]) {
512f2ade65fSspupyrev         uint64_t Dst = Edge.Dst;
513f2ade65fSspupyrev         Edge.OnShortestPath =
514f2ade65fSspupyrev             Src != Target && Dst != Source &&
515f2ade65fSspupyrev             Nodes[Dst].Distance <= Nodes[Target].Distance &&
516f2ade65fSspupyrev             Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost &&
517f2ade65fSspupyrev             Edge.Capacity > Edge.Flow &&
518f2ade65fSspupyrev             uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity;
519f2ade65fSspupyrev       }
520f2ade65fSspupyrev     }
521f2ade65fSspupyrev   }
522f2ade65fSspupyrev 
52361eb12e1Sspupyrev   /// Maximum number of DFS iterations for DAG finding.
52461eb12e1Sspupyrev   static constexpr uint64_t MaxDfsCalls = 10;
52561eb12e1Sspupyrev 
52613d1364aSspupyrev   /// A node in a flow network.
5277cc2493dSspupyrev   struct Node {
5287cc2493dSspupyrev     /// The cost of the cheapest path from the source to the current node.
5297cc2493dSspupyrev     int64_t Distance;
5307cc2493dSspupyrev     /// The node preceding the current one in the path.
5317cc2493dSspupyrev     uint64_t ParentNode;
5327cc2493dSspupyrev     /// The index of the edge between ParentNode and the current node.
5337cc2493dSspupyrev     uint64_t ParentEdgeIndex;
5347cc2493dSspupyrev     /// An indicator of whether the current node is in a queue.
5357cc2493dSspupyrev     bool Taken;
536f2ade65fSspupyrev 
537f2ade65fSspupyrev     /// Data fields utilized in DAG-augmentation:
538f2ade65fSspupyrev     /// Fractional flow.
539f2ade65fSspupyrev     double FracFlow;
540f2ade65fSspupyrev     /// Integral flow.
541f2ade65fSspupyrev     uint64_t IntFlow;
542f2ade65fSspupyrev     /// Discovery time.
543f2ade65fSspupyrev     uint64_t Discovery;
544f2ade65fSspupyrev     /// Finish time.
545f2ade65fSspupyrev     uint64_t Finish;
546f2ade65fSspupyrev     /// NumCalls.
547f2ade65fSspupyrev     uint64_t NumCalls;
5487cc2493dSspupyrev   };
549f2ade65fSspupyrev 
5507cc2493dSspupyrev   /// An edge in a flow network.
5517cc2493dSspupyrev   struct Edge {
5527cc2493dSspupyrev     /// The cost of the edge.
5537cc2493dSspupyrev     int64_t Cost;
5547cc2493dSspupyrev     /// The capacity of the edge.
5557cc2493dSspupyrev     int64_t Capacity;
5567cc2493dSspupyrev     /// The current flow on the edge.
5577cc2493dSspupyrev     int64_t Flow;
5587cc2493dSspupyrev     /// The destination node of the edge.
5597cc2493dSspupyrev     uint64_t Dst;
5607cc2493dSspupyrev     /// The index of the reverse edge between Dst and the current node.
5617cc2493dSspupyrev     uint64_t RevEdgeIndex;
562f2ade65fSspupyrev 
563f2ade65fSspupyrev     /// Data fields utilized in DAG-augmentation:
564f2ade65fSspupyrev     /// Whether the edge is currently on a shortest path from Source to Target.
565f2ade65fSspupyrev     bool OnShortestPath;
566f2ade65fSspupyrev     /// Extra flow along the edge.
567f2ade65fSspupyrev     uint64_t AugmentedFlow;
5687cc2493dSspupyrev   };
5697cc2493dSspupyrev 
5707cc2493dSspupyrev   /// The set of network nodes.
5717cc2493dSspupyrev   std::vector<Node> Nodes;
5727cc2493dSspupyrev   /// The set of network edges.
5737cc2493dSspupyrev   std::vector<std::vector<Edge>> Edges;
5747cc2493dSspupyrev   /// Source node of the flow.
5757cc2493dSspupyrev   uint64_t Source;
5767cc2493dSspupyrev   /// Target (sink) node of the flow.
5777cc2493dSspupyrev   uint64_t Target;
578f2ade65fSspupyrev   /// Augmenting edges.
579f2ade65fSspupyrev   std::vector<std::vector<Edge *>> AugmentingEdges;
58061eb12e1Sspupyrev   /// Params for flow computation.
58161eb12e1Sspupyrev   const ProfiParams &Params;
5827cc2493dSspupyrev };
5837cc2493dSspupyrev 
58445b15592Sspupyrev /// A post-processing adjustment of the control flow. It applies two steps by
58593a2c291Sspupyrev /// rerouting some flow and making it more realistic:
58693a2c291Sspupyrev ///
58793a2c291Sspupyrev /// - First, it removes all isolated components ("islands") with a positive flow
58893a2c291Sspupyrev ///   that are unreachable from the entry block. For every such component, we
58993a2c291Sspupyrev ///   find the shortest from the entry to an exit passing through the component,
59093a2c291Sspupyrev ///   and increase the flow by one unit along the path.
59193a2c291Sspupyrev ///
59293a2c291Sspupyrev /// - Second, it identifies all "unknown subgraphs" consisting of basic blocks
59393a2c291Sspupyrev ///   with no sampled counts. Then it rebalnces the flow that goes through such
59493a2c291Sspupyrev ///   a subgraph so that each branch is taken with probability 50%.
59593a2c291Sspupyrev ///   An unknown subgraph is such that for every two nodes u and v:
59693a2c291Sspupyrev ///     - u dominates v and u is not unknown;
59793a2c291Sspupyrev ///     - v post-dominates u; and
59893a2c291Sspupyrev ///     - all inner-nodes of all (u,v)-paths are unknown.
59993a2c291Sspupyrev ///
60098dd2f9eSspupyrev class FlowAdjuster {
60198dd2f9eSspupyrev public:
FlowAdjuster(const ProfiParams & Params,FlowFunction & Func)60261eb12e1Sspupyrev   FlowAdjuster(const ProfiParams &Params, FlowFunction &Func)
60345b15592Sspupyrev       : Params(Params), Func(Func) {}
60498dd2f9eSspupyrev 
60545b15592Sspupyrev   /// Apply the post-processing.
run()60698dd2f9eSspupyrev   void run() {
60761eb12e1Sspupyrev     if (Params.JoinIslands) {
60845b15592Sspupyrev       // Adjust the flow to get rid of isolated components
60998dd2f9eSspupyrev       joinIsolatedComponents();
61061eb12e1Sspupyrev     }
61193a2c291Sspupyrev 
61261eb12e1Sspupyrev     if (Params.RebalanceUnknown) {
61345b15592Sspupyrev       // Rebalance the flow inside unknown subgraphs
61493a2c291Sspupyrev       rebalanceUnknownSubgraphs();
61598dd2f9eSspupyrev     }
61661eb12e1Sspupyrev   }
61798dd2f9eSspupyrev 
61898dd2f9eSspupyrev private:
joinIsolatedComponents()61998dd2f9eSspupyrev   void joinIsolatedComponents() {
62098dd2f9eSspupyrev     // Find blocks that are reachable from the source
6215f4ae564SJan Svoboda     auto Visited = BitVector(NumBlocks(), false);
62298dd2f9eSspupyrev     findReachable(Func.Entry, Visited);
62398dd2f9eSspupyrev 
62498dd2f9eSspupyrev     // Iterate over all non-reachable blocks and adjust their weights
62598dd2f9eSspupyrev     for (uint64_t I = 0; I < NumBlocks(); I++) {
62698dd2f9eSspupyrev       auto &Block = Func.Blocks[I];
62798dd2f9eSspupyrev       if (Block.Flow > 0 && !Visited[I]) {
62898dd2f9eSspupyrev         // Find a path from the entry to an exit passing through the block I
62998dd2f9eSspupyrev         auto Path = findShortestPath(I);
63098dd2f9eSspupyrev         // Increase the flow along the path
63198dd2f9eSspupyrev         assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
63298dd2f9eSspupyrev                "incorrectly computed path adjusting control flow");
63398dd2f9eSspupyrev         Func.Blocks[Func.Entry].Flow += 1;
63498dd2f9eSspupyrev         for (auto &Jump : Path) {
63598dd2f9eSspupyrev           Jump->Flow += 1;
63698dd2f9eSspupyrev           Func.Blocks[Jump->Target].Flow += 1;
63798dd2f9eSspupyrev           // Update reachability
63898dd2f9eSspupyrev           findReachable(Jump->Target, Visited);
63998dd2f9eSspupyrev         }
64098dd2f9eSspupyrev       }
64198dd2f9eSspupyrev     }
64298dd2f9eSspupyrev   }
64398dd2f9eSspupyrev 
64493a2c291Sspupyrev   /// Run BFS from a given block along the jumps with a positive flow and mark
64598dd2f9eSspupyrev   /// all reachable blocks.
findReachable(uint64_t Src,BitVector & Visited)6465f4ae564SJan Svoboda   void findReachable(uint64_t Src, BitVector &Visited) {
64798dd2f9eSspupyrev     if (Visited[Src])
64898dd2f9eSspupyrev       return;
64998dd2f9eSspupyrev     std::queue<uint64_t> Queue;
65098dd2f9eSspupyrev     Queue.push(Src);
65198dd2f9eSspupyrev     Visited[Src] = true;
65298dd2f9eSspupyrev     while (!Queue.empty()) {
65398dd2f9eSspupyrev       Src = Queue.front();
65498dd2f9eSspupyrev       Queue.pop();
65556ea4f9bSKazu Hirata       for (auto *Jump : Func.Blocks[Src].SuccJumps) {
65698dd2f9eSspupyrev         uint64_t Dst = Jump->Target;
65798dd2f9eSspupyrev         if (Jump->Flow > 0 && !Visited[Dst]) {
65898dd2f9eSspupyrev           Queue.push(Dst);
65998dd2f9eSspupyrev           Visited[Dst] = true;
66098dd2f9eSspupyrev         }
66198dd2f9eSspupyrev       }
66298dd2f9eSspupyrev     }
66398dd2f9eSspupyrev   }
66498dd2f9eSspupyrev 
66598dd2f9eSspupyrev   /// Find the shortest path from the entry block to an exit block passing
66698dd2f9eSspupyrev   /// through a given block.
findShortestPath(uint64_t BlockIdx)66798dd2f9eSspupyrev   std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {
66898dd2f9eSspupyrev     // A path from the entry block to BlockIdx
66998dd2f9eSspupyrev     auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
67098dd2f9eSspupyrev     // A path from BlockIdx to an exit block
67198dd2f9eSspupyrev     auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);
67298dd2f9eSspupyrev 
67398dd2f9eSspupyrev     // Concatenate the two paths
67498dd2f9eSspupyrev     std::vector<FlowJump *> Result;
67598dd2f9eSspupyrev     Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end());
67698dd2f9eSspupyrev     Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end());
67798dd2f9eSspupyrev     return Result;
67898dd2f9eSspupyrev   }
67998dd2f9eSspupyrev 
68098dd2f9eSspupyrev   /// Apply the Dijkstra algorithm to find the shortest path from a given
68198dd2f9eSspupyrev   /// Source to a given Target block.
68298dd2f9eSspupyrev   /// If Target == -1, then the path ends at an exit block.
findShortestPath(uint64_t Source,uint64_t Target)68398dd2f9eSspupyrev   std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) {
68498dd2f9eSspupyrev     // Quit early, if possible
68598dd2f9eSspupyrev     if (Source == Target)
68698dd2f9eSspupyrev       return std::vector<FlowJump *>();
68798dd2f9eSspupyrev     if (Func.Blocks[Source].isExit() && Target == AnyExitBlock)
68898dd2f9eSspupyrev       return std::vector<FlowJump *>();
68998dd2f9eSspupyrev 
69098dd2f9eSspupyrev     // Initialize data structures
69198dd2f9eSspupyrev     auto Distance = std::vector<int64_t>(NumBlocks(), INF);
69298dd2f9eSspupyrev     auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr);
69398dd2f9eSspupyrev     Distance[Source] = 0;
69498dd2f9eSspupyrev     std::set<std::pair<uint64_t, uint64_t>> Queue;
69598dd2f9eSspupyrev     Queue.insert(std::make_pair(Distance[Source], Source));
69698dd2f9eSspupyrev 
69798dd2f9eSspupyrev     // Run the Dijkstra algorithm
69898dd2f9eSspupyrev     while (!Queue.empty()) {
69998dd2f9eSspupyrev       uint64_t Src = Queue.begin()->second;
70098dd2f9eSspupyrev       Queue.erase(Queue.begin());
70198dd2f9eSspupyrev       // If we found a solution, quit early
70298dd2f9eSspupyrev       if (Src == Target ||
70398dd2f9eSspupyrev           (Func.Blocks[Src].isExit() && Target == AnyExitBlock))
70498dd2f9eSspupyrev         break;
70598dd2f9eSspupyrev 
70656ea4f9bSKazu Hirata       for (auto *Jump : Func.Blocks[Src].SuccJumps) {
70798dd2f9eSspupyrev         uint64_t Dst = Jump->Target;
70898dd2f9eSspupyrev         int64_t JumpDist = jumpDistance(Jump);
70998dd2f9eSspupyrev         if (Distance[Dst] > Distance[Src] + JumpDist) {
71098dd2f9eSspupyrev           Queue.erase(std::make_pair(Distance[Dst], Dst));
71198dd2f9eSspupyrev 
71298dd2f9eSspupyrev           Distance[Dst] = Distance[Src] + JumpDist;
71398dd2f9eSspupyrev           Parent[Dst] = Jump;
71498dd2f9eSspupyrev 
71598dd2f9eSspupyrev           Queue.insert(std::make_pair(Distance[Dst], Dst));
71698dd2f9eSspupyrev         }
71798dd2f9eSspupyrev       }
71898dd2f9eSspupyrev     }
71998dd2f9eSspupyrev     // If Target is not provided, find the closest exit block
72098dd2f9eSspupyrev     if (Target == AnyExitBlock) {
72198dd2f9eSspupyrev       for (uint64_t I = 0; I < NumBlocks(); I++) {
72298dd2f9eSspupyrev         if (Func.Blocks[I].isExit() && Parent[I] != nullptr) {
72398dd2f9eSspupyrev           if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {
72498dd2f9eSspupyrev             Target = I;
72598dd2f9eSspupyrev           }
72698dd2f9eSspupyrev         }
72798dd2f9eSspupyrev       }
72898dd2f9eSspupyrev     }
72998dd2f9eSspupyrev     assert(Parent[Target] != nullptr && "a path does not exist");
73098dd2f9eSspupyrev 
73198dd2f9eSspupyrev     // Extract the constructed path
73298dd2f9eSspupyrev     std::vector<FlowJump *> Result;
73398dd2f9eSspupyrev     uint64_t Now = Target;
73498dd2f9eSspupyrev     while (Now != Source) {
73598dd2f9eSspupyrev       assert(Now == Parent[Now]->Target && "incorrect parent jump");
73698dd2f9eSspupyrev       Result.push_back(Parent[Now]);
73798dd2f9eSspupyrev       Now = Parent[Now]->Source;
73898dd2f9eSspupyrev     }
73998dd2f9eSspupyrev     // Reverse the path, since it is extracted from Target to Source
74098dd2f9eSspupyrev     std::reverse(Result.begin(), Result.end());
74198dd2f9eSspupyrev     return Result;
74298dd2f9eSspupyrev   }
74398dd2f9eSspupyrev 
74498dd2f9eSspupyrev   /// A distance of a path for a given jump.
74598dd2f9eSspupyrev   /// In order to incite the path to use blocks/jumps with large positive flow,
74698dd2f9eSspupyrev   /// and avoid changing branch probability of outgoing edges drastically,
74781aedab7Sspupyrev   /// set the jump distance so as:
74881aedab7Sspupyrev   ///   - to minimize the number of unlikely jumps used and subject to that,
74981aedab7Sspupyrev   ///   - to minimize the number of Flow == 0 jumps used and subject to that,
75081aedab7Sspupyrev   ///   - minimizes total multiplicative Flow increase for the remaining edges.
75181aedab7Sspupyrev   /// To capture this objective with integer distances, we round off fractional
75281aedab7Sspupyrev   /// parts to a multiple of 1 / BaseDistance.
jumpDistance(FlowJump * Jump) const75398dd2f9eSspupyrev   int64_t jumpDistance(FlowJump *Jump) const {
75498dd2f9eSspupyrev     if (Jump->IsUnlikely)
75561eb12e1Sspupyrev       return Params.CostUnlikely;
75661eb12e1Sspupyrev     uint64_t BaseDistance =
75761eb12e1Sspupyrev         std::max(FlowAdjuster::MinBaseDistance,
75861eb12e1Sspupyrev                  std::min(Func.Blocks[Func.Entry].Flow,
75945b15592Sspupyrev                           Params.CostUnlikely / (2 * (NumBlocks() + 1))));
76098dd2f9eSspupyrev     if (Jump->Flow > 0)
76181aedab7Sspupyrev       return BaseDistance + BaseDistance / Jump->Flow;
76245b15592Sspupyrev     return 2 * BaseDistance * (NumBlocks() + 1);
76398dd2f9eSspupyrev   };
76498dd2f9eSspupyrev 
NumBlocks() const76598dd2f9eSspupyrev   uint64_t NumBlocks() const { return Func.Blocks.size(); }
76698dd2f9eSspupyrev 
76713d1364aSspupyrev   /// Rebalance unknown subgraphs so that the flow is split evenly across the
76813d1364aSspupyrev   /// outgoing branches of every block of the subgraph. The method iterates over
76913d1364aSspupyrev   /// blocks with known weight and identifies unknown subgraphs rooted at the
77013d1364aSspupyrev   /// blocks. Then it verifies if flow rebalancing is feasible and applies it.
rebalanceUnknownSubgraphs()77193a2c291Sspupyrev   void rebalanceUnknownSubgraphs() {
77213d1364aSspupyrev     // Try to find unknown subgraphs from each block
773fedc5973SKazu Hirata     for (const FlowBlock &SrcBlock : Func.Blocks) {
77413d1364aSspupyrev       // Verify if rebalancing rooted at SrcBlock is feasible
775fedc5973SKazu Hirata       if (!canRebalanceAtRoot(&SrcBlock))
77693a2c291Sspupyrev         continue;
77793a2c291Sspupyrev 
77813d1364aSspupyrev       // Find an unknown subgraphs starting at SrcBlock. Along the way,
77913d1364aSspupyrev       // fill in known destinations and intermediate unknown blocks.
78013d1364aSspupyrev       std::vector<FlowBlock *> UnknownBlocks;
78113d1364aSspupyrev       std::vector<FlowBlock *> KnownDstBlocks;
782fedc5973SKazu Hirata       findUnknownSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks);
78313d1364aSspupyrev 
78413d1364aSspupyrev       // Verify if rebalancing of the subgraph is feasible. If the search is
78513d1364aSspupyrev       // successful, find the unique destination block (which can be null)
78693a2c291Sspupyrev       FlowBlock *DstBlock = nullptr;
787fedc5973SKazu Hirata       if (!canRebalanceSubgraph(&SrcBlock, KnownDstBlocks, UnknownBlocks,
78813d1364aSspupyrev                                 DstBlock))
78993a2c291Sspupyrev         continue;
79013d1364aSspupyrev 
79113d1364aSspupyrev       // We cannot rebalance subgraphs containing cycles among unknown blocks
792fedc5973SKazu Hirata       if (!isAcyclicSubgraph(&SrcBlock, DstBlock, UnknownBlocks))
79393a2c291Sspupyrev         continue;
79493a2c291Sspupyrev 
79593a2c291Sspupyrev       // Rebalance the flow
796fedc5973SKazu Hirata       rebalanceUnknownSubgraph(&SrcBlock, DstBlock, UnknownBlocks);
79793a2c291Sspupyrev     }
79893a2c291Sspupyrev   }
79993a2c291Sspupyrev 
80013d1364aSspupyrev   /// Verify if rebalancing rooted at a given block is possible.
canRebalanceAtRoot(const FlowBlock * SrcBlock)80113d1364aSspupyrev   bool canRebalanceAtRoot(const FlowBlock *SrcBlock) {
80213d1364aSspupyrev     // Do not attempt to find unknown subgraphs from an unknown or a
80313d1364aSspupyrev     // zero-flow block
80461eb12e1Sspupyrev     if (SrcBlock->HasUnknownWeight || SrcBlock->Flow == 0)
80513d1364aSspupyrev       return false;
80613d1364aSspupyrev 
80713d1364aSspupyrev     // Do not attempt to process subgraphs from a block w/o unknown sucessors
80813d1364aSspupyrev     bool HasUnknownSuccs = false;
80956ea4f9bSKazu Hirata     for (auto *Jump : SrcBlock->SuccJumps) {
81061eb12e1Sspupyrev       if (Func.Blocks[Jump->Target].HasUnknownWeight) {
81113d1364aSspupyrev         HasUnknownSuccs = true;
81213d1364aSspupyrev         break;
81313d1364aSspupyrev       }
81413d1364aSspupyrev     }
81513d1364aSspupyrev     if (!HasUnknownSuccs)
81613d1364aSspupyrev       return false;
81713d1364aSspupyrev 
81813d1364aSspupyrev     return true;
81913d1364aSspupyrev   }
82013d1364aSspupyrev 
82113d1364aSspupyrev   /// Find an unknown subgraph starting at block SrcBlock. The method sets
82213d1364aSspupyrev   /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks.
findUnknownSubgraph(const FlowBlock * SrcBlock,std::vector<FlowBlock * > & KnownDstBlocks,std::vector<FlowBlock * > & UnknownBlocks)82313d1364aSspupyrev   void findUnknownSubgraph(const FlowBlock *SrcBlock,
82413d1364aSspupyrev                            std::vector<FlowBlock *> &KnownDstBlocks,
82513d1364aSspupyrev                            std::vector<FlowBlock *> &UnknownBlocks) {
82693a2c291Sspupyrev     // Run BFS from SrcBlock and make sure all paths are going through unknown
827f2ade65fSspupyrev     // blocks and end at a known DstBlock
8285f4ae564SJan Svoboda     auto Visited = BitVector(NumBlocks(), false);
82993a2c291Sspupyrev     std::queue<uint64_t> Queue;
83093a2c291Sspupyrev 
83193a2c291Sspupyrev     Queue.push(SrcBlock->Index);
83293a2c291Sspupyrev     Visited[SrcBlock->Index] = true;
83393a2c291Sspupyrev     while (!Queue.empty()) {
83493a2c291Sspupyrev       auto &Block = Func.Blocks[Queue.front()];
83593a2c291Sspupyrev       Queue.pop();
83693a2c291Sspupyrev       // Process blocks reachable from Block
83756ea4f9bSKazu Hirata       for (auto *Jump : Block.SuccJumps) {
83813d1364aSspupyrev         // If Jump can be ignored, skip it
83913d1364aSspupyrev         if (ignoreJump(SrcBlock, nullptr, Jump))
84013d1364aSspupyrev           continue;
84113d1364aSspupyrev 
84293a2c291Sspupyrev         uint64_t Dst = Jump->Target;
84313d1364aSspupyrev         // If Dst has been visited, skip Jump
84493a2c291Sspupyrev         if (Visited[Dst])
84593a2c291Sspupyrev           continue;
84613d1364aSspupyrev         // Process block Dst
84793a2c291Sspupyrev         Visited[Dst] = true;
84861eb12e1Sspupyrev         if (!Func.Blocks[Dst].HasUnknownWeight) {
84913d1364aSspupyrev           KnownDstBlocks.push_back(&Func.Blocks[Dst]);
85093a2c291Sspupyrev         } else {
85193a2c291Sspupyrev           Queue.push(Dst);
85213d1364aSspupyrev           UnknownBlocks.push_back(&Func.Blocks[Dst]);
85313d1364aSspupyrev         }
85493a2c291Sspupyrev       }
85593a2c291Sspupyrev     }
85693a2c291Sspupyrev   }
85793a2c291Sspupyrev 
85813d1364aSspupyrev   /// Verify if rebalancing of the subgraph is feasible. If the checks are
85913d1364aSspupyrev   /// successful, set the unique destination block, DstBlock (can be null).
canRebalanceSubgraph(const FlowBlock * SrcBlock,const std::vector<FlowBlock * > & KnownDstBlocks,const std::vector<FlowBlock * > & UnknownBlocks,FlowBlock * & DstBlock)86013d1364aSspupyrev   bool canRebalanceSubgraph(const FlowBlock *SrcBlock,
86113d1364aSspupyrev                             const std::vector<FlowBlock *> &KnownDstBlocks,
86213d1364aSspupyrev                             const std::vector<FlowBlock *> &UnknownBlocks,
86313d1364aSspupyrev                             FlowBlock *&DstBlock) {
86493a2c291Sspupyrev     // If the list of unknown blocks is empty, we don't need rebalancing
86513d1364aSspupyrev     if (UnknownBlocks.empty())
86693a2c291Sspupyrev       return false;
86713d1364aSspupyrev 
86813d1364aSspupyrev     // If there are multiple known sinks, we can't rebalance
86913d1364aSspupyrev     if (KnownDstBlocks.size() > 1)
87093a2c291Sspupyrev       return false;
87113d1364aSspupyrev     DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();
87213d1364aSspupyrev 
87313d1364aSspupyrev     // Verify sinks of the subgraph
87456ea4f9bSKazu Hirata     for (auto *Block : UnknownBlocks) {
87513d1364aSspupyrev       if (Block->SuccJumps.empty()) {
87613d1364aSspupyrev         // If there are multiple (known and unknown) sinks, we can't rebalance
87713d1364aSspupyrev         if (DstBlock != nullptr)
87813d1364aSspupyrev           return false;
87913d1364aSspupyrev         continue;
88013d1364aSspupyrev       }
88113d1364aSspupyrev       size_t NumIgnoredJumps = 0;
88256ea4f9bSKazu Hirata       for (auto *Jump : Block->SuccJumps) {
88313d1364aSspupyrev         if (ignoreJump(SrcBlock, DstBlock, Jump))
88413d1364aSspupyrev           NumIgnoredJumps++;
88513d1364aSspupyrev       }
88613d1364aSspupyrev       // If there is a non-sink block in UnknownBlocks with all jumps ignored,
88713d1364aSspupyrev       // then we can't rebalance
88813d1364aSspupyrev       if (NumIgnoredJumps == Block->SuccJumps.size())
88993a2c291Sspupyrev         return false;
89093a2c291Sspupyrev     }
89193a2c291Sspupyrev 
89293a2c291Sspupyrev     return true;
89393a2c291Sspupyrev   }
89493a2c291Sspupyrev 
89513d1364aSspupyrev   /// Decide whether the Jump is ignored while processing an unknown subgraphs
89613d1364aSspupyrev   /// rooted at basic block SrcBlock with the destination block, DstBlock.
ignoreJump(const FlowBlock * SrcBlock,const FlowBlock * DstBlock,const FlowJump * Jump)89713d1364aSspupyrev   bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
89813d1364aSspupyrev                   const FlowJump *Jump) {
89913d1364aSspupyrev     // Ignore unlikely jumps with zero flow
90013d1364aSspupyrev     if (Jump->IsUnlikely && Jump->Flow == 0)
90113d1364aSspupyrev       return true;
90213d1364aSspupyrev 
90313d1364aSspupyrev     auto JumpSource = &Func.Blocks[Jump->Source];
90413d1364aSspupyrev     auto JumpTarget = &Func.Blocks[Jump->Target];
90513d1364aSspupyrev 
90613d1364aSspupyrev     // Do not ignore jumps coming into DstBlock
90713d1364aSspupyrev     if (DstBlock != nullptr && JumpTarget == DstBlock)
90813d1364aSspupyrev       return false;
90913d1364aSspupyrev 
91013d1364aSspupyrev     // Ignore jumps out of SrcBlock to known blocks
91161eb12e1Sspupyrev     if (!JumpTarget->HasUnknownWeight && JumpSource == SrcBlock)
91213d1364aSspupyrev       return true;
91313d1364aSspupyrev 
91413d1364aSspupyrev     // Ignore jumps to known blocks with zero flow
91561eb12e1Sspupyrev     if (!JumpTarget->HasUnknownWeight && JumpTarget->Flow == 0)
91613d1364aSspupyrev       return true;
91713d1364aSspupyrev 
91813d1364aSspupyrev     return false;
91913d1364aSspupyrev   }
92013d1364aSspupyrev 
92193a2c291Sspupyrev   /// Verify if the given unknown subgraph is acyclic, and if yes, reorder
92213d1364aSspupyrev   /// UnknownBlocks in the topological order (so that all jumps are "forward").
isAcyclicSubgraph(const FlowBlock * SrcBlock,const FlowBlock * DstBlock,std::vector<FlowBlock * > & UnknownBlocks)92313d1364aSspupyrev   bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
92413d1364aSspupyrev                          std::vector<FlowBlock *> &UnknownBlocks) {
92593a2c291Sspupyrev     // Extract local in-degrees in the considered subgraph
92693a2c291Sspupyrev     auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
92713d1364aSspupyrev     auto fillInDegree = [&](const FlowBlock *Block) {
92856ea4f9bSKazu Hirata       for (auto *Jump : Block->SuccJumps) {
92913d1364aSspupyrev         if (ignoreJump(SrcBlock, DstBlock, Jump))
93013d1364aSspupyrev           continue;
93193a2c291Sspupyrev         LocalInDegree[Jump->Target]++;
93293a2c291Sspupyrev       }
93313d1364aSspupyrev     };
93413d1364aSspupyrev     fillInDegree(SrcBlock);
93556ea4f9bSKazu Hirata     for (auto *Block : UnknownBlocks) {
93613d1364aSspupyrev       fillInDegree(Block);
93793a2c291Sspupyrev     }
93893a2c291Sspupyrev     // A loop containing SrcBlock
93993a2c291Sspupyrev     if (LocalInDegree[SrcBlock->Index] > 0)
94093a2c291Sspupyrev       return false;
94193a2c291Sspupyrev 
94293a2c291Sspupyrev     std::vector<FlowBlock *> AcyclicOrder;
94393a2c291Sspupyrev     std::queue<uint64_t> Queue;
94493a2c291Sspupyrev     Queue.push(SrcBlock->Index);
94593a2c291Sspupyrev     while (!Queue.empty()) {
94613d1364aSspupyrev       FlowBlock *Block = &Func.Blocks[Queue.front()];
94793a2c291Sspupyrev       Queue.pop();
94813d1364aSspupyrev       // Stop propagation once we reach DstBlock, if any
94913d1364aSspupyrev       if (DstBlock != nullptr && Block == DstBlock)
95093a2c291Sspupyrev         break;
95193a2c291Sspupyrev 
95213d1364aSspupyrev       // Keep an acyclic order of unknown blocks
95361eb12e1Sspupyrev       if (Block->HasUnknownWeight && Block != SrcBlock)
95413d1364aSspupyrev         AcyclicOrder.push_back(Block);
95513d1364aSspupyrev 
95693a2c291Sspupyrev       // Add to the queue all successors with zero local in-degree
95756ea4f9bSKazu Hirata       for (auto *Jump : Block->SuccJumps) {
95813d1364aSspupyrev         if (ignoreJump(SrcBlock, DstBlock, Jump))
95913d1364aSspupyrev           continue;
96093a2c291Sspupyrev         uint64_t Dst = Jump->Target;
96193a2c291Sspupyrev         LocalInDegree[Dst]--;
96293a2c291Sspupyrev         if (LocalInDegree[Dst] == 0) {
96393a2c291Sspupyrev           Queue.push(Dst);
96493a2c291Sspupyrev         }
96593a2c291Sspupyrev       }
96693a2c291Sspupyrev     }
96793a2c291Sspupyrev 
96893a2c291Sspupyrev     // If there is a cycle in the subgraph, AcyclicOrder contains only a subset
96993a2c291Sspupyrev     // of all blocks
97013d1364aSspupyrev     if (UnknownBlocks.size() != AcyclicOrder.size())
97193a2c291Sspupyrev       return false;
97213d1364aSspupyrev     UnknownBlocks = AcyclicOrder;
97393a2c291Sspupyrev     return true;
97493a2c291Sspupyrev   }
97593a2c291Sspupyrev 
97613d1364aSspupyrev   /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and
97713d1364aSspupyrev   /// having UnknownBlocks intermediate blocks.
rebalanceUnknownSubgraph(const FlowBlock * SrcBlock,const FlowBlock * DstBlock,const std::vector<FlowBlock * > & UnknownBlocks)97813d1364aSspupyrev   void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock,
97913d1364aSspupyrev                                 const FlowBlock *DstBlock,
98013d1364aSspupyrev                                 const std::vector<FlowBlock *> &UnknownBlocks) {
98193a2c291Sspupyrev     assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");
98293a2c291Sspupyrev 
98313d1364aSspupyrev     // Ditribute flow from the source block
98413d1364aSspupyrev     uint64_t BlockFlow = 0;
98513d1364aSspupyrev     // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps
98656ea4f9bSKazu Hirata     for (auto *Jump : SrcBlock->SuccJumps) {
98713d1364aSspupyrev       if (ignoreJump(SrcBlock, DstBlock, Jump))
98893a2c291Sspupyrev         continue;
98913d1364aSspupyrev       BlockFlow += Jump->Flow;
99093a2c291Sspupyrev     }
99113d1364aSspupyrev     rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);
99213d1364aSspupyrev 
99313d1364aSspupyrev     // Ditribute flow from the remaining blocks
99456ea4f9bSKazu Hirata     for (auto *Block : UnknownBlocks) {
99561eb12e1Sspupyrev       assert(Block->HasUnknownWeight && "incorrect unknown subgraph");
99613d1364aSspupyrev       uint64_t BlockFlow = 0;
99713d1364aSspupyrev       // Block's flow is the sum of incoming flows
99856ea4f9bSKazu Hirata       for (auto *Jump : Block->PredJumps) {
99913d1364aSspupyrev         BlockFlow += Jump->Flow;
100013d1364aSspupyrev       }
100113d1364aSspupyrev       Block->Flow = BlockFlow;
100213d1364aSspupyrev       rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow);
100313d1364aSspupyrev     }
100413d1364aSspupyrev   }
100513d1364aSspupyrev 
100613d1364aSspupyrev   /// Redistribute flow for a block in a subgraph rooted at SrcBlock,
100713d1364aSspupyrev   /// and ending at DstBlock.
rebalanceBlock(const FlowBlock * SrcBlock,const FlowBlock * DstBlock,const FlowBlock * Block,uint64_t BlockFlow)100813d1364aSspupyrev   void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
100913d1364aSspupyrev                       const FlowBlock *Block, uint64_t BlockFlow) {
101013d1364aSspupyrev     // Process all successor jumps and update corresponding flow values
101113d1364aSspupyrev     size_t BlockDegree = 0;
101256ea4f9bSKazu Hirata     for (auto *Jump : Block->SuccJumps) {
101313d1364aSspupyrev       if (ignoreJump(SrcBlock, DstBlock, Jump))
101413d1364aSspupyrev         continue;
101513d1364aSspupyrev       BlockDegree++;
101613d1364aSspupyrev     }
101713d1364aSspupyrev     // If all successor jumps of the block are ignored, skip it
101813d1364aSspupyrev     if (DstBlock == nullptr && BlockDegree == 0)
101913d1364aSspupyrev       return;
102013d1364aSspupyrev     assert(BlockDegree > 0 && "all outgoing jumps are ignored");
102113d1364aSspupyrev 
102213d1364aSspupyrev     // Each of the Block's successors gets the following amount of flow.
102313d1364aSspupyrev     // Rounding the value up so that all flow is propagated
102413d1364aSspupyrev     uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
102556ea4f9bSKazu Hirata     for (auto *Jump : Block->SuccJumps) {
102613d1364aSspupyrev       if (ignoreJump(SrcBlock, DstBlock, Jump))
102713d1364aSspupyrev         continue;
102813d1364aSspupyrev       uint64_t Flow = std::min(SuccFlow, BlockFlow);
102993a2c291Sspupyrev       Jump->Flow = Flow;
103013d1364aSspupyrev       BlockFlow -= Flow;
103193a2c291Sspupyrev     }
103213d1364aSspupyrev     assert(BlockFlow == 0 && "not all flow is propagated");
103393a2c291Sspupyrev   }
103493a2c291Sspupyrev 
103598dd2f9eSspupyrev   /// A constant indicating an arbitrary exit block of a function.
103698dd2f9eSspupyrev   static constexpr uint64_t AnyExitBlock = uint64_t(-1);
103761eb12e1Sspupyrev   /// Minimum BaseDistance for the jump distance values in island joining.
103861eb12e1Sspupyrev   static constexpr uint64_t MinBaseDistance = 10000;
103998dd2f9eSspupyrev 
104061eb12e1Sspupyrev   /// Params for flow computation.
104161eb12e1Sspupyrev   const ProfiParams &Params;
104298dd2f9eSspupyrev   /// The function.
104398dd2f9eSspupyrev   FlowFunction &Func;
104498dd2f9eSspupyrev };
104598dd2f9eSspupyrev 
104645b15592Sspupyrev std::pair<int64_t, int64_t> assignBlockCosts(const ProfiParams &Params,
104745b15592Sspupyrev                                              const FlowBlock &Block);
104845b15592Sspupyrev std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params,
104945b15592Sspupyrev                                             const FlowJump &Jump);
105045b15592Sspupyrev 
10517cc2493dSspupyrev /// Initializing flow network for a given function.
10527cc2493dSspupyrev ///
105345b15592Sspupyrev /// Every block is split into two nodes that are responsible for (i) an
105445b15592Sspupyrev /// incoming flow, (ii) an outgoing flow; they penalize an increase or a
10557cc2493dSspupyrev /// reduction of the block weight.
initializeNetwork(const ProfiParams & Params,MinCostMaxFlow & Network,FlowFunction & Func)105661eb12e1Sspupyrev void initializeNetwork(const ProfiParams &Params, MinCostMaxFlow &Network,
105761eb12e1Sspupyrev                        FlowFunction &Func) {
10587cc2493dSspupyrev   uint64_t NumBlocks = Func.Blocks.size();
10597cc2493dSspupyrev   assert(NumBlocks > 1 && "Too few blocks in a function");
106045b15592Sspupyrev   uint64_t NumJumps = Func.Jumps.size();
106145b15592Sspupyrev   assert(NumJumps > 0 && "Too few jumps in a function");
10627cc2493dSspupyrev 
10637cc2493dSspupyrev   // Introducing dummy source/sink pairs to allow flow circulation.
1064*1650f1b3SJay Foad   // The nodes corresponding to blocks of the function have indices in
106545b15592Sspupyrev   // the range [0 .. 2 * NumBlocks); the dummy sources/sinks are indexed by the
106645b15592Sspupyrev   // next four values.
106745b15592Sspupyrev   uint64_t S = 2 * NumBlocks;
10687cc2493dSspupyrev   uint64_t T = S + 1;
10697cc2493dSspupyrev   uint64_t S1 = S + 2;
10707cc2493dSspupyrev   uint64_t T1 = S + 3;
10717cc2493dSspupyrev 
107245b15592Sspupyrev   Network.initialize(2 * NumBlocks + 4, S1, T1);
10737cc2493dSspupyrev 
107445b15592Sspupyrev   // Initialize nodes of the flow network
10757cc2493dSspupyrev   for (uint64_t B = 0; B < NumBlocks; B++) {
10767cc2493dSspupyrev     auto &Block = Func.Blocks[B];
10777cc2493dSspupyrev 
107845b15592Sspupyrev     // Split every block into two auxiliary nodes to allow
107945b15592Sspupyrev     // increase/reduction of the block count.
108045b15592Sspupyrev     uint64_t Bin = 2 * B;
108145b15592Sspupyrev     uint64_t Bout = 2 * B + 1;
10827cc2493dSspupyrev 
10837cc2493dSspupyrev     // Edges from S and to T
10847cc2493dSspupyrev     if (Block.isEntry()) {
10857cc2493dSspupyrev       Network.addEdge(S, Bin, 0);
10867cc2493dSspupyrev     } else if (Block.isExit()) {
10877cc2493dSspupyrev       Network.addEdge(Bout, T, 0);
10887cc2493dSspupyrev     }
10897cc2493dSspupyrev 
109045b15592Sspupyrev     // Assign costs for increasing/decreasing the block counts
109145b15592Sspupyrev     auto [AuxCostInc, AuxCostDec] = assignBlockCosts(Params, Block);
10927cc2493dSspupyrev 
109345b15592Sspupyrev     // Add the corresponding edges to the network
109445b15592Sspupyrev     Network.addEdge(Bin, Bout, AuxCostInc);
10957cc2493dSspupyrev     if (Block.Weight > 0) {
109645b15592Sspupyrev       Network.addEdge(Bout, Bin, Block.Weight, AuxCostDec);
109745b15592Sspupyrev       Network.addEdge(S1, Bout, Block.Weight, 0);
109845b15592Sspupyrev       Network.addEdge(Bin, T1, Block.Weight, 0);
10997cc2493dSspupyrev     }
11007cc2493dSspupyrev   }
11017cc2493dSspupyrev 
110245b15592Sspupyrev   // Initialize edges of the flow network
110345b15592Sspupyrev   for (uint64_t J = 0; J < NumJumps; J++) {
110445b15592Sspupyrev     auto &Jump = Func.Jumps[J];
110545b15592Sspupyrev 
110645b15592Sspupyrev     // Get the endpoints corresponding to the jump
110745b15592Sspupyrev     uint64_t Jin = 2 * Jump.Source + 1;
110845b15592Sspupyrev     uint64_t Jout = 2 * Jump.Target;
110945b15592Sspupyrev 
111045b15592Sspupyrev     // Assign costs for increasing/decreasing the jump counts
111145b15592Sspupyrev     auto [AuxCostInc, AuxCostDec] = assignJumpCosts(Params, Jump);
111245b15592Sspupyrev 
111345b15592Sspupyrev     // Add the corresponding edges to the network
111445b15592Sspupyrev     Network.addEdge(Jin, Jout, AuxCostInc);
111545b15592Sspupyrev     if (Jump.Weight > 0) {
111645b15592Sspupyrev       Network.addEdge(Jout, Jin, Jump.Weight, AuxCostDec);
111745b15592Sspupyrev       Network.addEdge(S1, Jout, Jump.Weight, 0);
111845b15592Sspupyrev       Network.addEdge(Jin, T1, Jump.Weight, 0);
11197cc2493dSspupyrev     }
11207cc2493dSspupyrev   }
11217cc2493dSspupyrev 
11227cc2493dSspupyrev   // Make sure we have a valid flow circulation
11237cc2493dSspupyrev   Network.addEdge(T, S, 0);
11247cc2493dSspupyrev }
11257cc2493dSspupyrev 
112645b15592Sspupyrev /// Assign costs for increasing/decreasing the block counts.
assignBlockCosts(const ProfiParams & Params,const FlowBlock & Block)112745b15592Sspupyrev std::pair<int64_t, int64_t> assignBlockCosts(const ProfiParams &Params,
112845b15592Sspupyrev                                              const FlowBlock &Block) {
112945b15592Sspupyrev   // Modifying the weight of an unlikely block is expensive
113045b15592Sspupyrev   if (Block.IsUnlikely)
113145b15592Sspupyrev     return std::make_pair(Params.CostUnlikely, Params.CostUnlikely);
11327cc2493dSspupyrev 
113345b15592Sspupyrev   // Assign default values for the costs
113445b15592Sspupyrev   int64_t CostInc = Params.CostBlockInc;
113545b15592Sspupyrev   int64_t CostDec = Params.CostBlockDec;
113645b15592Sspupyrev   // Update the costs depending on the block metadata
113745b15592Sspupyrev   if (Block.HasUnknownWeight) {
113845b15592Sspupyrev     CostInc = Params.CostBlockUnknownInc;
113945b15592Sspupyrev     CostDec = 0;
114045b15592Sspupyrev   } else {
114145b15592Sspupyrev     // Increasing the count for "cold" blocks with zero initial count is more
114245b15592Sspupyrev     // expensive than for "hot" ones
114345b15592Sspupyrev     if (Block.Weight == 0)
114445b15592Sspupyrev       CostInc = Params.CostBlockZeroInc;
114545b15592Sspupyrev     // Modifying the count of the entry block is expensive
114645b15592Sspupyrev     if (Block.isEntry()) {
114745b15592Sspupyrev       CostInc = Params.CostBlockEntryInc;
114845b15592Sspupyrev       CostDec = Params.CostBlockEntryDec;
11497cc2493dSspupyrev     }
11507cc2493dSspupyrev   }
115145b15592Sspupyrev   return std::make_pair(CostInc, CostDec);
11527cc2493dSspupyrev }
11537cc2493dSspupyrev 
115445b15592Sspupyrev /// Assign costs for increasing/decreasing the jump counts.
assignJumpCosts(const ProfiParams & Params,const FlowJump & Jump)115545b15592Sspupyrev std::pair<int64_t, int64_t> assignJumpCosts(const ProfiParams &Params,
115645b15592Sspupyrev                                             const FlowJump &Jump) {
115745b15592Sspupyrev   // Modifying the weight of an unlikely jump is expensive
115845b15592Sspupyrev   if (Jump.IsUnlikely)
115945b15592Sspupyrev     return std::make_pair(Params.CostUnlikely, Params.CostUnlikely);
116045b15592Sspupyrev 
116145b15592Sspupyrev   // Assign default values for the costs
116245b15592Sspupyrev   int64_t CostInc = Params.CostJumpInc;
116345b15592Sspupyrev   int64_t CostDec = Params.CostJumpDec;
116445b15592Sspupyrev   // Update the costs depending on the block metadata
116545b15592Sspupyrev   if (Jump.Source + 1 == Jump.Target) {
116645b15592Sspupyrev     // Adjusting the fall-through branch
116745b15592Sspupyrev     CostInc = Params.CostJumpFTInc;
116845b15592Sspupyrev     CostDec = Params.CostJumpFTDec;
116945b15592Sspupyrev   }
117045b15592Sspupyrev   if (Jump.HasUnknownWeight) {
117145b15592Sspupyrev     // The cost is different for fall-through and non-fall-through branches
117245b15592Sspupyrev     if (Jump.Source + 1 == Jump.Target)
117345b15592Sspupyrev       CostInc = Params.CostJumpUnknownFTInc;
117445b15592Sspupyrev     else
117545b15592Sspupyrev       CostInc = Params.CostJumpUnknownInc;
117645b15592Sspupyrev     CostDec = 0;
117745b15592Sspupyrev   } else {
117845b15592Sspupyrev     assert(Jump.Weight > 0 && "found zero-weight jump with a positive weight");
117945b15592Sspupyrev   }
118045b15592Sspupyrev   return std::make_pair(CostInc, CostDec);
118145b15592Sspupyrev }
118245b15592Sspupyrev 
118345b15592Sspupyrev /// Extract resulting block and edge counts from the flow network.
extractWeights(const ProfiParams & Params,MinCostMaxFlow & Network,FlowFunction & Func)118445b15592Sspupyrev void extractWeights(const ProfiParams &Params, MinCostMaxFlow &Network,
118545b15592Sspupyrev                     FlowFunction &Func) {
118645b15592Sspupyrev   uint64_t NumBlocks = Func.Blocks.size();
118745b15592Sspupyrev   uint64_t NumJumps = Func.Jumps.size();
118845b15592Sspupyrev 
11897cc2493dSspupyrev   // Extract resulting jump counts
119045b15592Sspupyrev   for (uint64_t J = 0; J < NumJumps; J++) {
119145b15592Sspupyrev     auto &Jump = Func.Jumps[J];
119245b15592Sspupyrev     uint64_t SrcOut = 2 * Jump.Source + 1;
119345b15592Sspupyrev     uint64_t DstIn = 2 * Jump.Target;
119445b15592Sspupyrev 
11957cc2493dSspupyrev     int64_t Flow = 0;
119645b15592Sspupyrev     int64_t AuxFlow = Network.getFlow(SrcOut, DstIn);
119745b15592Sspupyrev     if (Jump.Source != Jump.Target)
119845b15592Sspupyrev       Flow = int64_t(Jump.Weight) + AuxFlow;
119945b15592Sspupyrev     else
120045b15592Sspupyrev       Flow = int64_t(Jump.Weight) + (AuxFlow > 0 ? AuxFlow : 0);
120145b15592Sspupyrev 
12027cc2493dSspupyrev     Jump.Flow = Flow;
12037cc2493dSspupyrev     assert(Flow >= 0 && "negative jump flow");
12047cc2493dSspupyrev   }
120545b15592Sspupyrev 
120645b15592Sspupyrev   // Extract resulting block counts
120745b15592Sspupyrev   auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
120845b15592Sspupyrev   auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
120945b15592Sspupyrev   for (auto &Jump : Func.Jumps) {
121045b15592Sspupyrev     InFlow[Jump.Target] += Jump.Flow;
121145b15592Sspupyrev     OutFlow[Jump.Source] += Jump.Flow;
121245b15592Sspupyrev   }
121345b15592Sspupyrev   for (uint64_t B = 0; B < NumBlocks; B++) {
121445b15592Sspupyrev     auto &Block = Func.Blocks[B];
121545b15592Sspupyrev     Block.Flow = std::max(OutFlow[B], InFlow[B]);
121645b15592Sspupyrev   }
12177cc2493dSspupyrev }
12187cc2493dSspupyrev 
12197cc2493dSspupyrev #ifndef NDEBUG
122045b15592Sspupyrev /// Verify that the provided block/jump weights are as expected.
verifyInput(const FlowFunction & Func)122145b15592Sspupyrev void verifyInput(const FlowFunction &Func) {
12222ee4ddaeSspupyrev   // Verify entry and exit blocks
122345b15592Sspupyrev   assert(Func.Entry == 0 && Func.Blocks[0].isEntry());
12242ee4ddaeSspupyrev   size_t NumExitBlocks = 0;
122545b15592Sspupyrev   for (size_t I = 1; I < Func.Blocks.size(); I++) {
122645b15592Sspupyrev     assert(!Func.Blocks[I].isEntry() && "multiple entry blocks");
12272ee4ddaeSspupyrev     if (Func.Blocks[I].isExit())
12282ee4ddaeSspupyrev       NumExitBlocks++;
12292ee4ddaeSspupyrev   }
12302ee4ddaeSspupyrev   assert(NumExitBlocks > 0 && "cannot find exit blocks");
12312ee4ddaeSspupyrev 
12322ee4ddaeSspupyrev   // Verify that there are no parallel edges
12332ee4ddaeSspupyrev   for (auto &Block : Func.Blocks) {
12342ee4ddaeSspupyrev     std::unordered_set<uint64_t> UniqueSuccs;
12352ee4ddaeSspupyrev     for (auto &Jump : Block.SuccJumps) {
12362ee4ddaeSspupyrev       auto It = UniqueSuccs.insert(Jump->Target);
12372ee4ddaeSspupyrev       assert(It.second && "input CFG contains parallel edges");
12382ee4ddaeSspupyrev     }
123945b15592Sspupyrev   }
124045b15592Sspupyrev   // Verify CFG jumps
124145b15592Sspupyrev   for (auto &Block : Func.Blocks) {
124245b15592Sspupyrev     assert((!Block.isEntry() || !Block.isExit()) &&
124345b15592Sspupyrev            "a block cannot be an entry and an exit");
124445b15592Sspupyrev   }
124545b15592Sspupyrev   // Verify input block weights
124645b15592Sspupyrev   for (auto &Block : Func.Blocks) {
124745b15592Sspupyrev     assert((!Block.HasUnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
124845b15592Sspupyrev            "non-zero weight of a block w/o weight except for an entry");
124945b15592Sspupyrev   }
125045b15592Sspupyrev   // Verify input jump weights
125145b15592Sspupyrev   for (auto &Jump : Func.Jumps) {
125245b15592Sspupyrev     assert((!Jump.HasUnknownWeight || Jump.Weight == 0) &&
125345b15592Sspupyrev            "non-zero weight of a jump w/o weight");
125445b15592Sspupyrev   }
125545b15592Sspupyrev }
125645b15592Sspupyrev 
125745b15592Sspupyrev /// Verify that the computed flow values satisfy flow conservation rules.
verifyOutput(const FlowFunction & Func)125845b15592Sspupyrev void verifyOutput(const FlowFunction &Func) {
12597cc2493dSspupyrev   const uint64_t NumBlocks = Func.Blocks.size();
12607cc2493dSspupyrev   auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
12617cc2493dSspupyrev   auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
126250724716SKazu Hirata   for (const auto &Jump : Func.Jumps) {
12637cc2493dSspupyrev     InFlow[Jump.Target] += Jump.Flow;
12647cc2493dSspupyrev     OutFlow[Jump.Source] += Jump.Flow;
12657cc2493dSspupyrev   }
12667cc2493dSspupyrev 
12677cc2493dSspupyrev   uint64_t TotalInFlow = 0;
12687cc2493dSspupyrev   uint64_t TotalOutFlow = 0;
12697cc2493dSspupyrev   for (uint64_t I = 0; I < NumBlocks; I++) {
12707cc2493dSspupyrev     auto &Block = Func.Blocks[I];
12717cc2493dSspupyrev     if (Block.isEntry()) {
12727cc2493dSspupyrev       TotalInFlow += Block.Flow;
12737cc2493dSspupyrev       assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
12747cc2493dSspupyrev     } else if (Block.isExit()) {
12757cc2493dSspupyrev       TotalOutFlow += Block.Flow;
12767cc2493dSspupyrev       assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
12777cc2493dSspupyrev     } else {
12787cc2493dSspupyrev       assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
12797cc2493dSspupyrev       assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
12807cc2493dSspupyrev     }
12817cc2493dSspupyrev   }
12827cc2493dSspupyrev   assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");
128398dd2f9eSspupyrev 
128498dd2f9eSspupyrev   // Verify that there are no isolated flow components
128598dd2f9eSspupyrev   // One could modify FlowFunction to hold edges indexed by the sources, which
128698dd2f9eSspupyrev   // will avoid a creation of the object
128798dd2f9eSspupyrev   auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
128850724716SKazu Hirata   for (const auto &Jump : Func.Jumps) {
128998dd2f9eSspupyrev     if (Jump.Flow > 0) {
129098dd2f9eSspupyrev       PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
129198dd2f9eSspupyrev     }
129298dd2f9eSspupyrev   }
129398dd2f9eSspupyrev 
129493a2c291Sspupyrev   // Run BFS from the source along edges with positive flow
129598dd2f9eSspupyrev   std::queue<uint64_t> Queue;
12965f4ae564SJan Svoboda   auto Visited = BitVector(NumBlocks, false);
129798dd2f9eSspupyrev   Queue.push(Func.Entry);
129898dd2f9eSspupyrev   Visited[Func.Entry] = true;
129998dd2f9eSspupyrev   while (!Queue.empty()) {
130098dd2f9eSspupyrev     uint64_t Src = Queue.front();
130198dd2f9eSspupyrev     Queue.pop();
130298dd2f9eSspupyrev     for (uint64_t Dst : PositiveFlowEdges[Src]) {
130398dd2f9eSspupyrev       if (!Visited[Dst]) {
130498dd2f9eSspupyrev         Queue.push(Dst);
130598dd2f9eSspupyrev         Visited[Dst] = true;
130698dd2f9eSspupyrev       }
130798dd2f9eSspupyrev     }
130898dd2f9eSspupyrev   }
130998dd2f9eSspupyrev 
131098dd2f9eSspupyrev   // Verify that every block that has a positive flow is reached from the source
131198dd2f9eSspupyrev   // along edges with a positive flow
131298dd2f9eSspupyrev   for (uint64_t I = 0; I < NumBlocks; I++) {
131398dd2f9eSspupyrev     auto &Block = Func.Blocks[I];
131498dd2f9eSspupyrev     assert((Visited[I] || Block.Flow == 0) && "an isolated flow component");
131598dd2f9eSspupyrev   }
13167cc2493dSspupyrev }
13177cc2493dSspupyrev #endif
13187cc2493dSspupyrev 
13197cc2493dSspupyrev } // end of anonymous namespace
13207cc2493dSspupyrev 
13212ee4ddaeSspupyrev /// Apply the profile inference algorithm for a given function and provided
13222ee4ddaeSspupyrev /// profi options
applyFlowInference(const ProfiParams & Params,FlowFunction & Func)132361eb12e1Sspupyrev void llvm::applyFlowInference(const ProfiParams &Params, FlowFunction &Func) {
13242ee4ddaeSspupyrev   // Check if the function has samples and assign initial flow values
13252ee4ddaeSspupyrev   bool HasSamples = false;
13262ee4ddaeSspupyrev   for (FlowBlock &Block : Func.Blocks) {
13272ee4ddaeSspupyrev     if (Block.Weight > 0)
13282ee4ddaeSspupyrev       HasSamples = true;
13292ee4ddaeSspupyrev     Block.Flow = Block.Weight;
13302ee4ddaeSspupyrev   }
13312ee4ddaeSspupyrev   for (FlowJump &Jump : Func.Jumps) {
13322ee4ddaeSspupyrev     if (Jump.Weight > 0)
13332ee4ddaeSspupyrev       HasSamples = true;
13342ee4ddaeSspupyrev     Jump.Flow = Jump.Weight;
13352ee4ddaeSspupyrev   }
13362ee4ddaeSspupyrev 
13372ee4ddaeSspupyrev   // Quit early for functions with a single block or ones w/o samples
13382ee4ddaeSspupyrev   if (Func.Blocks.size() <= 1 || !HasSamples)
13392ee4ddaeSspupyrev     return;
13402ee4ddaeSspupyrev 
134145b15592Sspupyrev #ifndef NDEBUG
134245b15592Sspupyrev   // Verify the input data
134345b15592Sspupyrev   verifyInput(Func);
134445b15592Sspupyrev #endif
134545b15592Sspupyrev 
13467cc2493dSspupyrev   // Create and apply an inference network model
134761eb12e1Sspupyrev   auto InferenceNetwork = MinCostMaxFlow(Params);
134861eb12e1Sspupyrev   initializeNetwork(Params, InferenceNetwork, Func);
13497cc2493dSspupyrev   InferenceNetwork.run();
13507cc2493dSspupyrev 
13517cc2493dSspupyrev   // Extract flow values for every block and every edge
135245b15592Sspupyrev   extractWeights(Params, InferenceNetwork, Func);
13537cc2493dSspupyrev 
135498dd2f9eSspupyrev   // Post-processing adjustments to the flow
135561eb12e1Sspupyrev   auto Adjuster = FlowAdjuster(Params, Func);
135698dd2f9eSspupyrev   Adjuster.run();
135798dd2f9eSspupyrev 
13587cc2493dSspupyrev #ifndef NDEBUG
13597cc2493dSspupyrev   // Verify the result
136045b15592Sspupyrev   verifyOutput(Func);
13617cc2493dSspupyrev #endif
13627cc2493dSspupyrev }
136361eb12e1Sspupyrev 
136461eb12e1Sspupyrev /// Apply the profile inference algorithm for a given flow function
applyFlowInference(FlowFunction & Func)136561eb12e1Sspupyrev void llvm::applyFlowInference(FlowFunction &Func) {
136661eb12e1Sspupyrev   ProfiParams Params;
136761eb12e1Sspupyrev   // Set the params from the command-line flags.
136861eb12e1Sspupyrev   Params.EvenFlowDistribution = SampleProfileEvenFlowDistribution;
136961eb12e1Sspupyrev   Params.RebalanceUnknown = SampleProfileRebalanceUnknown;
137061eb12e1Sspupyrev   Params.JoinIslands = SampleProfileJoinIslands;
137161eb12e1Sspupyrev   Params.CostBlockInc = SampleProfileProfiCostBlockInc;
137261eb12e1Sspupyrev   Params.CostBlockDec = SampleProfileProfiCostBlockDec;
137361eb12e1Sspupyrev   Params.CostBlockEntryInc = SampleProfileProfiCostBlockEntryInc;
137461eb12e1Sspupyrev   Params.CostBlockEntryDec = SampleProfileProfiCostBlockEntryDec;
137561eb12e1Sspupyrev   Params.CostBlockZeroInc = SampleProfileProfiCostBlockZeroInc;
137661eb12e1Sspupyrev   Params.CostBlockUnknownInc = SampleProfileProfiCostBlockUnknownInc;
137761eb12e1Sspupyrev 
137861eb12e1Sspupyrev   applyFlowInference(Params, Func);
137961eb12e1Sspupyrev }
1380