xref: /llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp (revision d0166c617d77b9dd57e605cfb016438f202e9c9c)
1 //===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This file implements a profile inference algorithm. Given incomplete and
// possibly imprecise block counts, the algorithm reconstructs realistic block
// and edge counts that satisfy flow conservation rules, while minimally
// modifying the input block counts.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/Transforms/Utils/SampleProfileInference.h"
17 #include "llvm/ADT/BitVector.h"
18 #include "llvm/Support/CommandLine.h"
19 #include "llvm/Support/Debug.h"
20 #include <queue>
21 #include <set>
22 #include <stack>
23 
24 using namespace llvm;
25 #define DEBUG_TYPE "sample-profile-inference"
26 
27 namespace {
28 
// Hidden command-line options of the profile inference.

/// Toggles the even-distribution mode of MinCostMaxFlow: when set, the solver
/// allocates per-node augmenting edge lists and augments flow along DAGs of
/// shortest edges before falling back to single-path augmentation.
static cl::opt<bool> SampleProfileEvenCountDistribution(
    "sample-profile-even-count-distribution", cl::init(true), cl::Hidden,
    cl::desc("Try to evenly distribute counts when there are multiple equally "
             "likely options."));

/// Bounds how many times a single node may be entered by the DFS that builds
/// an augmenting DAG.
static cl::opt<unsigned> SampleProfileMaxDfsCalls(
    "sample-profile-max-dfs-calls", cl::init(10), cl::Hidden,
    cl::desc("Maximum number of dfs iterations for even count distribution."));

// Per-unit costs of changing block counts; see the individual descriptions.
static cl::opt<unsigned> SampleProfileProfiCostInc(
    "sample-profile-profi-cost-inc", cl::init(10), cl::Hidden,
    cl::desc("A cost of increasing a block's count by one."));

static cl::opt<unsigned> SampleProfileProfiCostDec(
    "sample-profile-profi-cost-dec", cl::init(20), cl::Hidden,
    cl::desc("A cost of decreasing a block's count by one."));

static cl::opt<unsigned> SampleProfileProfiCostIncZero(
    "sample-profile-profi-cost-inc-zero", cl::init(11), cl::Hidden,
    cl::desc("A cost of increasing a count of zero-weight block by one."));

static cl::opt<unsigned> SampleProfileProfiCostIncEntry(
    "sample-profile-profi-cost-inc-entry", cl::init(40), cl::Hidden,
    cl::desc("A cost of increasing the entry block's count by one."));

static cl::opt<unsigned> SampleProfileProfiCostDecEntry(
    "sample-profile-profi-cost-dec-entry", cl::init(10), cl::Hidden,
    cl::desc("A cost of decreasing the entry block's count by one."));

/// A value indicating an infinite flow/capacity/weight of a block/edge.
/// Not using numeric_limits<int64_t>::max(), as the values can be summed up
/// during the execution. The chosen value is 2^50.
static constexpr int64_t INF = ((int64_t)1) << 50;
62 
63 /// The minimum-cost maximum flow algorithm.
64 ///
65 /// The algorithm finds the maximum flow of minimum cost on a given (directed)
66 /// network using a modified version of the classical Moore-Bellman-Ford
67 /// approach. The algorithm applies a number of augmentation iterations in which
68 /// flow is sent along paths of positive capacity from the source to the sink.
/// The worst-case time complexity of the implementation is O(v(f)*m*n), where
/// m is the number of edges, n is the number of vertices, and v(f) is the
/// value of the maximum flow. However, the observed running time on typical
/// instances is sub-quadratic, that is, o(n^2).
73 ///
74 /// The input is a set of edges with specified costs and capacities, and a pair
75 /// of nodes (source and sink). The output is the flow along each edge of the
76 /// minimum total cost respecting the given edge capacities.
77 class MinCostMaxFlow {
78 public:
79   // Initialize algorithm's data structures for a network of a given size.
80   void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
81     Source = SourceNode;
82     Target = SinkNode;
83 
84     Nodes = std::vector<Node>(NodeCount);
85     Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
86     if (SampleProfileEvenCountDistribution)
87       AugmentingEdges =
88           std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>());
89   }
90 
91   // Run the algorithm.
92   int64_t run() {
93     // Iteratively find an augmentation path/dag in the network and send the
94     // flow along its edges
95     size_t AugmentationIters = applyFlowAugmentation();
96 
97     // Compute the total flow and its cost
98     int64_t TotalCost = 0;
99     int64_t TotalFlow = 0;
100     for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
101       for (auto &Edge : Edges[Src]) {
102         if (Edge.Flow > 0) {
103           TotalCost += Edge.Cost * Edge.Flow;
104           if (Src == Source)
105             TotalFlow += Edge.Flow;
106         }
107       }
108     }
109     LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
110                       << " iterations with " << TotalFlow << " total flow"
111                       << " of " << TotalCost << " cost\n");
112     (void)TotalFlow;
113     (void)AugmentationIters;
114     return TotalCost;
115   }
116 
117   /// Adding an edge to the network with a specified capacity and a cost.
118   /// Multiple edges between a pair of nodes are allowed but self-edges
119   /// are not supported.
120   void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) {
121     assert(Capacity > 0 && "adding an edge of zero capacity");
122     assert(Src != Dst && "loop edge are not supported");
123 
124     Edge SrcEdge;
125     SrcEdge.Dst = Dst;
126     SrcEdge.Cost = Cost;
127     SrcEdge.Capacity = Capacity;
128     SrcEdge.Flow = 0;
129     SrcEdge.RevEdgeIndex = Edges[Dst].size();
130 
131     Edge DstEdge;
132     DstEdge.Dst = Src;
133     DstEdge.Cost = -Cost;
134     DstEdge.Capacity = 0;
135     DstEdge.Flow = 0;
136     DstEdge.RevEdgeIndex = Edges[Src].size();
137 
138     Edges[Src].push_back(SrcEdge);
139     Edges[Dst].push_back(DstEdge);
140   }
141 
142   /// Adding an edge to the network of infinite capacity and a given cost.
143   void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) {
144     addEdge(Src, Dst, INF, Cost);
145   }
146 
147   /// Get the total flow from a given source node.
148   /// Returns a list of pairs (target node, amount of flow to the target).
149   const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
150     std::vector<std::pair<uint64_t, int64_t>> Flow;
151     for (const auto &Edge : Edges[Src]) {
152       if (Edge.Flow > 0)
153         Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
154     }
155     return Flow;
156   }
157 
158   /// Get the total flow between a pair of nodes.
159   int64_t getFlow(uint64_t Src, uint64_t Dst) const {
160     int64_t Flow = 0;
161     for (const auto &Edge : Edges[Src]) {
162       if (Edge.Dst == Dst) {
163         Flow += Edge.Flow;
164       }
165     }
166     return Flow;
167   }
168 
169   /// A cost of taking an unlikely jump.
170   static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 30;
171   /// Minimum BaseDistance for the jump distance values in island joining.
172   static constexpr uint64_t MinBaseDistance = 10000;
173 
174 private:
175   /// Iteratively find an augmentation path/dag in the network and send the
176   /// flow along its edges. The method returns the number of applied iterations.
177   size_t applyFlowAugmentation() {
178     size_t AugmentationIters = 0;
179     while (findAugmentingPath()) {
180       uint64_t PathCapacity = computeAugmentingPathCapacity();
181       while (PathCapacity > 0) {
182         bool Progress = false;
183         if (SampleProfileEvenCountDistribution) {
184           // Identify node/edge candidates for augmentation
185           identifyShortestEdges(PathCapacity);
186 
187           // Find an augmenting DAG
188           auto AugmentingOrder = findAugmentingDAG();
189 
190           // Apply the DAG augmentation
191           Progress = augmentFlowAlongDAG(AugmentingOrder);
192           PathCapacity = computeAugmentingPathCapacity();
193         }
194 
195         if (!Progress) {
196           augmentFlowAlongPath(PathCapacity);
197           PathCapacity = 0;
198         }
199 
200         AugmentationIters++;
201       }
202     }
203     return AugmentationIters;
204   }
205 
206   /// Compute the capacity of the cannonical augmenting path. If the path is
207   /// saturated (that is, no flow can be sent along the path), then return 0.
208   uint64_t computeAugmentingPathCapacity() {
209     uint64_t PathCapacity = INF;
210     uint64_t Now = Target;
211     while (Now != Source) {
212       uint64_t Pred = Nodes[Now].ParentNode;
213       auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
214 
215       assert(Edge.Capacity >= Edge.Flow && "incorrect edge flow");
216       uint64_t EdgeCapacity = uint64_t(Edge.Capacity - Edge.Flow);
217       PathCapacity = std::min(PathCapacity, EdgeCapacity);
218 
219       Now = Pred;
220     }
221     return PathCapacity;
222   }
223 
224   /// Check for existence of an augmenting path with a positive capacity.
225   bool findAugmentingPath() {
226     // Initialize data structures
227     for (auto &Node : Nodes) {
228       Node.Distance = INF;
229       Node.ParentNode = uint64_t(-1);
230       Node.ParentEdgeIndex = uint64_t(-1);
231       Node.Taken = false;
232     }
233 
234     std::queue<uint64_t> Queue;
235     Queue.push(Source);
236     Nodes[Source].Distance = 0;
237     Nodes[Source].Taken = true;
238     while (!Queue.empty()) {
239       uint64_t Src = Queue.front();
240       Queue.pop();
241       Nodes[Src].Taken = false;
242       // Although the residual network contains edges with negative costs
243       // (in particular, backward edges), it can be shown that there are no
244       // negative-weight cycles and the following two invariants are maintained:
245       // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V,
246       // where Dist is the length of the shortest path between two nodes. This
247       // allows to prune the search-space of the path-finding algorithm using
248       // the following early-stop criteria:
249       // -- If we find a path with zero-distance from Source to Target, stop the
250       //    search, as the path is the shortest since Dist[Source, Target] >= 0;
251       // -- If we have Dist[Source, V] > Dist[Source, Target], then do not
252       //    process node V, as it is guaranteed _not_ to be on a shortest path
253       //    from Source to Target; it follows from inequalities
254       //    Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
255       //                         >= Dist[Source, V]
256       if (!SampleProfileEvenCountDistribution && Nodes[Target].Distance == 0)
257         break;
258       if (Nodes[Src].Distance > Nodes[Target].Distance)
259         continue;
260 
261       // Process adjacent edges
262       for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
263         auto &Edge = Edges[Src][EdgeIdx];
264         if (Edge.Flow < Edge.Capacity) {
265           uint64_t Dst = Edge.Dst;
266           int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
267           if (Nodes[Dst].Distance > NewDistance) {
268             // Update the distance and the parent node/edge
269             Nodes[Dst].Distance = NewDistance;
270             Nodes[Dst].ParentNode = Src;
271             Nodes[Dst].ParentEdgeIndex = EdgeIdx;
272             // Add the node to the queue, if it is not there yet
273             if (!Nodes[Dst].Taken) {
274               Queue.push(Dst);
275               Nodes[Dst].Taken = true;
276             }
277           }
278         }
279       }
280     }
281 
282     return Nodes[Target].Distance != INF;
283   }
284 
285   /// Update the current flow along the augmenting path.
286   void augmentFlowAlongPath(uint64_t PathCapacity) {
287     assert(PathCapacity > 0 && "found an incorrect augmenting path");
288     uint64_t Now = Target;
289     while (Now != Source) {
290       uint64_t Pred = Nodes[Now].ParentNode;
291       auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
292       auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];
293 
294       Edge.Flow += PathCapacity;
295       RevEdge.Flow -= PathCapacity;
296 
297       Now = Pred;
298     }
299   }
300 
301   /// Find an Augmenting DAG order using a modified version of DFS in which we
302   /// can visit a node multiple times. In the DFS search, when scanning each
303   /// edge out of a node, continue search at Edge.Dst endpoint if it has not
304   /// been discovered yet and its NumCalls < MaxDfsCalls. The algorithm
305   /// runs in O(MaxDfsCalls * |Edges| + |Nodes|) time.
306   /// It returns an Augmenting Order (Taken nodes in decreasing Finish time)
307   /// that starts with Source and ends with Target.
308   std::vector<uint64_t> findAugmentingDAG() {
309     // We use a stack based implemenation of DFS to avoid recursion.
310     // Defining DFS data structures:
311     // A pair (NodeIdx, EdgeIdx) at the top of the Stack denotes that
312     //  - we are currently visiting Nodes[NodeIdx] and
313     //  - the next edge to scan is Edges[NodeIdx][EdgeIdx]
314     typedef std::pair<uint64_t, uint64_t> StackItemType;
315     std::stack<StackItemType> Stack;
316     std::vector<uint64_t> AugmentingOrder;
317 
318     // Phase 0: Initialize Node attributes and Time for DFS run
319     for (auto &Node : Nodes) {
320       Node.Discovery = 0;
321       Node.Finish = 0;
322       Node.NumCalls = 0;
323       Node.Taken = false;
324     }
325     uint64_t Time = 0;
326     // Mark Target as Taken
327     // Taken attribute will be propagated backwards from Target towards Source
328     Nodes[Target].Taken = true;
329 
330     // Phase 1: Start DFS traversal from Source
331     Stack.emplace(Source, 0);
332     Nodes[Source].Discovery = ++Time;
333     while (!Stack.empty()) {
334       auto NodeIdx = Stack.top().first;
335       auto EdgeIdx = Stack.top().second;
336 
337       // If we haven't scanned all edges out of NodeIdx, continue scanning
338       if (EdgeIdx < Edges[NodeIdx].size()) {
339         auto &Edge = Edges[NodeIdx][EdgeIdx];
340         auto &Dst = Nodes[Edge.Dst];
341         Stack.top().second++;
342 
343         if (Edge.OnShortestPath) {
344           // If we haven't seen Edge.Dst so far, continue DFS search there
345           if (Dst.Discovery == 0 && Dst.NumCalls < SampleProfileMaxDfsCalls) {
346             Dst.Discovery = ++Time;
347             Stack.emplace(Edge.Dst, 0);
348             Dst.NumCalls++;
349           } else if (Dst.Taken && Dst.Finish != 0) {
350             // Else, if Edge.Dst already have a path to Target, so that NodeIdx
351             Nodes[NodeIdx].Taken = true;
352           }
353         }
354       } else {
355         // If we are done scanning all edge out of NodeIdx
356         Stack.pop();
357         // If we haven't found a path from NodeIdx to Target, forget about it
358         if (!Nodes[NodeIdx].Taken) {
359           Nodes[NodeIdx].Discovery = 0;
360         } else {
361           // If we have found a path from NodeIdx to Target, then finish NodeIdx
362           // and propagate Taken flag to DFS parent unless at the Source
363           Nodes[NodeIdx].Finish = ++Time;
364           // NodeIdx == Source if and only if the stack is empty
365           if (NodeIdx != Source) {
366             assert(!Stack.empty() && "empty stack while running dfs");
367             Nodes[Stack.top().first].Taken = true;
368           }
369           AugmentingOrder.push_back(NodeIdx);
370         }
371       }
372     }
373     // Nodes are collected decreasing Finish time, so the order is reversed
374     std::reverse(AugmentingOrder.begin(), AugmentingOrder.end());
375 
376     // Phase 2: Extract all forward (DAG) edges and fill in AugmentingEdges
377     for (size_t Src : AugmentingOrder) {
378       AugmentingEdges[Src].clear();
379       for (auto &Edge : Edges[Src]) {
380         uint64_t Dst = Edge.Dst;
381         if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken &&
382             Nodes[Dst].Finish < Nodes[Src].Finish) {
383           AugmentingEdges[Src].push_back(&Edge);
384         }
385       }
386       assert((Src == Target || !AugmentingEdges[Src].empty()) &&
387              "incorrectly constructed augmenting edges");
388     }
389 
390     return AugmentingOrder;
391   }
392 
393   /// Update the current flow along the given (acyclic) subgraph specified by
394   /// the vertex order, AugmentingOrder. The objective is to send as much flow
395   /// as possible while evenly distributing flow among successors of each node.
396   /// After the update at least one edge is saturated.
397   bool augmentFlowAlongDAG(const std::vector<uint64_t> &AugmentingOrder) {
398     // Phase 0: Initialization
399     for (uint64_t Src : AugmentingOrder) {
400       Nodes[Src].FracFlow = 0;
401       Nodes[Src].IntFlow = 0;
402       for (auto &Edge : AugmentingEdges[Src]) {
403         Edge->AugmentedFlow = 0;
404       }
405     }
406 
407     // Phase 1: Send a unit of fractional flow along the DAG
408     uint64_t MaxFlowAmount = INF;
409     Nodes[Source].FracFlow = 1.0;
410     for (uint64_t Src : AugmentingOrder) {
411       assert((Src == Target || Nodes[Src].FracFlow > 0.0) &&
412              "incorrectly computed fractional flow");
413       // Distribute flow evenly among successors of Src
414       uint64_t Degree = AugmentingEdges[Src].size();
415       for (auto &Edge : AugmentingEdges[Src]) {
416         double EdgeFlow = Nodes[Src].FracFlow / Degree;
417         Nodes[Edge->Dst].FracFlow += EdgeFlow;
418         if (Edge->Capacity == INF)
419           continue;
420         uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow;
421         MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow);
422       }
423     }
424     // Stop early if we cannot send any (integral) flow from Source to Target
425     if (MaxFlowAmount == 0)
426       return false;
427 
428     // Phase 2: Send an integral flow of MaxFlowAmount
429     Nodes[Source].IntFlow = MaxFlowAmount;
430     for (uint64_t Src : AugmentingOrder) {
431       if (Src == Target)
432         break;
433       // Distribute flow evenly among successors of Src, rounding up to make
434       // sure all flow is sent
435       uint64_t Degree = AugmentingEdges[Src].size();
436       // We are guaranteeed that Node[Src].IntFlow <= SuccFlow * Degree
437       uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree;
438       for (auto &Edge : AugmentingEdges[Src]) {
439         uint64_t Dst = Edge->Dst;
440         uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow);
441         EdgeFlow = std::min(EdgeFlow, uint64_t(Edge->Capacity - Edge->Flow));
442         Nodes[Dst].IntFlow += EdgeFlow;
443         Nodes[Src].IntFlow -= EdgeFlow;
444         Edge->AugmentedFlow += EdgeFlow;
445       }
446     }
447     assert(Nodes[Target].IntFlow <= MaxFlowAmount);
448     Nodes[Target].IntFlow = 0;
449 
450     // Phase 3: Send excess flow back traversing the nodes backwards.
451     // Because of rounding, not all flow can be sent along the edges of Src.
452     // Hence, sending the remaining flow back to maintain flow conservation
453     for (size_t Idx = AugmentingOrder.size() - 1; Idx > 0; Idx--) {
454       uint64_t Src = AugmentingOrder[Idx - 1];
455       // Try to send excess flow back along each edge.
456       // Make sure we only send back flow we just augmented (AugmentedFlow).
457       for (auto &Edge : AugmentingEdges[Src]) {
458         uint64_t Dst = Edge->Dst;
459         if (Nodes[Dst].IntFlow == 0)
460           continue;
461         uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow);
462         Nodes[Dst].IntFlow -= EdgeFlow;
463         Nodes[Src].IntFlow += EdgeFlow;
464         Edge->AugmentedFlow -= EdgeFlow;
465       }
466     }
467 
468     // Phase 4: Update flow values along all edges
469     bool HasSaturatedEdges = false;
470     for (uint64_t Src : AugmentingOrder) {
471       // Verify that we have sent all the excess flow from the node
472       assert(Src == Source || Nodes[Src].IntFlow == 0);
473       for (auto &Edge : AugmentingEdges[Src]) {
474         assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow);
475         // Update flow values along the edge and its reverse copy
476         auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex];
477         Edge->Flow += Edge->AugmentedFlow;
478         RevEdge.Flow -= Edge->AugmentedFlow;
479         if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0)
480           HasSaturatedEdges = true;
481       }
482     }
483 
484     // The augmentation is successful iff at least one edge becomes saturated
485     return HasSaturatedEdges;
486   }
487 
488   /// Identify candidate (shortest) edges for augmentation.
489   void identifyShortestEdges(uint64_t PathCapacity) {
490     assert(PathCapacity > 0 && "found an incorrect augmenting DAG");
491     // To make sure the augmentation DAG contains only edges with large residual
492     // capacity, we prune all edges whose capacity is below a fraction of
493     // the capacity of the augmented path.
494     // (All edges of the path itself are always in the DAG)
495     uint64_t MinCapacity = std::max(PathCapacity / 2, uint64_t(1));
496 
497     // Decide which edges are on a shortest path from Source to Target
498     for (size_t Src = 0; Src < Nodes.size(); Src++) {
499       // An edge cannot be augmenting if the endpoint has large distance
500       if (Nodes[Src].Distance > Nodes[Target].Distance)
501         continue;
502 
503       for (auto &Edge : Edges[Src]) {
504         uint64_t Dst = Edge.Dst;
505         Edge.OnShortestPath =
506             Src != Target && Dst != Source &&
507             Nodes[Dst].Distance <= Nodes[Target].Distance &&
508             Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost &&
509             Edge.Capacity > Edge.Flow &&
510             uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity;
511       }
512     }
513   }
514 
515   /// A node in a flow network.
516   struct Node {
517     /// The cost of the cheapest path from the source to the current node.
518     int64_t Distance;
519     /// The node preceding the current one in the path.
520     uint64_t ParentNode;
521     /// The index of the edge between ParentNode and the current node.
522     uint64_t ParentEdgeIndex;
523     /// An indicator of whether the current node is in a queue.
524     bool Taken;
525 
526     /// Data fields utilized in DAG-augmentation:
527     /// Fractional flow.
528     double FracFlow;
529     /// Integral flow.
530     uint64_t IntFlow;
531     /// Discovery time.
532     uint64_t Discovery;
533     /// Finish time.
534     uint64_t Finish;
535     /// NumCalls.
536     uint64_t NumCalls;
537   };
538 
539   /// An edge in a flow network.
540   struct Edge {
541     /// The cost of the edge.
542     int64_t Cost;
543     /// The capacity of the edge.
544     int64_t Capacity;
545     /// The current flow on the edge.
546     int64_t Flow;
547     /// The destination node of the edge.
548     uint64_t Dst;
549     /// The index of the reverse edge between Dst and the current node.
550     uint64_t RevEdgeIndex;
551 
552     /// Data fields utilized in DAG-augmentation:
553     /// Whether the edge is currently on a shortest path from Source to Target.
554     bool OnShortestPath;
555     /// Extra flow along the edge.
556     uint64_t AugmentedFlow;
557   };
558 
559   /// The set of network nodes.
560   std::vector<Node> Nodes;
561   /// The set of network edges.
562   std::vector<std::vector<Edge>> Edges;
563   /// Source node of the flow.
564   uint64_t Source;
565   /// Target (sink) node of the flow.
566   uint64_t Target;
567   /// Augmenting edges.
568   std::vector<std::vector<Edge *>> AugmentingEdges;
569 };
570 
571 /// A post-processing adjustment of control flow. It applies two steps by
572 /// rerouting some flow and making it more realistic:
573 ///
/// - First, it removes all isolated components ("islands") with a positive flow
///   that are unreachable from the entry block. For every such component, we
///   find the shortest path from the entry to an exit passing through the
///   component, and increase the flow by one unit along the path.
///
/// - Second, it identifies all "unknown subgraphs" consisting of basic blocks
///   with no sampled counts. Then it rebalances the flow that goes through such
///   a subgraph so that each branch is taken with probability 50%.
582 ///   An unknown subgraph is such that for every two nodes u and v:
583 ///     - u dominates v and u is not unknown;
584 ///     - v post-dominates u; and
585 ///     - all inner-nodes of all (u,v)-paths are unknown.
586 ///
587 class FlowAdjuster {
588 public:
  /// Create an adjuster for \p Func; the argument initializes the member of
  /// the same name, which the other methods of the class operate on.
  FlowAdjuster(FlowFunction &Func) : Func(Func) {
    // Func.Entry must be the index of the block that reports itself as entry
    assert(Func.Blocks[Func.Entry].isEntry() &&
           "incorrect index of the entry block");
  }
593 
  /// Run the post-processing: first join the isolated components, then
  /// rebalance the unknown subgraphs.
  void run() {
    // Adjust the flow to get rid of isolated components.
    joinIsolatedComponents();

    // Rebalance the flow inside unknown subgraphs.
    rebalanceUnknownSubgraphs();
  }
602 
603 private:
604   void joinIsolatedComponents() {
605     // Find blocks that are reachable from the source
606     auto Visited = BitVector(NumBlocks(), false);
607     findReachable(Func.Entry, Visited);
608 
609     // Iterate over all non-reachable blocks and adjust their weights
610     for (uint64_t I = 0; I < NumBlocks(); I++) {
611       auto &Block = Func.Blocks[I];
612       if (Block.Flow > 0 && !Visited[I]) {
613         // Find a path from the entry to an exit passing through the block I
614         auto Path = findShortestPath(I);
615         // Increase the flow along the path
616         assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
617                "incorrectly computed path adjusting control flow");
618         Func.Blocks[Func.Entry].Flow += 1;
619         for (auto &Jump : Path) {
620           Jump->Flow += 1;
621           Func.Blocks[Jump->Target].Flow += 1;
622           // Update reachability
623           findReachable(Jump->Target, Visited);
624         }
625       }
626     }
627   }
628 
629   /// Run BFS from a given block along the jumps with a positive flow and mark
630   /// all reachable blocks.
631   void findReachable(uint64_t Src, BitVector &Visited) {
632     if (Visited[Src])
633       return;
634     std::queue<uint64_t> Queue;
635     Queue.push(Src);
636     Visited[Src] = true;
637     while (!Queue.empty()) {
638       Src = Queue.front();
639       Queue.pop();
640       for (auto *Jump : Func.Blocks[Src].SuccJumps) {
641         uint64_t Dst = Jump->Target;
642         if (Jump->Flow > 0 && !Visited[Dst]) {
643           Queue.push(Dst);
644           Visited[Dst] = true;
645         }
646       }
647     }
648   }
649 
650   /// Find the shortest path from the entry block to an exit block passing
651   /// through a given block.
652   std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {
653     // A path from the entry block to BlockIdx
654     auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
655     // A path from BlockIdx to an exit block
656     auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);
657 
658     // Concatenate the two paths
659     std::vector<FlowJump *> Result;
660     Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end());
661     Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end());
662     return Result;
663   }
664 
665   /// Apply the Dijkstra algorithm to find the shortest path from a given
666   /// Source to a given Target block.
667   /// If Target == -1, then the path ends at an exit block.
668   std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) {
669     // Quit early, if possible
670     if (Source == Target)
671       return std::vector<FlowJump *>();
672     if (Func.Blocks[Source].isExit() && Target == AnyExitBlock)
673       return std::vector<FlowJump *>();
674 
675     // Initialize data structures
676     auto Distance = std::vector<int64_t>(NumBlocks(), INF);
677     auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr);
678     Distance[Source] = 0;
679     std::set<std::pair<uint64_t, uint64_t>> Queue;
680     Queue.insert(std::make_pair(Distance[Source], Source));
681 
682     // Run the Dijkstra algorithm
683     while (!Queue.empty()) {
684       uint64_t Src = Queue.begin()->second;
685       Queue.erase(Queue.begin());
686       // If we found a solution, quit early
687       if (Src == Target ||
688           (Func.Blocks[Src].isExit() && Target == AnyExitBlock))
689         break;
690 
691       for (auto *Jump : Func.Blocks[Src].SuccJumps) {
692         uint64_t Dst = Jump->Target;
693         int64_t JumpDist = jumpDistance(Jump);
694         if (Distance[Dst] > Distance[Src] + JumpDist) {
695           Queue.erase(std::make_pair(Distance[Dst], Dst));
696 
697           Distance[Dst] = Distance[Src] + JumpDist;
698           Parent[Dst] = Jump;
699 
700           Queue.insert(std::make_pair(Distance[Dst], Dst));
701         }
702       }
703     }
704     // If Target is not provided, find the closest exit block
705     if (Target == AnyExitBlock) {
706       for (uint64_t I = 0; I < NumBlocks(); I++) {
707         if (Func.Blocks[I].isExit() && Parent[I] != nullptr) {
708           if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {
709             Target = I;
710           }
711         }
712       }
713     }
714     assert(Parent[Target] != nullptr && "a path does not exist");
715 
716     // Extract the constructed path
717     std::vector<FlowJump *> Result;
718     uint64_t Now = Target;
719     while (Now != Source) {
720       assert(Now == Parent[Now]->Target && "incorrect parent jump");
721       Result.push_back(Parent[Now]);
722       Now = Parent[Now]->Source;
723     }
724     // Reverse the path, since it is extracted from Target to Source
725     std::reverse(Result.begin(), Result.end());
726     return Result;
727   }
728 
729   /// A distance of a path for a given jump.
730   /// In order to incite the path to use blocks/jumps with large positive flow,
731   /// and avoid changing branch probability of outgoing edges drastically,
732   /// set the jump distance so as:
733   ///   - to minimize the number of unlikely jumps used and subject to that,
734   ///   - to minimize the number of Flow == 0 jumps used and subject to that,
735   ///   - minimizes total multiplicative Flow increase for the remaining edges.
736   /// To capture this objective with integer distances, we round off fractional
737   /// parts to a multiple of 1 / BaseDistance.
738   int64_t jumpDistance(FlowJump *Jump) const {
739     uint64_t BaseDistance =
740         std::max(MinCostMaxFlow::MinBaseDistance,
741                  std::min(Func.Blocks[Func.Entry].Flow,
742                           MinCostMaxFlow::AuxCostUnlikely / NumBlocks()));
743     if (Jump->IsUnlikely)
744       return MinCostMaxFlow::AuxCostUnlikely;
745     if (Jump->Flow > 0)
746       return BaseDistance + BaseDistance / Jump->Flow;
747     return BaseDistance * NumBlocks();
748   };
749 
  /// The number of blocks in the function being adjusted.
  uint64_t NumBlocks() const { return Func.Blocks.size(); }
751 
752   /// Rebalance unknown subgraphs so that the flow is split evenly across the
753   /// outgoing branches of every block of the subgraph. The method iterates over
754   /// blocks with known weight and identifies unknown subgraphs rooted at the
755   /// blocks. Then it verifies if flow rebalancing is feasible and applies it.
756   void rebalanceUnknownSubgraphs() {
757     // Try to find unknown subgraphs from each block
758     for (uint64_t I = 0; I < Func.Blocks.size(); I++) {
759       auto SrcBlock = &Func.Blocks[I];
760       // Verify if rebalancing rooted at SrcBlock is feasible
761       if (!canRebalanceAtRoot(SrcBlock))
762         continue;
763 
764       // Find an unknown subgraphs starting at SrcBlock. Along the way,
765       // fill in known destinations and intermediate unknown blocks.
766       std::vector<FlowBlock *> UnknownBlocks;
767       std::vector<FlowBlock *> KnownDstBlocks;
768       findUnknownSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks);
769 
770       // Verify if rebalancing of the subgraph is feasible. If the search is
771       // successful, find the unique destination block (which can be null)
772       FlowBlock *DstBlock = nullptr;
773       if (!canRebalanceSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks,
774                                 DstBlock))
775         continue;
776 
777       // We cannot rebalance subgraphs containing cycles among unknown blocks
778       if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownBlocks))
779         continue;
780 
781       // Rebalance the flow
782       rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownBlocks);
783     }
784   }
785 
786   /// Verify if rebalancing rooted at a given block is possible.
787   bool canRebalanceAtRoot(const FlowBlock *SrcBlock) {
788     // Do not attempt to find unknown subgraphs from an unknown or a
789     // zero-flow block
790     if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0)
791       return false;
792 
793     // Do not attempt to process subgraphs from a block w/o unknown sucessors
794     bool HasUnknownSuccs = false;
795     for (auto *Jump : SrcBlock->SuccJumps) {
796       if (Func.Blocks[Jump->Target].UnknownWeight) {
797         HasUnknownSuccs = true;
798         break;
799       }
800     }
801     if (!HasUnknownSuccs)
802       return false;
803 
804     return true;
805   }
806 
807   /// Find an unknown subgraph starting at block SrcBlock. The method sets
808   /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks.
809   void findUnknownSubgraph(const FlowBlock *SrcBlock,
810                            std::vector<FlowBlock *> &KnownDstBlocks,
811                            std::vector<FlowBlock *> &UnknownBlocks) {
812     // Run BFS from SrcBlock and make sure all paths are going through unknown
813     // blocks and end at a known DstBlock
814     auto Visited = BitVector(NumBlocks(), false);
815     std::queue<uint64_t> Queue;
816 
817     Queue.push(SrcBlock->Index);
818     Visited[SrcBlock->Index] = true;
819     while (!Queue.empty()) {
820       auto &Block = Func.Blocks[Queue.front()];
821       Queue.pop();
822       // Process blocks reachable from Block
823       for (auto *Jump : Block.SuccJumps) {
824         // If Jump can be ignored, skip it
825         if (ignoreJump(SrcBlock, nullptr, Jump))
826           continue;
827 
828         uint64_t Dst = Jump->Target;
829         // If Dst has been visited, skip Jump
830         if (Visited[Dst])
831           continue;
832         // Process block Dst
833         Visited[Dst] = true;
834         if (!Func.Blocks[Dst].UnknownWeight) {
835           KnownDstBlocks.push_back(&Func.Blocks[Dst]);
836         } else {
837           Queue.push(Dst);
838           UnknownBlocks.push_back(&Func.Blocks[Dst]);
839         }
840       }
841     }
842   }
843 
844   /// Verify if rebalancing of the subgraph is feasible. If the checks are
845   /// successful, set the unique destination block, DstBlock (can be null).
846   bool canRebalanceSubgraph(const FlowBlock *SrcBlock,
847                             const std::vector<FlowBlock *> &KnownDstBlocks,
848                             const std::vector<FlowBlock *> &UnknownBlocks,
849                             FlowBlock *&DstBlock) {
850     // If the list of unknown blocks is empty, we don't need rebalancing
851     if (UnknownBlocks.empty())
852       return false;
853 
854     // If there are multiple known sinks, we can't rebalance
855     if (KnownDstBlocks.size() > 1)
856       return false;
857     DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();
858 
859     // Verify sinks of the subgraph
860     for (auto *Block : UnknownBlocks) {
861       if (Block->SuccJumps.empty()) {
862         // If there are multiple (known and unknown) sinks, we can't rebalance
863         if (DstBlock != nullptr)
864           return false;
865         continue;
866       }
867       size_t NumIgnoredJumps = 0;
868       for (auto *Jump : Block->SuccJumps) {
869         if (ignoreJump(SrcBlock, DstBlock, Jump))
870           NumIgnoredJumps++;
871       }
872       // If there is a non-sink block in UnknownBlocks with all jumps ignored,
873       // then we can't rebalance
874       if (NumIgnoredJumps == Block->SuccJumps.size())
875         return false;
876     }
877 
878     return true;
879   }
880 
  /// Decide whether the Jump is ignored while processing an unknown subgraph
  /// rooted at basic block SrcBlock with the destination block, DstBlock.
883   bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
884                   const FlowJump *Jump) {
885     // Ignore unlikely jumps with zero flow
886     if (Jump->IsUnlikely && Jump->Flow == 0)
887       return true;
888 
889     auto JumpSource = &Func.Blocks[Jump->Source];
890     auto JumpTarget = &Func.Blocks[Jump->Target];
891 
892     // Do not ignore jumps coming into DstBlock
893     if (DstBlock != nullptr && JumpTarget == DstBlock)
894       return false;
895 
896     // Ignore jumps out of SrcBlock to known blocks
897     if (!JumpTarget->UnknownWeight && JumpSource == SrcBlock)
898       return true;
899 
900     // Ignore jumps to known blocks with zero flow
901     if (!JumpTarget->UnknownWeight && JumpTarget->Flow == 0)
902       return true;
903 
904     return false;
905   }
906 
907   /// Verify if the given unknown subgraph is acyclic, and if yes, reorder
908   /// UnknownBlocks in the topological order (so that all jumps are "forward").
909   bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
910                          std::vector<FlowBlock *> &UnknownBlocks) {
911     // Extract local in-degrees in the considered subgraph
912     auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
913     auto fillInDegree = [&](const FlowBlock *Block) {
914       for (auto *Jump : Block->SuccJumps) {
915         if (ignoreJump(SrcBlock, DstBlock, Jump))
916           continue;
917         LocalInDegree[Jump->Target]++;
918       }
919     };
920     fillInDegree(SrcBlock);
921     for (auto *Block : UnknownBlocks) {
922       fillInDegree(Block);
923     }
924     // A loop containing SrcBlock
925     if (LocalInDegree[SrcBlock->Index] > 0)
926       return false;
927 
928     std::vector<FlowBlock *> AcyclicOrder;
929     std::queue<uint64_t> Queue;
930     Queue.push(SrcBlock->Index);
931     while (!Queue.empty()) {
932       FlowBlock *Block = &Func.Blocks[Queue.front()];
933       Queue.pop();
934       // Stop propagation once we reach DstBlock, if any
935       if (DstBlock != nullptr && Block == DstBlock)
936         break;
937 
938       // Keep an acyclic order of unknown blocks
939       if (Block->UnknownWeight && Block != SrcBlock)
940         AcyclicOrder.push_back(Block);
941 
942       // Add to the queue all successors with zero local in-degree
943       for (auto *Jump : Block->SuccJumps) {
944         if (ignoreJump(SrcBlock, DstBlock, Jump))
945           continue;
946         uint64_t Dst = Jump->Target;
947         LocalInDegree[Dst]--;
948         if (LocalInDegree[Dst] == 0) {
949           Queue.push(Dst);
950         }
951       }
952     }
953 
954     // If there is a cycle in the subgraph, AcyclicOrder contains only a subset
955     // of all blocks
956     if (UnknownBlocks.size() != AcyclicOrder.size())
957       return false;
958     UnknownBlocks = AcyclicOrder;
959     return true;
960   }
961 
962   /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and
963   /// having UnknownBlocks intermediate blocks.
964   void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock,
965                                 const FlowBlock *DstBlock,
966                                 const std::vector<FlowBlock *> &UnknownBlocks) {
967     assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");
968 
969     // Ditribute flow from the source block
970     uint64_t BlockFlow = 0;
971     // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps
972     for (auto *Jump : SrcBlock->SuccJumps) {
973       if (ignoreJump(SrcBlock, DstBlock, Jump))
974         continue;
975       BlockFlow += Jump->Flow;
976     }
977     rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);
978 
979     // Ditribute flow from the remaining blocks
980     for (auto *Block : UnknownBlocks) {
981       assert(Block->UnknownWeight && "incorrect unknown subgraph");
982       uint64_t BlockFlow = 0;
983       // Block's flow is the sum of incoming flows
984       for (auto *Jump : Block->PredJumps) {
985         BlockFlow += Jump->Flow;
986       }
987       Block->Flow = BlockFlow;
988       rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow);
989     }
990   }
991 
992   /// Redistribute flow for a block in a subgraph rooted at SrcBlock,
993   /// and ending at DstBlock.
994   void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
995                       const FlowBlock *Block, uint64_t BlockFlow) {
996     // Process all successor jumps and update corresponding flow values
997     size_t BlockDegree = 0;
998     for (auto *Jump : Block->SuccJumps) {
999       if (ignoreJump(SrcBlock, DstBlock, Jump))
1000         continue;
1001       BlockDegree++;
1002     }
1003     // If all successor jumps of the block are ignored, skip it
1004     if (DstBlock == nullptr && BlockDegree == 0)
1005       return;
1006     assert(BlockDegree > 0 && "all outgoing jumps are ignored");
1007 
1008     // Each of the Block's successors gets the following amount of flow.
1009     // Rounding the value up so that all flow is propagated
1010     uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
1011     for (auto *Jump : Block->SuccJumps) {
1012       if (ignoreJump(SrcBlock, DstBlock, Jump))
1013         continue;
1014       uint64_t Flow = std::min(SuccFlow, BlockFlow);
1015       Jump->Flow = Flow;
1016       BlockFlow -= Flow;
1017     }
1018     assert(BlockFlow == 0 && "not all flow is propagated");
1019   }
1020 
  /// A sentinel block index (the largest uint64_t value) that stands for an
  /// arbitrary exit block of a function rather than for a concrete block.
  static constexpr uint64_t AnyExitBlock = uint64_t(-1);

  /// The function whose block and jump flows are adjusted. It is held by
  /// reference, so the adjustments are applied to the caller's object.
  FlowFunction &Func;
1026 };
1027 
1028 /// Initializing flow network for a given function.
1029 ///
1030 /// Every block is split into three nodes that are responsible for (i) an
1031 /// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or
1032 /// reduction of the block weight.
1033 void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) {
1034   uint64_t NumBlocks = Func.Blocks.size();
1035   assert(NumBlocks > 1 && "Too few blocks in a function");
1036   LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n");
1037 
1038   // Pre-process data: make sure the entry weight is at least 1
1039   if (Func.Blocks[Func.Entry].Weight == 0) {
1040     Func.Blocks[Func.Entry].Weight = 1;
1041   }
1042   // Introducing dummy source/sink pairs to allow flow circulation.
1043   // The nodes corresponding to blocks of Func have indicies in the range
1044   // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values.
1045   uint64_t S = 3 * NumBlocks;
1046   uint64_t T = S + 1;
1047   uint64_t S1 = S + 2;
1048   uint64_t T1 = S + 3;
1049 
1050   Network.initialize(3 * NumBlocks + 4, S1, T1);
1051 
1052   // Create three nodes for every block of the function
1053   for (uint64_t B = 0; B < NumBlocks; B++) {
1054     auto &Block = Func.Blocks[B];
1055     assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
1056            "non-zero weight of a block w/o weight except for an entry");
1057 
1058     // Split every block into two nodes
1059     uint64_t Bin = 3 * B;
1060     uint64_t Bout = 3 * B + 1;
1061     uint64_t Baux = 3 * B + 2;
1062     if (Block.Weight > 0) {
1063       Network.addEdge(S1, Bout, Block.Weight, 0);
1064       Network.addEdge(Bin, T1, Block.Weight, 0);
1065     }
1066 
1067     // Edges from S and to T
1068     assert((!Block.isEntry() || !Block.isExit()) &&
1069            "a block cannot be an entry and an exit");
1070     if (Block.isEntry()) {
1071       Network.addEdge(S, Bin, 0);
1072     } else if (Block.isExit()) {
1073       Network.addEdge(Bout, T, 0);
1074     }
1075 
1076     // An auxiliary node to allow increase/reduction of block counts:
1077     // We assume that decreasing block counts is more expensive than increasing,
1078     // and thus, setting separate costs here. In the future we may want to tune
1079     // the relative costs so as to maximize the quality of generated profiles.
1080     int64_t AuxCostInc = SampleProfileProfiCostInc;
1081     int64_t AuxCostDec = SampleProfileProfiCostDec;
1082     if (Block.UnknownWeight) {
1083       // Do not penalize changing weights of blocks w/o known profile count
1084       AuxCostInc = 0;
1085       AuxCostDec = 0;
1086     } else {
1087       // Increasing the count for "cold" blocks with zero initial count is more
1088       // expensive than for "hot" ones
1089       if (Block.Weight == 0) {
1090         AuxCostInc = SampleProfileProfiCostIncZero;
1091       }
1092       // Modifying the count of the entry block is expensive
1093       if (Block.isEntry()) {
1094         AuxCostInc = SampleProfileProfiCostIncEntry;
1095         AuxCostDec = SampleProfileProfiCostDecEntry;
1096       }
1097     }
1098     // For blocks with self-edges, do not penalize a reduction of the count,
1099     // as all of the increase can be attributed to the self-edge
1100     if (Block.HasSelfEdge) {
1101       AuxCostDec = 0;
1102     }
1103 
1104     Network.addEdge(Bin, Baux, AuxCostInc);
1105     Network.addEdge(Baux, Bout, AuxCostInc);
1106     if (Block.Weight > 0) {
1107       Network.addEdge(Bout, Baux, AuxCostDec);
1108       Network.addEdge(Baux, Bin, AuxCostDec);
1109     }
1110   }
1111 
1112   // Creating edges for every jump
1113   for (auto &Jump : Func.Jumps) {
1114     uint64_t Src = Jump.Source;
1115     uint64_t Dst = Jump.Target;
1116     if (Src != Dst) {
1117       uint64_t SrcOut = 3 * Src + 1;
1118       uint64_t DstIn = 3 * Dst;
1119       uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0;
1120       Network.addEdge(SrcOut, DstIn, Cost);
1121     }
1122   }
1123 
1124   // Make sure we have a valid flow circulation
1125   Network.addEdge(T, S, 0);
1126 }
1127 
1128 /// Extract resulting block and edge counts from the flow network.
1129 void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) {
1130   uint64_t NumBlocks = Func.Blocks.size();
1131 
1132   // Extract resulting block counts
1133   for (uint64_t Src = 0; Src < NumBlocks; Src++) {
1134     auto &Block = Func.Blocks[Src];
1135     uint64_t SrcOut = 3 * Src + 1;
1136     int64_t Flow = 0;
1137     for (const auto &Adj : Network.getFlow(SrcOut)) {
1138       uint64_t DstIn = Adj.first;
1139       int64_t DstFlow = Adj.second;
1140       bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2);
1141       if (!IsAuxNode || Block.HasSelfEdge) {
1142         Flow += DstFlow;
1143       }
1144     }
1145     Block.Flow = Flow;
1146     assert(Flow >= 0 && "negative block flow");
1147   }
1148 
1149   // Extract resulting jump counts
1150   for (auto &Jump : Func.Jumps) {
1151     uint64_t Src = Jump.Source;
1152     uint64_t Dst = Jump.Target;
1153     int64_t Flow = 0;
1154     if (Src != Dst) {
1155       uint64_t SrcOut = 3 * Src + 1;
1156       uint64_t DstIn = 3 * Dst;
1157       Flow = Network.getFlow(SrcOut, DstIn);
1158     } else {
1159       uint64_t SrcOut = 3 * Src + 1;
1160       uint64_t SrcAux = 3 * Src + 2;
1161       int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux);
1162       if (AuxFlow > 0)
1163         Flow = AuxFlow;
1164     }
1165     Jump.Flow = Flow;
1166     assert(Flow >= 0 && "negative jump flow");
1167   }
1168 }
1169 
1170 #ifndef NDEBUG
1171 /// Verify that the computed flow values satisfy flow conservation rules
1172 void verifyWeights(const FlowFunction &Func) {
1173   const uint64_t NumBlocks = Func.Blocks.size();
1174   auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
1175   auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
1176   for (const auto &Jump : Func.Jumps) {
1177     InFlow[Jump.Target] += Jump.Flow;
1178     OutFlow[Jump.Source] += Jump.Flow;
1179   }
1180 
1181   uint64_t TotalInFlow = 0;
1182   uint64_t TotalOutFlow = 0;
1183   for (uint64_t I = 0; I < NumBlocks; I++) {
1184     auto &Block = Func.Blocks[I];
1185     if (Block.isEntry()) {
1186       TotalInFlow += Block.Flow;
1187       assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
1188     } else if (Block.isExit()) {
1189       TotalOutFlow += Block.Flow;
1190       assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
1191     } else {
1192       assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
1193       assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
1194     }
1195   }
1196   assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");
1197 
1198   // Verify that there are no isolated flow components
1199   // One could modify FlowFunction to hold edges indexed by the sources, which
1200   // will avoid a creation of the object
1201   auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
1202   for (const auto &Jump : Func.Jumps) {
1203     if (Jump.Flow > 0) {
1204       PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
1205     }
1206   }
1207 
1208   // Run BFS from the source along edges with positive flow
1209   std::queue<uint64_t> Queue;
1210   auto Visited = BitVector(NumBlocks, false);
1211   Queue.push(Func.Entry);
1212   Visited[Func.Entry] = true;
1213   while (!Queue.empty()) {
1214     uint64_t Src = Queue.front();
1215     Queue.pop();
1216     for (uint64_t Dst : PositiveFlowEdges[Src]) {
1217       if (!Visited[Dst]) {
1218         Queue.push(Dst);
1219         Visited[Dst] = true;
1220       }
1221     }
1222   }
1223 
1224   // Verify that every block that has a positive flow is reached from the source
1225   // along edges with a positive flow
1226   for (uint64_t I = 0; I < NumBlocks; I++) {
1227     auto &Block = Func.Blocks[I];
1228     assert((Visited[I] || Block.Flow == 0) && "an isolated flow component");
1229   }
1230 }
1231 #endif
1232 
1233 } // end of anonymous namespace
1234 
1235 /// Apply the profile inference algorithm for a given flow function
1236 void llvm::applyFlowInference(FlowFunction &Func) {
1237   // Create and apply an inference network model
1238   auto InferenceNetwork = MinCostMaxFlow();
1239   initializeNetwork(InferenceNetwork, Func);
1240   InferenceNetwork.run();
1241 
1242   // Extract flow values for every block and every edge
1243   extractWeights(InferenceNetwork, Func);
1244 
1245   // Post-processing adjustments to the flow
1246   auto Adjuster = FlowAdjuster(Func);
1247   Adjuster.run();
1248 
1249 #ifndef NDEBUG
1250   // Verify the result
1251   verifyWeights(Func);
1252 #endif
1253 }
1254