xref: /llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp (revision 557efc9a8b68628c2c944678c6471dac30ed9e8e)
1 //===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a profile inference algorithm. Given an incomplete and
10 // possibly imprecise block counts, the algorithm reconstructs realistic block
11 // and edge counts that satisfy flow conservation rules, while minimally modify
12 // input block counts.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/Transforms/Utils/SampleProfileInference.h"
17 #include "llvm/ADT/BitVector.h"
18 #include "llvm/Support/CommandLine.h"
19 #include "llvm/Support/Debug.h"
20 #include <queue>
21 #include <set>
22 #include <stack>
23 
24 using namespace llvm;
25 #define DEBUG_TYPE "sample-profile-inference"
26 
27 namespace {
28 
29 static cl::opt<bool> SampleProfileEvenCountDistribution(
30     "sample-profile-even-count-distribution", cl::init(true), cl::Hidden,
31     cl::ZeroOrMore,
32     cl::desc("Try to evenly distribute counts when there are multiple equally "
33              "likely options."));
34 
35 static cl::opt<unsigned> SampleProfileMaxDfsCalls(
36     "sample-profile-max-dfs-calls", cl::init(10), cl::Hidden,
37     cl::desc("Maximum number of dfs iterations for even count distribution."));
38 
39 static cl::opt<unsigned> SampleProfileProfiCostInc(
40     "sample-profile-profi-cost-inc", cl::init(10), cl::Hidden,
41     cl::desc("A cost of increasing a block's count by one."));
42 
43 static cl::opt<unsigned> SampleProfileProfiCostDec(
44     "sample-profile-profi-cost-dec", cl::init(20), cl::Hidden,
45     cl::desc("A cost of decreasing a block's count by one."));
46 
47 static cl::opt<unsigned> SampleProfileProfiCostIncZero(
48     "sample-profile-profi-cost-inc-zero", cl::init(11), cl::Hidden,
49     cl::ZeroOrMore,
50     cl::desc("A cost of increasing a count of zero-weight block by one."));
51 
52 static cl::opt<unsigned> SampleProfileProfiCostIncEntry(
53     "sample-profile-profi-cost-inc-entry", cl::init(40), cl::Hidden,
54     cl::ZeroOrMore,
55     cl::desc("A cost of increasing the entry block's count by one."));
56 
57 static cl::opt<unsigned> SampleProfileProfiCostDecEntry(
58     "sample-profile-profi-cost-dec-entry", cl::init(10), cl::Hidden,
59     cl::ZeroOrMore,
60     cl::desc("A cost of decreasing the entry block's count by one."));
61 
62 /// A value indicating an infinite flow/capacity/weight of a block/edge.
63 /// Not using numeric_limits<int64_t>::max(), as the values can be summed up
64 /// during the execution.
65 static constexpr int64_t INF = ((int64_t)1) << 50;
66 
67 /// The minimum-cost maximum flow algorithm.
68 ///
69 /// The algorithm finds the maximum flow of minimum cost on a given (directed)
70 /// network using a modified version of the classical Moore-Bellman-Ford
71 /// approach. The algorithm applies a number of augmentation iterations in which
72 /// flow is sent along paths of positive capacity from the source to the sink.
73 /// The worst-case time complexity of the implementation is O(v(f)*m*n), where
74 /// where m is the number of edges, n is the number of vertices, and v(f) is the
75 /// value of the maximum flow. However, the observed running time on typical
76 /// instances is sub-quadratic, that is, o(n^2).
77 ///
78 /// The input is a set of edges with specified costs and capacities, and a pair
79 /// of nodes (source and sink). The output is the flow along each edge of the
80 /// minimum total cost respecting the given edge capacities.
81 class MinCostMaxFlow {
82 public:
83   // Initialize algorithm's data structures for a network of a given size.
84   void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
85     Source = SourceNode;
86     Target = SinkNode;
87 
88     Nodes = std::vector<Node>(NodeCount);
89     Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
90     if (SampleProfileEvenCountDistribution)
91       AugmentingEdges =
92           std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>());
93   }
94 
95   // Run the algorithm.
96   int64_t run() {
97     // Iteratively find an augmentation path/dag in the network and send the
98     // flow along its edges
99     size_t AugmentationIters = applyFlowAugmentation();
100 
101     // Compute the total flow and its cost
102     int64_t TotalCost = 0;
103     int64_t TotalFlow = 0;
104     for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
105       for (auto &Edge : Edges[Src]) {
106         if (Edge.Flow > 0) {
107           TotalCost += Edge.Cost * Edge.Flow;
108           if (Src == Source)
109             TotalFlow += Edge.Flow;
110         }
111       }
112     }
113     LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
114                       << " iterations with " << TotalFlow << " total flow"
115                       << " of " << TotalCost << " cost\n");
116     (void)TotalFlow;
117     (void)AugmentationIters;
118     return TotalCost;
119   }
120 
121   /// Adding an edge to the network with a specified capacity and a cost.
122   /// Multiple edges between a pair of nodes are allowed but self-edges
123   /// are not supported.
124   void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) {
125     assert(Capacity > 0 && "adding an edge of zero capacity");
126     assert(Src != Dst && "loop edge are not supported");
127 
128     Edge SrcEdge;
129     SrcEdge.Dst = Dst;
130     SrcEdge.Cost = Cost;
131     SrcEdge.Capacity = Capacity;
132     SrcEdge.Flow = 0;
133     SrcEdge.RevEdgeIndex = Edges[Dst].size();
134 
135     Edge DstEdge;
136     DstEdge.Dst = Src;
137     DstEdge.Cost = -Cost;
138     DstEdge.Capacity = 0;
139     DstEdge.Flow = 0;
140     DstEdge.RevEdgeIndex = Edges[Src].size();
141 
142     Edges[Src].push_back(SrcEdge);
143     Edges[Dst].push_back(DstEdge);
144   }
145 
146   /// Adding an edge to the network of infinite capacity and a given cost.
147   void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) {
148     addEdge(Src, Dst, INF, Cost);
149   }
150 
151   /// Get the total flow from a given source node.
152   /// Returns a list of pairs (target node, amount of flow to the target).
153   const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
154     std::vector<std::pair<uint64_t, int64_t>> Flow;
155     for (auto &Edge : Edges[Src]) {
156       if (Edge.Flow > 0)
157         Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
158     }
159     return Flow;
160   }
161 
162   /// Get the total flow between a pair of nodes.
163   int64_t getFlow(uint64_t Src, uint64_t Dst) const {
164     int64_t Flow = 0;
165     for (auto &Edge : Edges[Src]) {
166       if (Edge.Dst == Dst) {
167         Flow += Edge.Flow;
168       }
169     }
170     return Flow;
171   }
172 
173   /// A cost of taking an unlikely jump.
174   static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 30;
175   /// Minimum BaseDistance for the jump distance values in island joining.
176   static constexpr uint64_t MinBaseDistance = 10000;
177 
178 private:
179   /// Iteratively find an augmentation path/dag in the network and send the
180   /// flow along its edges. The method returns the number of applied iterations.
181   size_t applyFlowAugmentation() {
182     size_t AugmentationIters = 0;
183     while (findAugmentingPath()) {
184       uint64_t PathCapacity = computeAugmentingPathCapacity();
185       while (PathCapacity > 0) {
186         bool Progress = false;
187         if (SampleProfileEvenCountDistribution) {
188           // Identify node/edge candidates for augmentation
189           identifyShortestEdges(PathCapacity);
190 
191           // Find an augmenting DAG
192           auto AugmentingOrder = findAugmentingDAG();
193 
194           // Apply the DAG augmentation
195           Progress = augmentFlowAlongDAG(AugmentingOrder);
196           PathCapacity = computeAugmentingPathCapacity();
197         }
198 
199         if (!Progress) {
200           augmentFlowAlongPath(PathCapacity);
201           PathCapacity = 0;
202         }
203 
204         AugmentationIters++;
205       }
206     }
207     return AugmentationIters;
208   }
209 
210   /// Compute the capacity of the cannonical augmenting path. If the path is
211   /// saturated (that is, no flow can be sent along the path), then return 0.
212   uint64_t computeAugmentingPathCapacity() {
213     uint64_t PathCapacity = INF;
214     uint64_t Now = Target;
215     while (Now != Source) {
216       uint64_t Pred = Nodes[Now].ParentNode;
217       auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
218 
219       assert(Edge.Capacity >= Edge.Flow && "incorrect edge flow");
220       uint64_t EdgeCapacity = uint64_t(Edge.Capacity - Edge.Flow);
221       PathCapacity = std::min(PathCapacity, EdgeCapacity);
222 
223       Now = Pred;
224     }
225     return PathCapacity;
226   }
227 
228   /// Check for existence of an augmenting path with a positive capacity.
229   bool findAugmentingPath() {
230     // Initialize data structures
231     for (auto &Node : Nodes) {
232       Node.Distance = INF;
233       Node.ParentNode = uint64_t(-1);
234       Node.ParentEdgeIndex = uint64_t(-1);
235       Node.Taken = false;
236     }
237 
238     std::queue<uint64_t> Queue;
239     Queue.push(Source);
240     Nodes[Source].Distance = 0;
241     Nodes[Source].Taken = true;
242     while (!Queue.empty()) {
243       uint64_t Src = Queue.front();
244       Queue.pop();
245       Nodes[Src].Taken = false;
246       // Although the residual network contains edges with negative costs
247       // (in particular, backward edges), it can be shown that there are no
248       // negative-weight cycles and the following two invariants are maintained:
249       // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V,
250       // where Dist is the length of the shortest path between two nodes. This
251       // allows to prune the search-space of the path-finding algorithm using
252       // the following early-stop criteria:
253       // -- If we find a path with zero-distance from Source to Target, stop the
254       //    search, as the path is the shortest since Dist[Source, Target] >= 0;
255       // -- If we have Dist[Source, V] > Dist[Source, Target], then do not
256       //    process node V, as it is guaranteed _not_ to be on a shortest path
257       //    from Source to Target; it follows from inequalities
258       //    Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
259       //                         >= Dist[Source, V]
260       if (!SampleProfileEvenCountDistribution && Nodes[Target].Distance == 0)
261         break;
262       if (Nodes[Src].Distance > Nodes[Target].Distance)
263         continue;
264 
265       // Process adjacent edges
266       for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
267         auto &Edge = Edges[Src][EdgeIdx];
268         if (Edge.Flow < Edge.Capacity) {
269           uint64_t Dst = Edge.Dst;
270           int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
271           if (Nodes[Dst].Distance > NewDistance) {
272             // Update the distance and the parent node/edge
273             Nodes[Dst].Distance = NewDistance;
274             Nodes[Dst].ParentNode = Src;
275             Nodes[Dst].ParentEdgeIndex = EdgeIdx;
276             // Add the node to the queue, if it is not there yet
277             if (!Nodes[Dst].Taken) {
278               Queue.push(Dst);
279               Nodes[Dst].Taken = true;
280             }
281           }
282         }
283       }
284     }
285 
286     return Nodes[Target].Distance != INF;
287   }
288 
289   /// Update the current flow along the augmenting path.
290   void augmentFlowAlongPath(uint64_t PathCapacity) {
291     assert(PathCapacity > 0 && "found an incorrect augmenting path");
292     uint64_t Now = Target;
293     while (Now != Source) {
294       uint64_t Pred = Nodes[Now].ParentNode;
295       auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
296       auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];
297 
298       Edge.Flow += PathCapacity;
299       RevEdge.Flow -= PathCapacity;
300 
301       Now = Pred;
302     }
303   }
304 
305   /// Find an Augmenting DAG order using a modified version of DFS in which we
306   /// can visit a node multiple times. In the DFS search, when scanning each
307   /// edge out of a node, continue search at Edge.Dst endpoint if it has not
308   /// been discovered yet and its NumCalls < MaxDfsCalls. The algorithm
309   /// runs in O(MaxDfsCalls * |Edges| + |Nodes|) time.
310   /// It returns an Augmenting Order (Taken nodes in decreasing Finish time)
311   /// that starts with Source and ends with Target.
312   std::vector<uint64_t> findAugmentingDAG() {
313     // We use a stack based implemenation of DFS to avoid recursion.
314     // Defining DFS data structures:
315     // A pair (NodeIdx, EdgeIdx) at the top of the Stack denotes that
316     //  - we are currently visiting Nodes[NodeIdx] and
317     //  - the next edge to scan is Edges[NodeIdx][EdgeIdx]
318     typedef std::pair<uint64_t, uint64_t> StackItemType;
319     std::stack<StackItemType> Stack;
320     std::vector<uint64_t> AugmentingOrder;
321 
322     // Phase 0: Initialize Node attributes and Time for DFS run
323     for (auto &Node : Nodes) {
324       Node.Discovery = 0;
325       Node.Finish = 0;
326       Node.NumCalls = 0;
327       Node.Taken = false;
328     }
329     uint64_t Time = 0;
330     // Mark Target as Taken
331     // Taken attribute will be propagated backwards from Target towards Source
332     Nodes[Target].Taken = true;
333 
334     // Phase 1: Start DFS traversal from Source
335     Stack.emplace(Source, 0);
336     Nodes[Source].Discovery = ++Time;
337     while (!Stack.empty()) {
338       auto NodeIdx = Stack.top().first;
339       auto EdgeIdx = Stack.top().second;
340 
341       // If we haven't scanned all edges out of NodeIdx, continue scanning
342       if (EdgeIdx < Edges[NodeIdx].size()) {
343         auto &Edge = Edges[NodeIdx][EdgeIdx];
344         auto &Dst = Nodes[Edge.Dst];
345         Stack.top().second++;
346 
347         if (Edge.OnShortestPath) {
348           // If we haven't seen Edge.Dst so far, continue DFS search there
349           if (Dst.Discovery == 0 && Dst.NumCalls < SampleProfileMaxDfsCalls) {
350             Dst.Discovery = ++Time;
351             Stack.emplace(Edge.Dst, 0);
352             Dst.NumCalls++;
353           } else if (Dst.Taken && Dst.Finish != 0) {
354             // Else, if Edge.Dst already have a path to Target, so that NodeIdx
355             Nodes[NodeIdx].Taken = true;
356           }
357         }
358       } else {
359         // If we are done scanning all edge out of NodeIdx
360         Stack.pop();
361         // If we haven't found a path from NodeIdx to Target, forget about it
362         if (!Nodes[NodeIdx].Taken) {
363           Nodes[NodeIdx].Discovery = 0;
364         } else {
365           // If we have found a path from NodeIdx to Target, then finish NodeIdx
366           // and propagate Taken flag to DFS parent unless at the Source
367           Nodes[NodeIdx].Finish = ++Time;
368           // NodeIdx == Source if and only if the stack is empty
369           if (NodeIdx != Source) {
370             assert(!Stack.empty() && "empty stack while running dfs");
371             Nodes[Stack.top().first].Taken = true;
372           }
373           AugmentingOrder.push_back(NodeIdx);
374         }
375       }
376     }
377     // Nodes are collected decreasing Finish time, so the order is reversed
378     std::reverse(AugmentingOrder.begin(), AugmentingOrder.end());
379 
380     // Phase 2: Extract all forward (DAG) edges and fill in AugmentingEdges
381     for (size_t Src : AugmentingOrder) {
382       AugmentingEdges[Src].clear();
383       for (auto &Edge : Edges[Src]) {
384         uint64_t Dst = Edge.Dst;
385         if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken &&
386             Nodes[Dst].Finish < Nodes[Src].Finish) {
387           AugmentingEdges[Src].push_back(&Edge);
388         }
389       }
390       assert((Src == Target || !AugmentingEdges[Src].empty()) &&
391              "incorrectly constructed augmenting edges");
392     }
393 
394     return AugmentingOrder;
395   }
396 
397   /// Update the current flow along the given (acyclic) subgraph specified by
398   /// the vertex order, AugmentingOrder. The objective is to send as much flow
399   /// as possible while evenly distributing flow among successors of each node.
400   /// After the update at least one edge is saturated.
401   bool augmentFlowAlongDAG(const std::vector<uint64_t> &AugmentingOrder) {
402     // Phase 0: Initialization
403     for (uint64_t Src : AugmentingOrder) {
404       Nodes[Src].FracFlow = 0;
405       Nodes[Src].IntFlow = 0;
406       for (auto &Edge : AugmentingEdges[Src]) {
407         Edge->AugmentedFlow = 0;
408       }
409     }
410 
411     // Phase 1: Send a unit of fractional flow along the DAG
412     uint64_t MaxFlowAmount = INF;
413     Nodes[Source].FracFlow = 1.0;
414     for (uint64_t Src : AugmentingOrder) {
415       assert((Src == Target || Nodes[Src].FracFlow > 0.0) &&
416              "incorrectly computed fractional flow");
417       // Distribute flow evenly among successors of Src
418       uint64_t Degree = AugmentingEdges[Src].size();
419       for (auto &Edge : AugmentingEdges[Src]) {
420         double EdgeFlow = Nodes[Src].FracFlow / Degree;
421         Nodes[Edge->Dst].FracFlow += EdgeFlow;
422         if (Edge->Capacity == INF)
423           continue;
424         uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow;
425         MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow);
426       }
427     }
428     // Stop early if we cannot send any (integral) flow from Source to Target
429     if (MaxFlowAmount == 0)
430       return false;
431 
432     // Phase 2: Send an integral flow of MaxFlowAmount
433     Nodes[Source].IntFlow = MaxFlowAmount;
434     for (uint64_t Src : AugmentingOrder) {
435       if (Src == Target)
436         break;
437       // Distribute flow evenly among successors of Src, rounding up to make
438       // sure all flow is sent
439       uint64_t Degree = AugmentingEdges[Src].size();
440       // We are guaranteeed that Node[Src].IntFlow <= SuccFlow * Degree
441       uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree;
442       for (auto &Edge : AugmentingEdges[Src]) {
443         uint64_t Dst = Edge->Dst;
444         uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow);
445         EdgeFlow = std::min(EdgeFlow, uint64_t(Edge->Capacity - Edge->Flow));
446         Nodes[Dst].IntFlow += EdgeFlow;
447         Nodes[Src].IntFlow -= EdgeFlow;
448         Edge->AugmentedFlow += EdgeFlow;
449       }
450     }
451     assert(Nodes[Target].IntFlow <= MaxFlowAmount);
452     Nodes[Target].IntFlow = 0;
453 
454     // Phase 3: Send excess flow back traversing the nodes backwards.
455     // Because of rounding, not all flow can be sent along the edges of Src.
456     // Hence, sending the remaining flow back to maintain flow conservation
457     for (size_t Idx = AugmentingOrder.size() - 1; Idx > 0; Idx--) {
458       uint64_t Src = AugmentingOrder[Idx - 1];
459       // Try to send excess flow back along each edge.
460       // Make sure we only send back flow we just augmented (AugmentedFlow).
461       for (auto &Edge : AugmentingEdges[Src]) {
462         uint64_t Dst = Edge->Dst;
463         if (Nodes[Dst].IntFlow == 0)
464           continue;
465         uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow);
466         Nodes[Dst].IntFlow -= EdgeFlow;
467         Nodes[Src].IntFlow += EdgeFlow;
468         Edge->AugmentedFlow -= EdgeFlow;
469       }
470     }
471 
472     // Phase 4: Update flow values along all edges
473     bool HasSaturatedEdges = false;
474     for (uint64_t Src : AugmentingOrder) {
475       // Verify that we have sent all the excess flow from the node
476       assert(Src == Source || Nodes[Src].IntFlow == 0);
477       for (auto &Edge : AugmentingEdges[Src]) {
478         assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow);
479         // Update flow values along the edge and its reverse copy
480         auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex];
481         Edge->Flow += Edge->AugmentedFlow;
482         RevEdge.Flow -= Edge->AugmentedFlow;
483         if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0)
484           HasSaturatedEdges = true;
485       }
486     }
487 
488     // The augmentation is successful iff at least one edge becomes saturated
489     return HasSaturatedEdges;
490   }
491 
492   /// Identify candidate (shortest) edges for augmentation.
493   void identifyShortestEdges(uint64_t PathCapacity) {
494     assert(PathCapacity > 0 && "found an incorrect augmenting DAG");
495     // To make sure the augmentation DAG contains only edges with large residual
496     // capacity, we prune all edges whose capacity is below a fraction of
497     // the capacity of the augmented path.
498     // (All edges of the path itself are always in the DAG)
499     uint64_t MinCapacity = std::max(PathCapacity / 2, uint64_t(1));
500 
501     // Decide which edges are on a shortest path from Source to Target
502     for (size_t Src = 0; Src < Nodes.size(); Src++) {
503       // An edge cannot be augmenting if the endpoint has large distance
504       if (Nodes[Src].Distance > Nodes[Target].Distance)
505         continue;
506 
507       for (auto &Edge : Edges[Src]) {
508         uint64_t Dst = Edge.Dst;
509         Edge.OnShortestPath =
510             Src != Target && Dst != Source &&
511             Nodes[Dst].Distance <= Nodes[Target].Distance &&
512             Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost &&
513             Edge.Capacity > Edge.Flow &&
514             uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity;
515       }
516     }
517   }
518 
519   /// A node in a flow network.
520   struct Node {
521     /// The cost of the cheapest path from the source to the current node.
522     int64_t Distance;
523     /// The node preceding the current one in the path.
524     uint64_t ParentNode;
525     /// The index of the edge between ParentNode and the current node.
526     uint64_t ParentEdgeIndex;
527     /// An indicator of whether the current node is in a queue.
528     bool Taken;
529 
530     /// Data fields utilized in DAG-augmentation:
531     /// Fractional flow.
532     double FracFlow;
533     /// Integral flow.
534     uint64_t IntFlow;
535     /// Discovery time.
536     uint64_t Discovery;
537     /// Finish time.
538     uint64_t Finish;
539     /// NumCalls.
540     uint64_t NumCalls;
541   };
542 
543   /// An edge in a flow network.
544   struct Edge {
545     /// The cost of the edge.
546     int64_t Cost;
547     /// The capacity of the edge.
548     int64_t Capacity;
549     /// The current flow on the edge.
550     int64_t Flow;
551     /// The destination node of the edge.
552     uint64_t Dst;
553     /// The index of the reverse edge between Dst and the current node.
554     uint64_t RevEdgeIndex;
555 
556     /// Data fields utilized in DAG-augmentation:
557     /// Whether the edge is currently on a shortest path from Source to Target.
558     bool OnShortestPath;
559     /// Extra flow along the edge.
560     uint64_t AugmentedFlow;
561   };
562 
563   /// The set of network nodes.
564   std::vector<Node> Nodes;
565   /// The set of network edges.
566   std::vector<std::vector<Edge>> Edges;
567   /// Source node of the flow.
568   uint64_t Source;
569   /// Target (sink) node of the flow.
570   uint64_t Target;
571   /// Augmenting edges.
572   std::vector<std::vector<Edge *>> AugmentingEdges;
573 };
574 
575 constexpr int64_t MinCostMaxFlow::AuxCostUnlikely;
576 constexpr uint64_t MinCostMaxFlow::MinBaseDistance;
577 
578 /// A post-processing adjustment of control flow. It applies two steps by
579 /// rerouting some flow and making it more realistic:
580 ///
581 /// - First, it removes all isolated components ("islands") with a positive flow
582 ///   that are unreachable from the entry block. For every such component, we
583 ///   find the shortest from the entry to an exit passing through the component,
584 ///   and increase the flow by one unit along the path.
585 ///
586 /// - Second, it identifies all "unknown subgraphs" consisting of basic blocks
587 ///   with no sampled counts. Then it rebalnces the flow that goes through such
588 ///   a subgraph so that each branch is taken with probability 50%.
589 ///   An unknown subgraph is such that for every two nodes u and v:
590 ///     - u dominates v and u is not unknown;
591 ///     - v post-dominates u; and
592 ///     - all inner-nodes of all (u,v)-paths are unknown.
593 ///
594 class FlowAdjuster {
595 public:
596   FlowAdjuster(FlowFunction &Func) : Func(Func) {
597     assert(Func.Blocks[Func.Entry].isEntry() &&
598            "incorrect index of the entry block");
599   }
600 
601   // Run the post-processing
602   void run() {
603     /// Adjust the flow to get rid of isolated components.
604     joinIsolatedComponents();
605 
606     /// Rebalance the flow inside unknown subgraphs.
607     rebalanceUnknownSubgraphs();
608   }
609 
610 private:
611   void joinIsolatedComponents() {
612     // Find blocks that are reachable from the source
613     auto Visited = BitVector(NumBlocks(), false);
614     findReachable(Func.Entry, Visited);
615 
616     // Iterate over all non-reachable blocks and adjust their weights
617     for (uint64_t I = 0; I < NumBlocks(); I++) {
618       auto &Block = Func.Blocks[I];
619       if (Block.Flow > 0 && !Visited[I]) {
620         // Find a path from the entry to an exit passing through the block I
621         auto Path = findShortestPath(I);
622         // Increase the flow along the path
623         assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
624                "incorrectly computed path adjusting control flow");
625         Func.Blocks[Func.Entry].Flow += 1;
626         for (auto &Jump : Path) {
627           Jump->Flow += 1;
628           Func.Blocks[Jump->Target].Flow += 1;
629           // Update reachability
630           findReachable(Jump->Target, Visited);
631         }
632       }
633     }
634   }
635 
636   /// Run BFS from a given block along the jumps with a positive flow and mark
637   /// all reachable blocks.
638   void findReachable(uint64_t Src, BitVector &Visited) {
639     if (Visited[Src])
640       return;
641     std::queue<uint64_t> Queue;
642     Queue.push(Src);
643     Visited[Src] = true;
644     while (!Queue.empty()) {
645       Src = Queue.front();
646       Queue.pop();
647       for (auto Jump : Func.Blocks[Src].SuccJumps) {
648         uint64_t Dst = Jump->Target;
649         if (Jump->Flow > 0 && !Visited[Dst]) {
650           Queue.push(Dst);
651           Visited[Dst] = true;
652         }
653       }
654     }
655   }
656 
657   /// Find the shortest path from the entry block to an exit block passing
658   /// through a given block.
659   std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {
660     // A path from the entry block to BlockIdx
661     auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
662     // A path from BlockIdx to an exit block
663     auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);
664 
665     // Concatenate the two paths
666     std::vector<FlowJump *> Result;
667     Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end());
668     Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end());
669     return Result;
670   }
671 
672   /// Apply the Dijkstra algorithm to find the shortest path from a given
673   /// Source to a given Target block.
674   /// If Target == -1, then the path ends at an exit block.
675   std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) {
676     // Quit early, if possible
677     if (Source == Target)
678       return std::vector<FlowJump *>();
679     if (Func.Blocks[Source].isExit() && Target == AnyExitBlock)
680       return std::vector<FlowJump *>();
681 
682     // Initialize data structures
683     auto Distance = std::vector<int64_t>(NumBlocks(), INF);
684     auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr);
685     Distance[Source] = 0;
686     std::set<std::pair<uint64_t, uint64_t>> Queue;
687     Queue.insert(std::make_pair(Distance[Source], Source));
688 
689     // Run the Dijkstra algorithm
690     while (!Queue.empty()) {
691       uint64_t Src = Queue.begin()->second;
692       Queue.erase(Queue.begin());
693       // If we found a solution, quit early
694       if (Src == Target ||
695           (Func.Blocks[Src].isExit() && Target == AnyExitBlock))
696         break;
697 
698       for (auto Jump : Func.Blocks[Src].SuccJumps) {
699         uint64_t Dst = Jump->Target;
700         int64_t JumpDist = jumpDistance(Jump);
701         if (Distance[Dst] > Distance[Src] + JumpDist) {
702           Queue.erase(std::make_pair(Distance[Dst], Dst));
703 
704           Distance[Dst] = Distance[Src] + JumpDist;
705           Parent[Dst] = Jump;
706 
707           Queue.insert(std::make_pair(Distance[Dst], Dst));
708         }
709       }
710     }
711     // If Target is not provided, find the closest exit block
712     if (Target == AnyExitBlock) {
713       for (uint64_t I = 0; I < NumBlocks(); I++) {
714         if (Func.Blocks[I].isExit() && Parent[I] != nullptr) {
715           if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {
716             Target = I;
717           }
718         }
719       }
720     }
721     assert(Parent[Target] != nullptr && "a path does not exist");
722 
723     // Extract the constructed path
724     std::vector<FlowJump *> Result;
725     uint64_t Now = Target;
726     while (Now != Source) {
727       assert(Now == Parent[Now]->Target && "incorrect parent jump");
728       Result.push_back(Parent[Now]);
729       Now = Parent[Now]->Source;
730     }
731     // Reverse the path, since it is extracted from Target to Source
732     std::reverse(Result.begin(), Result.end());
733     return Result;
734   }
735 
736   /// A distance of a path for a given jump.
737   /// In order to incite the path to use blocks/jumps with large positive flow,
738   /// and avoid changing branch probability of outgoing edges drastically,
739   /// set the jump distance so as:
740   ///   - to minimize the number of unlikely jumps used and subject to that,
741   ///   - to minimize the number of Flow == 0 jumps used and subject to that,
742   ///   - minimizes total multiplicative Flow increase for the remaining edges.
743   /// To capture this objective with integer distances, we round off fractional
744   /// parts to a multiple of 1 / BaseDistance.
745   int64_t jumpDistance(FlowJump *Jump) const {
746     uint64_t BaseDistance =
747         std::max(static_cast<uint64_t>(MinCostMaxFlow::MinBaseDistance),
748                  std::min(Func.Blocks[Func.Entry].Flow,
749                           MinCostMaxFlow::AuxCostUnlikely / NumBlocks()));
750     if (Jump->IsUnlikely)
751       return MinCostMaxFlow::AuxCostUnlikely;
752     if (Jump->Flow > 0)
753       return BaseDistance + BaseDistance / Jump->Flow;
754     return BaseDistance * NumBlocks();
755   };
756 
757   uint64_t NumBlocks() const { return Func.Blocks.size(); }
758 
759   /// Rebalance unknown subgraphs so that the flow is split evenly across the
760   /// outgoing branches of every block of the subgraph. The method iterates over
761   /// blocks with known weight and identifies unknown subgraphs rooted at the
762   /// blocks. Then it verifies if flow rebalancing is feasible and applies it.
763   void rebalanceUnknownSubgraphs() {
764     // Try to find unknown subgraphs from each block
765     for (uint64_t I = 0; I < Func.Blocks.size(); I++) {
766       auto SrcBlock = &Func.Blocks[I];
767       // Verify if rebalancing rooted at SrcBlock is feasible
768       if (!canRebalanceAtRoot(SrcBlock))
769         continue;
770 
771       // Find an unknown subgraphs starting at SrcBlock. Along the way,
772       // fill in known destinations and intermediate unknown blocks.
773       std::vector<FlowBlock *> UnknownBlocks;
774       std::vector<FlowBlock *> KnownDstBlocks;
775       findUnknownSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks);
776 
777       // Verify if rebalancing of the subgraph is feasible. If the search is
778       // successful, find the unique destination block (which can be null)
779       FlowBlock *DstBlock = nullptr;
780       if (!canRebalanceSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks,
781                                 DstBlock))
782         continue;
783 
784       // We cannot rebalance subgraphs containing cycles among unknown blocks
785       if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownBlocks))
786         continue;
787 
788       // Rebalance the flow
789       rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownBlocks);
790     }
791   }
792 
793   /// Verify if rebalancing rooted at a given block is possible.
794   bool canRebalanceAtRoot(const FlowBlock *SrcBlock) {
795     // Do not attempt to find unknown subgraphs from an unknown or a
796     // zero-flow block
797     if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0)
798       return false;
799 
800     // Do not attempt to process subgraphs from a block w/o unknown sucessors
801     bool HasUnknownSuccs = false;
802     for (auto Jump : SrcBlock->SuccJumps) {
803       if (Func.Blocks[Jump->Target].UnknownWeight) {
804         HasUnknownSuccs = true;
805         break;
806       }
807     }
808     if (!HasUnknownSuccs)
809       return false;
810 
811     return true;
812   }
813 
814   /// Find an unknown subgraph starting at block SrcBlock. The method sets
815   /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks.
816   void findUnknownSubgraph(const FlowBlock *SrcBlock,
817                            std::vector<FlowBlock *> &KnownDstBlocks,
818                            std::vector<FlowBlock *> &UnknownBlocks) {
819     // Run BFS from SrcBlock and make sure all paths are going through unknown
820     // blocks and end at a known DstBlock
821     auto Visited = BitVector(NumBlocks(), false);
822     std::queue<uint64_t> Queue;
823 
824     Queue.push(SrcBlock->Index);
825     Visited[SrcBlock->Index] = true;
826     while (!Queue.empty()) {
827       auto &Block = Func.Blocks[Queue.front()];
828       Queue.pop();
829       // Process blocks reachable from Block
830       for (auto Jump : Block.SuccJumps) {
831         // If Jump can be ignored, skip it
832         if (ignoreJump(SrcBlock, nullptr, Jump))
833           continue;
834 
835         uint64_t Dst = Jump->Target;
836         // If Dst has been visited, skip Jump
837         if (Visited[Dst])
838           continue;
839         // Process block Dst
840         Visited[Dst] = true;
841         if (!Func.Blocks[Dst].UnknownWeight) {
842           KnownDstBlocks.push_back(&Func.Blocks[Dst]);
843         } else {
844           Queue.push(Dst);
845           UnknownBlocks.push_back(&Func.Blocks[Dst]);
846         }
847       }
848     }
849   }
850 
851   /// Verify if rebalancing of the subgraph is feasible. If the checks are
852   /// successful, set the unique destination block, DstBlock (can be null).
853   bool canRebalanceSubgraph(const FlowBlock *SrcBlock,
854                             const std::vector<FlowBlock *> &KnownDstBlocks,
855                             const std::vector<FlowBlock *> &UnknownBlocks,
856                             FlowBlock *&DstBlock) {
857     // If the list of unknown blocks is empty, we don't need rebalancing
858     if (UnknownBlocks.empty())
859       return false;
860 
861     // If there are multiple known sinks, we can't rebalance
862     if (KnownDstBlocks.size() > 1)
863       return false;
864     DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();
865 
866     // Verify sinks of the subgraph
867     for (auto Block : UnknownBlocks) {
868       if (Block->SuccJumps.empty()) {
869         // If there are multiple (known and unknown) sinks, we can't rebalance
870         if (DstBlock != nullptr)
871           return false;
872         continue;
873       }
874       size_t NumIgnoredJumps = 0;
875       for (auto Jump : Block->SuccJumps) {
876         if (ignoreJump(SrcBlock, DstBlock, Jump))
877           NumIgnoredJumps++;
878       }
879       // If there is a non-sink block in UnknownBlocks with all jumps ignored,
880       // then we can't rebalance
881       if (NumIgnoredJumps == Block->SuccJumps.size())
882         return false;
883     }
884 
885     return true;
886   }
887 
888   /// Decide whether the Jump is ignored while processing an unknown subgraphs
889   /// rooted at basic block SrcBlock with the destination block, DstBlock.
890   bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
891                   const FlowJump *Jump) {
892     // Ignore unlikely jumps with zero flow
893     if (Jump->IsUnlikely && Jump->Flow == 0)
894       return true;
895 
896     auto JumpSource = &Func.Blocks[Jump->Source];
897     auto JumpTarget = &Func.Blocks[Jump->Target];
898 
899     // Do not ignore jumps coming into DstBlock
900     if (DstBlock != nullptr && JumpTarget == DstBlock)
901       return false;
902 
903     // Ignore jumps out of SrcBlock to known blocks
904     if (!JumpTarget->UnknownWeight && JumpSource == SrcBlock)
905       return true;
906 
907     // Ignore jumps to known blocks with zero flow
908     if (!JumpTarget->UnknownWeight && JumpTarget->Flow == 0)
909       return true;
910 
911     return false;
912   }
913 
914   /// Verify if the given unknown subgraph is acyclic, and if yes, reorder
915   /// UnknownBlocks in the topological order (so that all jumps are "forward").
916   bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
917                          std::vector<FlowBlock *> &UnknownBlocks) {
918     // Extract local in-degrees in the considered subgraph
919     auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
920     auto fillInDegree = [&](const FlowBlock *Block) {
921       for (auto Jump : Block->SuccJumps) {
922         if (ignoreJump(SrcBlock, DstBlock, Jump))
923           continue;
924         LocalInDegree[Jump->Target]++;
925       }
926     };
927     fillInDegree(SrcBlock);
928     for (auto Block : UnknownBlocks) {
929       fillInDegree(Block);
930     }
931     // A loop containing SrcBlock
932     if (LocalInDegree[SrcBlock->Index] > 0)
933       return false;
934 
935     std::vector<FlowBlock *> AcyclicOrder;
936     std::queue<uint64_t> Queue;
937     Queue.push(SrcBlock->Index);
938     while (!Queue.empty()) {
939       FlowBlock *Block = &Func.Blocks[Queue.front()];
940       Queue.pop();
941       // Stop propagation once we reach DstBlock, if any
942       if (DstBlock != nullptr && Block == DstBlock)
943         break;
944 
945       // Keep an acyclic order of unknown blocks
946       if (Block->UnknownWeight && Block != SrcBlock)
947         AcyclicOrder.push_back(Block);
948 
949       // Add to the queue all successors with zero local in-degree
950       for (auto Jump : Block->SuccJumps) {
951         if (ignoreJump(SrcBlock, DstBlock, Jump))
952           continue;
953         uint64_t Dst = Jump->Target;
954         LocalInDegree[Dst]--;
955         if (LocalInDegree[Dst] == 0) {
956           Queue.push(Dst);
957         }
958       }
959     }
960 
961     // If there is a cycle in the subgraph, AcyclicOrder contains only a subset
962     // of all blocks
963     if (UnknownBlocks.size() != AcyclicOrder.size())
964       return false;
965     UnknownBlocks = AcyclicOrder;
966     return true;
967   }
968 
969   /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and
970   /// having UnknownBlocks intermediate blocks.
971   void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock,
972                                 const FlowBlock *DstBlock,
973                                 const std::vector<FlowBlock *> &UnknownBlocks) {
974     assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");
975 
976     // Ditribute flow from the source block
977     uint64_t BlockFlow = 0;
978     // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps
979     for (auto Jump : SrcBlock->SuccJumps) {
980       if (ignoreJump(SrcBlock, DstBlock, Jump))
981         continue;
982       BlockFlow += Jump->Flow;
983     }
984     rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);
985 
986     // Ditribute flow from the remaining blocks
987     for (auto Block : UnknownBlocks) {
988       assert(Block->UnknownWeight && "incorrect unknown subgraph");
989       uint64_t BlockFlow = 0;
990       // Block's flow is the sum of incoming flows
991       for (auto Jump : Block->PredJumps) {
992         BlockFlow += Jump->Flow;
993       }
994       Block->Flow = BlockFlow;
995       rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow);
996     }
997   }
998 
999   /// Redistribute flow for a block in a subgraph rooted at SrcBlock,
1000   /// and ending at DstBlock.
1001   void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
1002                       const FlowBlock *Block, uint64_t BlockFlow) {
1003     // Process all successor jumps and update corresponding flow values
1004     size_t BlockDegree = 0;
1005     for (auto Jump : Block->SuccJumps) {
1006       if (ignoreJump(SrcBlock, DstBlock, Jump))
1007         continue;
1008       BlockDegree++;
1009     }
1010     // If all successor jumps of the block are ignored, skip it
1011     if (DstBlock == nullptr && BlockDegree == 0)
1012       return;
1013     assert(BlockDegree > 0 && "all outgoing jumps are ignored");
1014 
1015     // Each of the Block's successors gets the following amount of flow.
1016     // Rounding the value up so that all flow is propagated
1017     uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
1018     for (auto Jump : Block->SuccJumps) {
1019       if (ignoreJump(SrcBlock, DstBlock, Jump))
1020         continue;
1021       uint64_t Flow = std::min(SuccFlow, BlockFlow);
1022       Jump->Flow = Flow;
1023       BlockFlow -= Flow;
1024     }
1025     assert(BlockFlow == 0 && "not all flow is propagated");
1026   }
1027 
1028   /// A constant indicating an arbitrary exit block of a function.
1029   static constexpr uint64_t AnyExitBlock = uint64_t(-1);
1030 
1031   /// The function.
1032   FlowFunction &Func;
1033 };
1034 
1035 /// Initializing flow network for a given function.
1036 ///
1037 /// Every block is split into three nodes that are responsible for (i) an
1038 /// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or
1039 /// reduction of the block weight.
1040 void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) {
1041   uint64_t NumBlocks = Func.Blocks.size();
1042   assert(NumBlocks > 1 && "Too few blocks in a function");
1043   LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n");
1044 
1045   // Pre-process data: make sure the entry weight is at least 1
1046   if (Func.Blocks[Func.Entry].Weight == 0) {
1047     Func.Blocks[Func.Entry].Weight = 1;
1048   }
1049   // Introducing dummy source/sink pairs to allow flow circulation.
1050   // The nodes corresponding to blocks of Func have indicies in the range
1051   // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values.
1052   uint64_t S = 3 * NumBlocks;
1053   uint64_t T = S + 1;
1054   uint64_t S1 = S + 2;
1055   uint64_t T1 = S + 3;
1056 
1057   Network.initialize(3 * NumBlocks + 4, S1, T1);
1058 
1059   // Create three nodes for every block of the function
1060   for (uint64_t B = 0; B < NumBlocks; B++) {
1061     auto &Block = Func.Blocks[B];
1062     assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
1063            "non-zero weight of a block w/o weight except for an entry");
1064 
1065     // Split every block into two nodes
1066     uint64_t Bin = 3 * B;
1067     uint64_t Bout = 3 * B + 1;
1068     uint64_t Baux = 3 * B + 2;
1069     if (Block.Weight > 0) {
1070       Network.addEdge(S1, Bout, Block.Weight, 0);
1071       Network.addEdge(Bin, T1, Block.Weight, 0);
1072     }
1073 
1074     // Edges from S and to T
1075     assert((!Block.isEntry() || !Block.isExit()) &&
1076            "a block cannot be an entry and an exit");
1077     if (Block.isEntry()) {
1078       Network.addEdge(S, Bin, 0);
1079     } else if (Block.isExit()) {
1080       Network.addEdge(Bout, T, 0);
1081     }
1082 
1083     // An auxiliary node to allow increase/reduction of block counts:
1084     // We assume that decreasing block counts is more expensive than increasing,
1085     // and thus, setting separate costs here. In the future we may want to tune
1086     // the relative costs so as to maximize the quality of generated profiles.
1087     int64_t AuxCostInc = SampleProfileProfiCostInc;
1088     int64_t AuxCostDec = SampleProfileProfiCostDec;
1089     if (Block.UnknownWeight) {
1090       // Do not penalize changing weights of blocks w/o known profile count
1091       AuxCostInc = 0;
1092       AuxCostDec = 0;
1093     } else {
1094       // Increasing the count for "cold" blocks with zero initial count is more
1095       // expensive than for "hot" ones
1096       if (Block.Weight == 0) {
1097         AuxCostInc = SampleProfileProfiCostIncZero;
1098       }
1099       // Modifying the count of the entry block is expensive
1100       if (Block.isEntry()) {
1101         AuxCostInc = SampleProfileProfiCostIncEntry;
1102         AuxCostDec = SampleProfileProfiCostDecEntry;
1103       }
1104     }
1105     // For blocks with self-edges, do not penalize a reduction of the count,
1106     // as all of the increase can be attributed to the self-edge
1107     if (Block.HasSelfEdge) {
1108       AuxCostDec = 0;
1109     }
1110 
1111     Network.addEdge(Bin, Baux, AuxCostInc);
1112     Network.addEdge(Baux, Bout, AuxCostInc);
1113     if (Block.Weight > 0) {
1114       Network.addEdge(Bout, Baux, AuxCostDec);
1115       Network.addEdge(Baux, Bin, AuxCostDec);
1116     }
1117   }
1118 
1119   // Creating edges for every jump
1120   for (auto &Jump : Func.Jumps) {
1121     uint64_t Src = Jump.Source;
1122     uint64_t Dst = Jump.Target;
1123     if (Src != Dst) {
1124       uint64_t SrcOut = 3 * Src + 1;
1125       uint64_t DstIn = 3 * Dst;
1126       uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0;
1127       Network.addEdge(SrcOut, DstIn, Cost);
1128     }
1129   }
1130 
1131   // Make sure we have a valid flow circulation
1132   Network.addEdge(T, S, 0);
1133 }
1134 
1135 /// Extract resulting block and edge counts from the flow network.
1136 void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) {
1137   uint64_t NumBlocks = Func.Blocks.size();
1138 
1139   // Extract resulting block counts
1140   for (uint64_t Src = 0; Src < NumBlocks; Src++) {
1141     auto &Block = Func.Blocks[Src];
1142     uint64_t SrcOut = 3 * Src + 1;
1143     int64_t Flow = 0;
1144     for (auto &Adj : Network.getFlow(SrcOut)) {
1145       uint64_t DstIn = Adj.first;
1146       int64_t DstFlow = Adj.second;
1147       bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2);
1148       if (!IsAuxNode || Block.HasSelfEdge) {
1149         Flow += DstFlow;
1150       }
1151     }
1152     Block.Flow = Flow;
1153     assert(Flow >= 0 && "negative block flow");
1154   }
1155 
1156   // Extract resulting jump counts
1157   for (auto &Jump : Func.Jumps) {
1158     uint64_t Src = Jump.Source;
1159     uint64_t Dst = Jump.Target;
1160     int64_t Flow = 0;
1161     if (Src != Dst) {
1162       uint64_t SrcOut = 3 * Src + 1;
1163       uint64_t DstIn = 3 * Dst;
1164       Flow = Network.getFlow(SrcOut, DstIn);
1165     } else {
1166       uint64_t SrcOut = 3 * Src + 1;
1167       uint64_t SrcAux = 3 * Src + 2;
1168       int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux);
1169       if (AuxFlow > 0)
1170         Flow = AuxFlow;
1171     }
1172     Jump.Flow = Flow;
1173     assert(Flow >= 0 && "negative jump flow");
1174   }
1175 }
1176 
1177 #ifndef NDEBUG
1178 /// Verify that the computed flow values satisfy flow conservation rules
1179 void verifyWeights(const FlowFunction &Func) {
1180   const uint64_t NumBlocks = Func.Blocks.size();
1181   auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
1182   auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
1183   for (auto &Jump : Func.Jumps) {
1184     InFlow[Jump.Target] += Jump.Flow;
1185     OutFlow[Jump.Source] += Jump.Flow;
1186   }
1187 
1188   uint64_t TotalInFlow = 0;
1189   uint64_t TotalOutFlow = 0;
1190   for (uint64_t I = 0; I < NumBlocks; I++) {
1191     auto &Block = Func.Blocks[I];
1192     if (Block.isEntry()) {
1193       TotalInFlow += Block.Flow;
1194       assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
1195     } else if (Block.isExit()) {
1196       TotalOutFlow += Block.Flow;
1197       assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
1198     } else {
1199       assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
1200       assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
1201     }
1202   }
1203   assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");
1204 
1205   // Verify that there are no isolated flow components
1206   // One could modify FlowFunction to hold edges indexed by the sources, which
1207   // will avoid a creation of the object
1208   auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
1209   for (auto &Jump : Func.Jumps) {
1210     if (Jump.Flow > 0) {
1211       PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
1212     }
1213   }
1214 
1215   // Run BFS from the source along edges with positive flow
1216   std::queue<uint64_t> Queue;
1217   auto Visited = BitVector(NumBlocks, false);
1218   Queue.push(Func.Entry);
1219   Visited[Func.Entry] = true;
1220   while (!Queue.empty()) {
1221     uint64_t Src = Queue.front();
1222     Queue.pop();
1223     for (uint64_t Dst : PositiveFlowEdges[Src]) {
1224       if (!Visited[Dst]) {
1225         Queue.push(Dst);
1226         Visited[Dst] = true;
1227       }
1228     }
1229   }
1230 
1231   // Verify that every block that has a positive flow is reached from the source
1232   // along edges with a positive flow
1233   for (uint64_t I = 0; I < NumBlocks; I++) {
1234     auto &Block = Func.Blocks[I];
1235     assert((Visited[I] || Block.Flow == 0) && "an isolated flow component");
1236   }
1237 }
1238 #endif
1239 
1240 } // end of anonymous namespace
1241 
1242 /// Apply the profile inference algorithm for a given flow function
1243 void llvm::applyFlowInference(FlowFunction &Func) {
1244   // Create and apply an inference network model
1245   auto InferenceNetwork = MinCostMaxFlow();
1246   initializeNetwork(InferenceNetwork, Func);
1247   InferenceNetwork.run();
1248 
1249   // Extract flow values for every block and every edge
1250   extractWeights(InferenceNetwork, Func);
1251 
1252   // Post-processing adjustments to the flow
1253   auto Adjuster = FlowAdjuster(Func);
1254   Adjuster.run();
1255 
1256 #ifndef NDEBUG
1257   // Verify the result
1258   verifyWeights(Func);
1259 #endif
1260 }
1261