xref: /llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp (revision 5f4ae5645754043f087c2c4ac58c7bffb0921ce3)
1 //===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a profile inference algorithm. Given an incomplete and
10 // possibly imprecise block counts, the algorithm reconstructs realistic block
11 // and edge counts that satisfy flow conservation rules, while minimally modify
12 // input block counts.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/Transforms/Utils/SampleProfileInference.h"
17 #include "llvm/ADT/BitVector.h"
18 #include "llvm/Support/Debug.h"
19 #include <queue>
20 #include <set>
21 
22 using namespace llvm;
23 #define DEBUG_TYPE "sample-profile-inference"
24 
25 namespace {
26 
27 /// A value indicating an infinite flow/capacity/weight of a block/edge.
28 /// Not using numeric_limits<int64_t>::max(), as the values can be summed up
29 /// during the execution.
30 static constexpr int64_t INF = ((int64_t)1) << 50;
31 
32 /// The minimum-cost maximum flow algorithm.
33 ///
34 /// The algorithm finds the maximum flow of minimum cost on a given (directed)
35 /// network using a modified version of the classical Moore-Bellman-Ford
36 /// approach. The algorithm applies a number of augmentation iterations in which
37 /// flow is sent along paths of positive capacity from the source to the sink.
38 /// The worst-case time complexity of the implementation is O(v(f)*m*n), where
39 /// where m is the number of edges, n is the number of vertices, and v(f) is the
40 /// value of the maximum flow. However, the observed running time on typical
41 /// instances is sub-quadratic, that is, o(n^2).
42 ///
43 /// The input is a set of edges with specified costs and capacities, and a pair
44 /// of nodes (source and sink). The output is the flow along each edge of the
45 /// minimum total cost respecting the given edge capacities.
46 class MinCostMaxFlow {
47 public:
48   // Initialize algorithm's data structures for a network of a given size.
49   void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
50     Source = SourceNode;
51     Target = SinkNode;
52 
53     Nodes = std::vector<Node>(NodeCount);
54     Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
55   }
56 
57   // Run the algorithm.
58   int64_t run() {
59     // Find an augmenting path and update the flow along the path
60     size_t AugmentationIters = 0;
61     while (findAugmentingPath()) {
62       augmentFlowAlongPath();
63       AugmentationIters++;
64     }
65 
66     // Compute the total flow and its cost
67     int64_t TotalCost = 0;
68     int64_t TotalFlow = 0;
69     for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
70       for (auto &Edge : Edges[Src]) {
71         if (Edge.Flow > 0) {
72           TotalCost += Edge.Cost * Edge.Flow;
73           if (Src == Source)
74             TotalFlow += Edge.Flow;
75         }
76       }
77     }
78     LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
79                       << " iterations with " << TotalFlow << " total flow"
80                       << " of " << TotalCost << " cost\n");
81     (void)TotalFlow;
82     return TotalCost;
83   }
84 
85   /// Adding an edge to the network with a specified capacity and a cost.
86   /// Multiple edges between a pair of nodes are allowed but self-edges
87   /// are not supported.
88   void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) {
89     assert(Capacity > 0 && "adding an edge of zero capacity");
90     assert(Src != Dst && "loop edge are not supported");
91 
92     Edge SrcEdge;
93     SrcEdge.Dst = Dst;
94     SrcEdge.Cost = Cost;
95     SrcEdge.Capacity = Capacity;
96     SrcEdge.Flow = 0;
97     SrcEdge.RevEdgeIndex = Edges[Dst].size();
98 
99     Edge DstEdge;
100     DstEdge.Dst = Src;
101     DstEdge.Cost = -Cost;
102     DstEdge.Capacity = 0;
103     DstEdge.Flow = 0;
104     DstEdge.RevEdgeIndex = Edges[Src].size();
105 
106     Edges[Src].push_back(SrcEdge);
107     Edges[Dst].push_back(DstEdge);
108   }
109 
110   /// Adding an edge to the network of infinite capacity and a given cost.
111   void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) {
112     addEdge(Src, Dst, INF, Cost);
113   }
114 
115   /// Get the total flow from a given source node.
116   /// Returns a list of pairs (target node, amount of flow to the target).
117   const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
118     std::vector<std::pair<uint64_t, int64_t>> Flow;
119     for (auto &Edge : Edges[Src]) {
120       if (Edge.Flow > 0)
121         Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
122     }
123     return Flow;
124   }
125 
126   /// Get the total flow between a pair of nodes.
127   int64_t getFlow(uint64_t Src, uint64_t Dst) const {
128     int64_t Flow = 0;
129     for (auto &Edge : Edges[Src]) {
130       if (Edge.Dst == Dst) {
131         Flow += Edge.Flow;
132       }
133     }
134     return Flow;
135   }
136 
137   /// A cost of increasing a block's count by one.
138   static constexpr int64_t AuxCostInc = 10;
139   /// A cost of decreasing a block's count by one.
140   static constexpr int64_t AuxCostDec = 20;
141   /// A cost of increasing a count of zero-weight block by one.
142   static constexpr int64_t AuxCostIncZero = 11;
143   /// A cost of increasing the entry block's count by one.
144   static constexpr int64_t AuxCostIncEntry = 40;
145   /// A cost of decreasing the entry block's count by one.
146   static constexpr int64_t AuxCostDecEntry = 10;
147   /// A cost of taking an unlikely jump.
148   static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 20;
149 
150 private:
151   /// Check for existence of an augmenting path with a positive capacity.
152   bool findAugmentingPath() {
153     // Initialize data structures
154     for (auto &Node : Nodes) {
155       Node.Distance = INF;
156       Node.ParentNode = uint64_t(-1);
157       Node.ParentEdgeIndex = uint64_t(-1);
158       Node.Taken = false;
159     }
160 
161     std::queue<uint64_t> Queue;
162     Queue.push(Source);
163     Nodes[Source].Distance = 0;
164     Nodes[Source].Taken = true;
165     while (!Queue.empty()) {
166       uint64_t Src = Queue.front();
167       Queue.pop();
168       Nodes[Src].Taken = false;
169       // Although the residual network contains edges with negative costs
170       // (in particular, backward edges), it can be shown that there are no
171       // negative-weight cycles and the following two invariants are maintained:
172       // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V,
173       // where Dist is the length of the shortest path between two nodes. This
174       // allows to prune the search-space of the path-finding algorithm using
175       // the following early-stop criteria:
176       // -- If we find a path with zero-distance from Source to Target, stop the
177       //    search, as the path is the shortest since Dist[Source, Target] >= 0;
178       // -- If we have Dist[Source, V] > Dist[Source, Target], then do not
179       //    process node V, as it is guaranteed _not_ to be on a shortest path
180       //    from Source to Target; it follows from inequalities
181       //    Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
182       //                         >= Dist[Source, V]
183       if (Nodes[Target].Distance == 0)
184         break;
185       if (Nodes[Src].Distance > Nodes[Target].Distance)
186         continue;
187 
188       // Process adjacent edges
189       for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
190         auto &Edge = Edges[Src][EdgeIdx];
191         if (Edge.Flow < Edge.Capacity) {
192           uint64_t Dst = Edge.Dst;
193           int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
194           if (Nodes[Dst].Distance > NewDistance) {
195             // Update the distance and the parent node/edge
196             Nodes[Dst].Distance = NewDistance;
197             Nodes[Dst].ParentNode = Src;
198             Nodes[Dst].ParentEdgeIndex = EdgeIdx;
199             // Add the node to the queue, if it is not there yet
200             if (!Nodes[Dst].Taken) {
201               Queue.push(Dst);
202               Nodes[Dst].Taken = true;
203             }
204           }
205         }
206       }
207     }
208 
209     return Nodes[Target].Distance != INF;
210   }
211 
212   /// Update the current flow along the augmenting path.
213   void augmentFlowAlongPath() {
214     // Find path capacity
215     int64_t PathCapacity = INF;
216     uint64_t Now = Target;
217     while (Now != Source) {
218       uint64_t Pred = Nodes[Now].ParentNode;
219       auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
220       PathCapacity = std::min(PathCapacity, Edge.Capacity - Edge.Flow);
221       Now = Pred;
222     }
223 
224     assert(PathCapacity > 0 && "found an incorrect augmenting path");
225 
226     // Update the flow along the path
227     Now = Target;
228     while (Now != Source) {
229       uint64_t Pred = Nodes[Now].ParentNode;
230       auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
231       auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];
232 
233       Edge.Flow += PathCapacity;
234       RevEdge.Flow -= PathCapacity;
235 
236       Now = Pred;
237     }
238   }
239 
240   /// An node in a flow network.
241   struct Node {
242     /// The cost of the cheapest path from the source to the current node.
243     int64_t Distance;
244     /// The node preceding the current one in the path.
245     uint64_t ParentNode;
246     /// The index of the edge between ParentNode and the current node.
247     uint64_t ParentEdgeIndex;
248     /// An indicator of whether the current node is in a queue.
249     bool Taken;
250   };
251   /// An edge in a flow network.
252   struct Edge {
253     /// The cost of the edge.
254     int64_t Cost;
255     /// The capacity of the edge.
256     int64_t Capacity;
257     /// The current flow on the edge.
258     int64_t Flow;
259     /// The destination node of the edge.
260     uint64_t Dst;
261     /// The index of the reverse edge between Dst and the current node.
262     uint64_t RevEdgeIndex;
263   };
264 
265   /// The set of network nodes.
266   std::vector<Node> Nodes;
267   /// The set of network edges.
268   std::vector<std::vector<Edge>> Edges;
269   /// Source node of the flow.
270   uint64_t Source;
271   /// Target (sink) node of the flow.
272   uint64_t Target;
273 };
274 
275 /// A post-processing adjustment of control flow. It applies two steps by
276 /// rerouting some flow and making it more realistic:
277 ///
278 /// - First, it removes all isolated components ("islands") with a positive flow
279 ///   that are unreachable from the entry block. For every such component, we
280 ///   find the shortest from the entry to an exit passing through the component,
281 ///   and increase the flow by one unit along the path.
282 ///
283 /// - Second, it identifies all "unknown subgraphs" consisting of basic blocks
284 ///   with no sampled counts. Then it rebalnces the flow that goes through such
285 ///   a subgraph so that each branch is taken with probability 50%.
286 ///   An unknown subgraph is such that for every two nodes u and v:
287 ///     - u dominates v and u is not unknown;
288 ///     - v post-dominates u; and
289 ///     - all inner-nodes of all (u,v)-paths are unknown.
290 ///
291 class FlowAdjuster {
292 public:
293   FlowAdjuster(FlowFunction &Func) : Func(Func) {
294     assert(Func.Blocks[Func.Entry].isEntry() &&
295            "incorrect index of the entry block");
296   }
297 
298   // Run the post-processing
299   void run() {
300     /// Adjust the flow to get rid of isolated components.
301     joinIsolatedComponents();
302 
303     /// Rebalance the flow inside unknown subgraphs.
304     rebalanceUnknownSubgraphs();
305   }
306 
307   /// The probability for the first successor of a unknown subgraph
308   static constexpr double UnknownFirstSuccProbability = 0.5;
309 
310 private:
311   void joinIsolatedComponents() {
312     // Find blocks that are reachable from the source
313     auto Visited = BitVector(NumBlocks(), false);
314     findReachable(Func.Entry, Visited);
315 
316     // Iterate over all non-reachable blocks and adjust their weights
317     for (uint64_t I = 0; I < NumBlocks(); I++) {
318       auto &Block = Func.Blocks[I];
319       if (Block.Flow > 0 && !Visited[I]) {
320         // Find a path from the entry to an exit passing through the block I
321         auto Path = findShortestPath(I);
322         // Increase the flow along the path
323         assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
324                "incorrectly computed path adjusting control flow");
325         Func.Blocks[Func.Entry].Flow += 1;
326         for (auto &Jump : Path) {
327           Jump->Flow += 1;
328           Func.Blocks[Jump->Target].Flow += 1;
329           // Update reachability
330           findReachable(Jump->Target, Visited);
331         }
332       }
333     }
334   }
335 
336   /// Run BFS from a given block along the jumps with a positive flow and mark
337   /// all reachable blocks.
338   void findReachable(uint64_t Src, BitVector &Visited) {
339     if (Visited[Src])
340       return;
341     std::queue<uint64_t> Queue;
342     Queue.push(Src);
343     Visited[Src] = true;
344     while (!Queue.empty()) {
345       Src = Queue.front();
346       Queue.pop();
347       for (auto Jump : Func.Blocks[Src].SuccJumps) {
348         uint64_t Dst = Jump->Target;
349         if (Jump->Flow > 0 && !Visited[Dst]) {
350           Queue.push(Dst);
351           Visited[Dst] = true;
352         }
353       }
354     }
355   }
356 
357   /// Find the shortest path from the entry block to an exit block passing
358   /// through a given block.
359   std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {
360     // A path from the entry block to BlockIdx
361     auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
362     // A path from BlockIdx to an exit block
363     auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);
364 
365     // Concatenate the two paths
366     std::vector<FlowJump *> Result;
367     Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end());
368     Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end());
369     return Result;
370   }
371 
372   /// Apply the Dijkstra algorithm to find the shortest path from a given
373   /// Source to a given Target block.
374   /// If Target == -1, then the path ends at an exit block.
375   std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) {
376     // Quit early, if possible
377     if (Source == Target)
378       return std::vector<FlowJump *>();
379     if (Func.Blocks[Source].isExit() && Target == AnyExitBlock)
380       return std::vector<FlowJump *>();
381 
382     // Initialize data structures
383     auto Distance = std::vector<int64_t>(NumBlocks(), INF);
384     auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr);
385     Distance[Source] = 0;
386     std::set<std::pair<uint64_t, uint64_t>> Queue;
387     Queue.insert(std::make_pair(Distance[Source], Source));
388 
389     // Run the Dijkstra algorithm
390     while (!Queue.empty()) {
391       uint64_t Src = Queue.begin()->second;
392       Queue.erase(Queue.begin());
393       // If we found a solution, quit early
394       if (Src == Target ||
395           (Func.Blocks[Src].isExit() && Target == AnyExitBlock))
396         break;
397 
398       for (auto Jump : Func.Blocks[Src].SuccJumps) {
399         uint64_t Dst = Jump->Target;
400         int64_t JumpDist = jumpDistance(Jump);
401         if (Distance[Dst] > Distance[Src] + JumpDist) {
402           Queue.erase(std::make_pair(Distance[Dst], Dst));
403 
404           Distance[Dst] = Distance[Src] + JumpDist;
405           Parent[Dst] = Jump;
406 
407           Queue.insert(std::make_pair(Distance[Dst], Dst));
408         }
409       }
410     }
411     // If Target is not provided, find the closest exit block
412     if (Target == AnyExitBlock) {
413       for (uint64_t I = 0; I < NumBlocks(); I++) {
414         if (Func.Blocks[I].isExit() && Parent[I] != nullptr) {
415           if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {
416             Target = I;
417           }
418         }
419       }
420     }
421     assert(Parent[Target] != nullptr && "a path does not exist");
422 
423     // Extract the constructed path
424     std::vector<FlowJump *> Result;
425     uint64_t Now = Target;
426     while (Now != Source) {
427       assert(Now == Parent[Now]->Target && "incorrect parent jump");
428       Result.push_back(Parent[Now]);
429       Now = Parent[Now]->Source;
430     }
431     // Reverse the path, since it is extracted from Target to Source
432     std::reverse(Result.begin(), Result.end());
433     return Result;
434   }
435 
436   /// A distance of a path for a given jump.
437   /// In order to incite the path to use blocks/jumps with large positive flow,
438   /// and avoid changing branch probability of outgoing edges drastically,
439   /// set the distance as follows:
440   ///   if Jump.Flow > 0, then distance = max(100 - Jump->Flow, 0)
441   ///   if Block.Weight > 0, then distance = 1
442   ///   otherwise distance >> 1
443   int64_t jumpDistance(FlowJump *Jump) const {
444     int64_t BaseDistance = 100;
445     if (Jump->IsUnlikely)
446       return MinCostMaxFlow::AuxCostUnlikely;
447     if (Jump->Flow > 0)
448       return std::max(BaseDistance - (int64_t)Jump->Flow, (int64_t)0);
449     if (Func.Blocks[Jump->Target].Weight > 0)
450       return BaseDistance;
451     return BaseDistance * (NumBlocks() + 1);
452   };
453 
454   uint64_t NumBlocks() const { return Func.Blocks.size(); }
455 
456   /// Rebalance unknown subgraphs so as each branch splits with probabilities
457   /// UnknownFirstSuccProbability and 1 - UnknownFirstSuccProbability
458   void rebalanceUnknownSubgraphs() {
459     static_assert(
460         UnknownFirstSuccProbability >= 0.0 &&
461             UnknownFirstSuccProbability <= 1.0,
462         "the share of the unknown successor should be between 0 and 1");
463     // Try to find unknown subgraphs from each non-unknown block
464     for (uint64_t I = 0; I < Func.Blocks.size(); I++) {
465       auto SrcBlock = &Func.Blocks[I];
466       // Do not attempt to find unknown successors from a unknown or a
467       // zero-flow block
468       if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0)
469         continue;
470 
471       std::vector<FlowBlock *> UnknownSuccs;
472       FlowBlock *DstBlock = nullptr;
473       // Find a unknown subgraphs starting at block SrcBlock
474       if (!findUnknownSubgraph(SrcBlock, DstBlock, UnknownSuccs))
475         continue;
476       // At the moment, we do not rebalance subgraphs containing cycles among
477       // unknown blocks
478       if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownSuccs))
479         continue;
480 
481       // Rebalance the flow
482       rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownSuccs);
483     }
484   }
485 
486   /// Find a unknown subgraph starting at block SrcBlock.
487   /// If the search is successful, the method sets DstBlock and UnknownSuccs.
488   bool findUnknownSubgraph(FlowBlock *SrcBlock, FlowBlock *&DstBlock,
489                            std::vector<FlowBlock *> &UnknownSuccs) {
490     // Run BFS from SrcBlock and make sure all paths are going through unknown
491     // blocks and end at a non-unknown DstBlock
492     auto Visited = BitVector(NumBlocks(), false);
493     std::queue<uint64_t> Queue;
494     DstBlock = nullptr;
495 
496     Queue.push(SrcBlock->Index);
497     Visited[SrcBlock->Index] = true;
498     while (!Queue.empty()) {
499       auto &Block = Func.Blocks[Queue.front()];
500       Queue.pop();
501       // Process blocks reachable from Block
502       for (auto Jump : Block.SuccJumps) {
503         uint64_t Dst = Jump->Target;
504         if (Visited[Dst])
505           continue;
506         Visited[Dst] = true;
507         if (!Func.Blocks[Dst].UnknownWeight) {
508           // If we see non-unique non-unknown block reachable from SrcBlock,
509           // stop processing and skip rebalancing
510           FlowBlock *CandidateDstBlock = &Func.Blocks[Dst];
511           if (DstBlock != nullptr && DstBlock != CandidateDstBlock)
512             return false;
513           DstBlock = CandidateDstBlock;
514         } else {
515           Queue.push(Dst);
516           UnknownSuccs.push_back(&Func.Blocks[Dst]);
517         }
518       }
519     }
520 
521     // If the list of unknown blocks is empty, we don't need rebalancing
522     if (UnknownSuccs.empty())
523       return false;
524     // If all reachable nodes from SrcBlock are unknown, skip rebalancing
525     if (DstBlock == nullptr)
526       return false;
527     // If any of the unknown blocks is an exit block, skip rebalancing
528     for (auto Block : UnknownSuccs) {
529       if (Block->isExit())
530         return false;
531     }
532 
533     return true;
534   }
535 
536   /// Verify if the given unknown subgraph is acyclic, and if yes, reorder
537   /// UnknownSuccs in the topological order (so that all jumps are "forward").
538   bool isAcyclicSubgraph(FlowBlock *SrcBlock, FlowBlock *DstBlock,
539                          std::vector<FlowBlock *> &UnknownSuccs) {
540     // Extract local in-degrees in the considered subgraph
541     auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
542     for (auto Jump : SrcBlock->SuccJumps) {
543       LocalInDegree[Jump->Target]++;
544     }
545     for (uint64_t I = 0; I < UnknownSuccs.size(); I++) {
546       for (auto Jump : UnknownSuccs[I]->SuccJumps) {
547         LocalInDegree[Jump->Target]++;
548       }
549     }
550     // A loop containing SrcBlock
551     if (LocalInDegree[SrcBlock->Index] > 0)
552       return false;
553 
554     std::vector<FlowBlock *> AcyclicOrder;
555     std::queue<uint64_t> Queue;
556     Queue.push(SrcBlock->Index);
557     while (!Queue.empty()) {
558       auto &Block = Func.Blocks[Queue.front()];
559       Queue.pop();
560       // Stop propagation once we reach DstBlock
561       if (Block.Index == DstBlock->Index)
562         break;
563 
564       AcyclicOrder.push_back(&Block);
565       // Add to the queue all successors with zero local in-degree
566       for (auto Jump : Block.SuccJumps) {
567         uint64_t Dst = Jump->Target;
568         LocalInDegree[Dst]--;
569         if (LocalInDegree[Dst] == 0) {
570           Queue.push(Dst);
571         }
572       }
573     }
574 
575     // If there is a cycle in the subgraph, AcyclicOrder contains only a subset
576     // of all blocks
577     if (UnknownSuccs.size() + 1 != AcyclicOrder.size())
578       return false;
579     UnknownSuccs = AcyclicOrder;
580     return true;
581   }
582 
583   /// Rebalance a given subgraph.
584   void rebalanceUnknownSubgraph(FlowBlock *SrcBlock, FlowBlock *DstBlock,
585                                 std::vector<FlowBlock *> &UnknownSuccs) {
586     assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");
587     assert(UnknownSuccs.front() == SrcBlock && "incorrect order of unknowns");
588 
589     for (auto Block : UnknownSuccs) {
590       // Block's flow is the sum of incoming flows
591       uint64_t TotalFlow = 0;
592       if (Block == SrcBlock) {
593         TotalFlow = Block->Flow;
594       } else {
595         for (auto Jump : Block->PredJumps) {
596           TotalFlow += Jump->Flow;
597         }
598         Block->Flow = TotalFlow;
599       }
600 
601       // Process all successor jumps and update corresponding flow values
602       for (uint64_t I = 0; I < Block->SuccJumps.size(); I++) {
603         auto Jump = Block->SuccJumps[I];
604         if (I + 1 == Block->SuccJumps.size()) {
605           Jump->Flow = TotalFlow;
606           continue;
607         }
608         uint64_t Flow = uint64_t(TotalFlow * UnknownFirstSuccProbability);
609         Jump->Flow = Flow;
610         TotalFlow -= Flow;
611       }
612     }
613   }
614 
615   /// A constant indicating an arbitrary exit block of a function.
616   static constexpr uint64_t AnyExitBlock = uint64_t(-1);
617 
618   /// The function.
619   FlowFunction &Func;
620 };
621 
622 /// Initializing flow network for a given function.
623 ///
624 /// Every block is split into three nodes that are responsible for (i) an
625 /// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or
626 /// reduction of the block weight.
627 void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) {
628   uint64_t NumBlocks = Func.Blocks.size();
629   assert(NumBlocks > 1 && "Too few blocks in a function");
630   LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n");
631 
632   // Pre-process data: make sure the entry weight is at least 1
633   if (Func.Blocks[Func.Entry].Weight == 0) {
634     Func.Blocks[Func.Entry].Weight = 1;
635   }
636   // Introducing dummy source/sink pairs to allow flow circulation.
637   // The nodes corresponding to blocks of Func have indicies in the range
638   // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values.
639   uint64_t S = 3 * NumBlocks;
640   uint64_t T = S + 1;
641   uint64_t S1 = S + 2;
642   uint64_t T1 = S + 3;
643 
644   Network.initialize(3 * NumBlocks + 4, S1, T1);
645 
646   // Create three nodes for every block of the function
647   for (uint64_t B = 0; B < NumBlocks; B++) {
648     auto &Block = Func.Blocks[B];
649     assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
650            "non-zero weight of a block w/o weight except for an entry");
651 
652     // Split every block into two nodes
653     uint64_t Bin = 3 * B;
654     uint64_t Bout = 3 * B + 1;
655     uint64_t Baux = 3 * B + 2;
656     if (Block.Weight > 0) {
657       Network.addEdge(S1, Bout, Block.Weight, 0);
658       Network.addEdge(Bin, T1, Block.Weight, 0);
659     }
660 
661     // Edges from S and to T
662     assert((!Block.isEntry() || !Block.isExit()) &&
663            "a block cannot be an entry and an exit");
664     if (Block.isEntry()) {
665       Network.addEdge(S, Bin, 0);
666     } else if (Block.isExit()) {
667       Network.addEdge(Bout, T, 0);
668     }
669 
670     // An auxiliary node to allow increase/reduction of block counts:
671     // We assume that decreasing block counts is more expensive than increasing,
672     // and thus, setting separate costs here. In the future we may want to tune
673     // the relative costs so as to maximize the quality of generated profiles.
674     int64_t AuxCostInc = MinCostMaxFlow::AuxCostInc;
675     int64_t AuxCostDec = MinCostMaxFlow::AuxCostDec;
676     if (Block.UnknownWeight) {
677       // Do not penalize changing weights of blocks w/o known profile count
678       AuxCostInc = 0;
679       AuxCostDec = 0;
680     } else {
681       // Increasing the count for "cold" blocks with zero initial count is more
682       // expensive than for "hot" ones
683       if (Block.Weight == 0) {
684         AuxCostInc = MinCostMaxFlow::AuxCostIncZero;
685       }
686       // Modifying the count of the entry block is expensive
687       if (Block.isEntry()) {
688         AuxCostInc = MinCostMaxFlow::AuxCostIncEntry;
689         AuxCostDec = MinCostMaxFlow::AuxCostDecEntry;
690       }
691     }
692     // For blocks with self-edges, do not penalize a reduction of the count,
693     // as all of the increase can be attributed to the self-edge
694     if (Block.HasSelfEdge) {
695       AuxCostDec = 0;
696     }
697 
698     Network.addEdge(Bin, Baux, AuxCostInc);
699     Network.addEdge(Baux, Bout, AuxCostInc);
700     if (Block.Weight > 0) {
701       Network.addEdge(Bout, Baux, AuxCostDec);
702       Network.addEdge(Baux, Bin, AuxCostDec);
703     }
704   }
705 
706   // Creating edges for every jump
707   for (auto &Jump : Func.Jumps) {
708     uint64_t Src = Jump.Source;
709     uint64_t Dst = Jump.Target;
710     if (Src != Dst) {
711       uint64_t SrcOut = 3 * Src + 1;
712       uint64_t DstIn = 3 * Dst;
713       uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0;
714       Network.addEdge(SrcOut, DstIn, Cost);
715     }
716   }
717 
718   // Make sure we have a valid flow circulation
719   Network.addEdge(T, S, 0);
720 }
721 
722 /// Extract resulting block and edge counts from the flow network.
723 void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) {
724   uint64_t NumBlocks = Func.Blocks.size();
725 
726   // Extract resulting block counts
727   for (uint64_t Src = 0; Src < NumBlocks; Src++) {
728     auto &Block = Func.Blocks[Src];
729     uint64_t SrcOut = 3 * Src + 1;
730     int64_t Flow = 0;
731     for (auto &Adj : Network.getFlow(SrcOut)) {
732       uint64_t DstIn = Adj.first;
733       int64_t DstFlow = Adj.second;
734       bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2);
735       if (!IsAuxNode || Block.HasSelfEdge) {
736         Flow += DstFlow;
737       }
738     }
739     Block.Flow = Flow;
740     assert(Flow >= 0 && "negative block flow");
741   }
742 
743   // Extract resulting jump counts
744   for (auto &Jump : Func.Jumps) {
745     uint64_t Src = Jump.Source;
746     uint64_t Dst = Jump.Target;
747     int64_t Flow = 0;
748     if (Src != Dst) {
749       uint64_t SrcOut = 3 * Src + 1;
750       uint64_t DstIn = 3 * Dst;
751       Flow = Network.getFlow(SrcOut, DstIn);
752     } else {
753       uint64_t SrcOut = 3 * Src + 1;
754       uint64_t SrcAux = 3 * Src + 2;
755       int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux);
756       if (AuxFlow > 0)
757         Flow = AuxFlow;
758     }
759     Jump.Flow = Flow;
760     assert(Flow >= 0 && "negative jump flow");
761   }
762 }
763 
764 #ifndef NDEBUG
765 /// Verify that the computed flow values satisfy flow conservation rules
766 void verifyWeights(const FlowFunction &Func) {
767   const uint64_t NumBlocks = Func.Blocks.size();
768   auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
769   auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
770   for (auto &Jump : Func.Jumps) {
771     InFlow[Jump.Target] += Jump.Flow;
772     OutFlow[Jump.Source] += Jump.Flow;
773   }
774 
775   uint64_t TotalInFlow = 0;
776   uint64_t TotalOutFlow = 0;
777   for (uint64_t I = 0; I < NumBlocks; I++) {
778     auto &Block = Func.Blocks[I];
779     if (Block.isEntry()) {
780       TotalInFlow += Block.Flow;
781       assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
782     } else if (Block.isExit()) {
783       TotalOutFlow += Block.Flow;
784       assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
785     } else {
786       assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
787       assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
788     }
789   }
790   assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");
791 
792   // Verify that there are no isolated flow components
793   // One could modify FlowFunction to hold edges indexed by the sources, which
794   // will avoid a creation of the object
795   auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
796   for (auto &Jump : Func.Jumps) {
797     if (Jump.Flow > 0) {
798       PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
799     }
800   }
801 
802   // Run BFS from the source along edges with positive flow
803   std::queue<uint64_t> Queue;
804   auto Visited = BitVector(NumBlocks, false);
805   Queue.push(Func.Entry);
806   Visited[Func.Entry] = true;
807   while (!Queue.empty()) {
808     uint64_t Src = Queue.front();
809     Queue.pop();
810     for (uint64_t Dst : PositiveFlowEdges[Src]) {
811       if (!Visited[Dst]) {
812         Queue.push(Dst);
813         Visited[Dst] = true;
814       }
815     }
816   }
817 
818   // Verify that every block that has a positive flow is reached from the source
819   // along edges with a positive flow
820   for (uint64_t I = 0; I < NumBlocks; I++) {
821     auto &Block = Func.Blocks[I];
822     assert((Visited[I] || Block.Flow == 0) && "an isolated flow component");
823   }
824 }
825 #endif
826 
827 } // end of anonymous namespace
828 
829 /// Apply the profile inference algorithm for a given flow function
830 void llvm::applyFlowInference(FlowFunction &Func) {
831   // Create and apply an inference network model
832   auto InferenceNetwork = MinCostMaxFlow();
833   initializeNetwork(InferenceNetwork, Func);
834   InferenceNetwork.run();
835 
836   // Extract flow values for every block and every edge
837   extractWeights(InferenceNetwork, Func);
838 
839   // Post-processing adjustments to the flow
840   auto Adjuster = FlowAdjuster(Func);
841   Adjuster.run();
842 
843 #ifndef NDEBUG
844   // Verify the result
845   verifyWeights(Func);
846 #endif
847 }
848