xref: /freebsd-src/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp (revision 0eae32dcef82f6f06de6419a0d623d7def0cc8f6)
1 //===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a profile inference algorithm. Given an incomplete and
10 // possibly imprecise block counts, the algorithm reconstructs realistic block
11 // and edge counts that satisfy flow conservation rules, while minimally modify
12 // input block counts.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/Transforms/Utils/SampleProfileInference.h"
17 #include "llvm/Support/Debug.h"
18 #include <queue>
19 #include <set>
20 
21 using namespace llvm;
22 #define DEBUG_TYPE "sample-profile-inference"
23 
24 namespace {
25 
26 /// A value indicating an infinite flow/capacity/weight of a block/edge.
27 /// Not using numeric_limits<int64_t>::max(), as the values can be summed up
28 /// during the execution.
29 static constexpr int64_t INF = ((int64_t)1) << 50;
30 
31 /// The minimum-cost maximum flow algorithm.
32 ///
33 /// The algorithm finds the maximum flow of minimum cost on a given (directed)
34 /// network using a modified version of the classical Moore-Bellman-Ford
35 /// approach. The algorithm applies a number of augmentation iterations in which
36 /// flow is sent along paths of positive capacity from the source to the sink.
37 /// The worst-case time complexity of the implementation is O(v(f)*m*n), where
38 /// where m is the number of edges, n is the number of vertices, and v(f) is the
39 /// value of the maximum flow. However, the observed running time on typical
40 /// instances is sub-quadratic, that is, o(n^2).
41 ///
42 /// The input is a set of edges with specified costs and capacities, and a pair
43 /// of nodes (source and sink). The output is the flow along each edge of the
44 /// minimum total cost respecting the given edge capacities.
45 class MinCostMaxFlow {
46 public:
47   // Initialize algorithm's data structures for a network of a given size.
48   void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
49     Source = SourceNode;
50     Target = SinkNode;
51 
52     Nodes = std::vector<Node>(NodeCount);
53     Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
54   }
55 
56   // Run the algorithm.
57   int64_t run() {
58     // Find an augmenting path and update the flow along the path
59     size_t AugmentationIters = 0;
60     while (findAugmentingPath()) {
61       augmentFlowAlongPath();
62       AugmentationIters++;
63     }
64 
65     // Compute the total flow and its cost
66     int64_t TotalCost = 0;
67     int64_t TotalFlow = 0;
68     for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
69       for (auto &Edge : Edges[Src]) {
70         if (Edge.Flow > 0) {
71           TotalCost += Edge.Cost * Edge.Flow;
72           if (Src == Source)
73             TotalFlow += Edge.Flow;
74         }
75       }
76     }
77     LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
78                       << " iterations with " << TotalFlow << " total flow"
79                       << " of " << TotalCost << " cost\n");
80     (void)TotalFlow;
81     return TotalCost;
82   }
83 
84   /// Adding an edge to the network with a specified capacity and a cost.
85   /// Multiple edges between a pair of nodes are allowed but self-edges
86   /// are not supported.
87   void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) {
88     assert(Capacity > 0 && "adding an edge of zero capacity");
89     assert(Src != Dst && "loop edge are not supported");
90 
91     Edge SrcEdge;
92     SrcEdge.Dst = Dst;
93     SrcEdge.Cost = Cost;
94     SrcEdge.Capacity = Capacity;
95     SrcEdge.Flow = 0;
96     SrcEdge.RevEdgeIndex = Edges[Dst].size();
97 
98     Edge DstEdge;
99     DstEdge.Dst = Src;
100     DstEdge.Cost = -Cost;
101     DstEdge.Capacity = 0;
102     DstEdge.Flow = 0;
103     DstEdge.RevEdgeIndex = Edges[Src].size();
104 
105     Edges[Src].push_back(SrcEdge);
106     Edges[Dst].push_back(DstEdge);
107   }
108 
109   /// Adding an edge to the network of infinite capacity and a given cost.
110   void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) {
111     addEdge(Src, Dst, INF, Cost);
112   }
113 
114   /// Get the total flow from a given source node.
115   /// Returns a list of pairs (target node, amount of flow to the target).
116   const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
117     std::vector<std::pair<uint64_t, int64_t>> Flow;
118     for (auto &Edge : Edges[Src]) {
119       if (Edge.Flow > 0)
120         Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
121     }
122     return Flow;
123   }
124 
125   /// Get the total flow between a pair of nodes.
126   int64_t getFlow(uint64_t Src, uint64_t Dst) const {
127     int64_t Flow = 0;
128     for (auto &Edge : Edges[Src]) {
129       if (Edge.Dst == Dst) {
130         Flow += Edge.Flow;
131       }
132     }
133     return Flow;
134   }
135 
136   /// A cost of increasing a block's count by one.
137   static constexpr int64_t AuxCostInc = 10;
138   /// A cost of decreasing a block's count by one.
139   static constexpr int64_t AuxCostDec = 20;
140   /// A cost of increasing a count of zero-weight block by one.
141   static constexpr int64_t AuxCostIncZero = 11;
142   /// A cost of increasing the entry block's count by one.
143   static constexpr int64_t AuxCostIncEntry = 40;
144   /// A cost of decreasing the entry block's count by one.
145   static constexpr int64_t AuxCostDecEntry = 10;
146   /// A cost of taking an unlikely jump.
147   static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 20;
148 
149 private:
150   /// Check for existence of an augmenting path with a positive capacity.
151   bool findAugmentingPath() {
152     // Initialize data structures
153     for (auto &Node : Nodes) {
154       Node.Distance = INF;
155       Node.ParentNode = uint64_t(-1);
156       Node.ParentEdgeIndex = uint64_t(-1);
157       Node.Taken = false;
158     }
159 
160     std::queue<uint64_t> Queue;
161     Queue.push(Source);
162     Nodes[Source].Distance = 0;
163     Nodes[Source].Taken = true;
164     while (!Queue.empty()) {
165       uint64_t Src = Queue.front();
166       Queue.pop();
167       Nodes[Src].Taken = false;
168       // Although the residual network contains edges with negative costs
169       // (in particular, backward edges), it can be shown that there are no
170       // negative-weight cycles and the following two invariants are maintained:
171       // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V,
172       // where Dist is the length of the shortest path between two nodes. This
173       // allows to prune the search-space of the path-finding algorithm using
174       // the following early-stop criteria:
175       // -- If we find a path with zero-distance from Source to Target, stop the
176       //    search, as the path is the shortest since Dist[Source, Target] >= 0;
177       // -- If we have Dist[Source, V] > Dist[Source, Target], then do not
178       //    process node V, as it is guaranteed _not_ to be on a shortest path
179       //    from Source to Target; it follows from inequalities
180       //    Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
181       //                         >= Dist[Source, V]
182       if (Nodes[Target].Distance == 0)
183         break;
184       if (Nodes[Src].Distance > Nodes[Target].Distance)
185         continue;
186 
187       // Process adjacent edges
188       for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
189         auto &Edge = Edges[Src][EdgeIdx];
190         if (Edge.Flow < Edge.Capacity) {
191           uint64_t Dst = Edge.Dst;
192           int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
193           if (Nodes[Dst].Distance > NewDistance) {
194             // Update the distance and the parent node/edge
195             Nodes[Dst].Distance = NewDistance;
196             Nodes[Dst].ParentNode = Src;
197             Nodes[Dst].ParentEdgeIndex = EdgeIdx;
198             // Add the node to the queue, if it is not there yet
199             if (!Nodes[Dst].Taken) {
200               Queue.push(Dst);
201               Nodes[Dst].Taken = true;
202             }
203           }
204         }
205       }
206     }
207 
208     return Nodes[Target].Distance != INF;
209   }
210 
211   /// Update the current flow along the augmenting path.
212   void augmentFlowAlongPath() {
213     // Find path capacity
214     int64_t PathCapacity = INF;
215     uint64_t Now = Target;
216     while (Now != Source) {
217       uint64_t Pred = Nodes[Now].ParentNode;
218       auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
219       PathCapacity = std::min(PathCapacity, Edge.Capacity - Edge.Flow);
220       Now = Pred;
221     }
222 
223     assert(PathCapacity > 0 && "found an incorrect augmenting path");
224 
225     // Update the flow along the path
226     Now = Target;
227     while (Now != Source) {
228       uint64_t Pred = Nodes[Now].ParentNode;
229       auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
230       auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];
231 
232       Edge.Flow += PathCapacity;
233       RevEdge.Flow -= PathCapacity;
234 
235       Now = Pred;
236     }
237   }
238 
239   /// An node in a flow network.
240   struct Node {
241     /// The cost of the cheapest path from the source to the current node.
242     int64_t Distance;
243     /// The node preceding the current one in the path.
244     uint64_t ParentNode;
245     /// The index of the edge between ParentNode and the current node.
246     uint64_t ParentEdgeIndex;
247     /// An indicator of whether the current node is in a queue.
248     bool Taken;
249   };
250   /// An edge in a flow network.
251   struct Edge {
252     /// The cost of the edge.
253     int64_t Cost;
254     /// The capacity of the edge.
255     int64_t Capacity;
256     /// The current flow on the edge.
257     int64_t Flow;
258     /// The destination node of the edge.
259     uint64_t Dst;
260     /// The index of the reverse edge between Dst and the current node.
261     uint64_t RevEdgeIndex;
262   };
263 
264   /// The set of network nodes.
265   std::vector<Node> Nodes;
266   /// The set of network edges.
267   std::vector<std::vector<Edge>> Edges;
268   /// Source node of the flow.
269   uint64_t Source;
270   /// Target (sink) node of the flow.
271   uint64_t Target;
272 };
273 
274 /// A post-processing adjustment of control flow. It applies two steps by
275 /// rerouting some flow and making it more realistic:
276 ///
277 /// - First, it removes all isolated components ("islands") with a positive flow
278 ///   that are unreachable from the entry block. For every such component, we
279 ///   find the shortest from the entry to an exit passing through the component,
280 ///   and increase the flow by one unit along the path.
281 ///
282 /// - Second, it identifies all "unknown subgraphs" consisting of basic blocks
283 ///   with no sampled counts. Then it rebalnces the flow that goes through such
284 ///   a subgraph so that each branch is taken with probability 50%.
285 ///   An unknown subgraph is such that for every two nodes u and v:
286 ///     - u dominates v and u is not unknown;
287 ///     - v post-dominates u; and
288 ///     - all inner-nodes of all (u,v)-paths are unknown.
289 ///
290 class FlowAdjuster {
291 public:
292   FlowAdjuster(FlowFunction &Func) : Func(Func) {
293     assert(Func.Blocks[Func.Entry].isEntry() &&
294            "incorrect index of the entry block");
295   }
296 
297   // Run the post-processing
298   void run() {
299     /// Adjust the flow to get rid of isolated components.
300     joinIsolatedComponents();
301 
302     /// Rebalance the flow inside unknown subgraphs.
303     rebalanceUnknownSubgraphs();
304   }
305 
306   /// The probability for the first successor of a unknown subgraph
307   static constexpr double UnknownFirstSuccProbability = 0.5;
308 
309 private:
310   void joinIsolatedComponents() {
311     // Find blocks that are reachable from the source
312     auto Visited = std::vector<bool>(NumBlocks(), false);
313     findReachable(Func.Entry, Visited);
314 
315     // Iterate over all non-reachable blocks and adjust their weights
316     for (uint64_t I = 0; I < NumBlocks(); I++) {
317       auto &Block = Func.Blocks[I];
318       if (Block.Flow > 0 && !Visited[I]) {
319         // Find a path from the entry to an exit passing through the block I
320         auto Path = findShortestPath(I);
321         // Increase the flow along the path
322         assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
323                "incorrectly computed path adjusting control flow");
324         Func.Blocks[Func.Entry].Flow += 1;
325         for (auto &Jump : Path) {
326           Jump->Flow += 1;
327           Func.Blocks[Jump->Target].Flow += 1;
328           // Update reachability
329           findReachable(Jump->Target, Visited);
330         }
331       }
332     }
333   }
334 
335   /// Run BFS from a given block along the jumps with a positive flow and mark
336   /// all reachable blocks.
337   void findReachable(uint64_t Src, std::vector<bool> &Visited) {
338     if (Visited[Src])
339       return;
340     std::queue<uint64_t> Queue;
341     Queue.push(Src);
342     Visited[Src] = true;
343     while (!Queue.empty()) {
344       Src = Queue.front();
345       Queue.pop();
346       for (auto Jump : Func.Blocks[Src].SuccJumps) {
347         uint64_t Dst = Jump->Target;
348         if (Jump->Flow > 0 && !Visited[Dst]) {
349           Queue.push(Dst);
350           Visited[Dst] = true;
351         }
352       }
353     }
354   }
355 
356   /// Find the shortest path from the entry block to an exit block passing
357   /// through a given block.
358   std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {
359     // A path from the entry block to BlockIdx
360     auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
361     // A path from BlockIdx to an exit block
362     auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);
363 
364     // Concatenate the two paths
365     std::vector<FlowJump *> Result;
366     Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end());
367     Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end());
368     return Result;
369   }
370 
371   /// Apply the Dijkstra algorithm to find the shortest path from a given
372   /// Source to a given Target block.
373   /// If Target == -1, then the path ends at an exit block.
374   std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) {
375     // Quit early, if possible
376     if (Source == Target)
377       return std::vector<FlowJump *>();
378     if (Func.Blocks[Source].isExit() && Target == AnyExitBlock)
379       return std::vector<FlowJump *>();
380 
381     // Initialize data structures
382     auto Distance = std::vector<int64_t>(NumBlocks(), INF);
383     auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr);
384     Distance[Source] = 0;
385     std::set<std::pair<uint64_t, uint64_t>> Queue;
386     Queue.insert(std::make_pair(Distance[Source], Source));
387 
388     // Run the Dijkstra algorithm
389     while (!Queue.empty()) {
390       uint64_t Src = Queue.begin()->second;
391       Queue.erase(Queue.begin());
392       // If we found a solution, quit early
393       if (Src == Target ||
394           (Func.Blocks[Src].isExit() && Target == AnyExitBlock))
395         break;
396 
397       for (auto Jump : Func.Blocks[Src].SuccJumps) {
398         uint64_t Dst = Jump->Target;
399         int64_t JumpDist = jumpDistance(Jump);
400         if (Distance[Dst] > Distance[Src] + JumpDist) {
401           Queue.erase(std::make_pair(Distance[Dst], Dst));
402 
403           Distance[Dst] = Distance[Src] + JumpDist;
404           Parent[Dst] = Jump;
405 
406           Queue.insert(std::make_pair(Distance[Dst], Dst));
407         }
408       }
409     }
410     // If Target is not provided, find the closest exit block
411     if (Target == AnyExitBlock) {
412       for (uint64_t I = 0; I < NumBlocks(); I++) {
413         if (Func.Blocks[I].isExit() && Parent[I] != nullptr) {
414           if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {
415             Target = I;
416           }
417         }
418       }
419     }
420     assert(Parent[Target] != nullptr && "a path does not exist");
421 
422     // Extract the constructed path
423     std::vector<FlowJump *> Result;
424     uint64_t Now = Target;
425     while (Now != Source) {
426       assert(Now == Parent[Now]->Target && "incorrect parent jump");
427       Result.push_back(Parent[Now]);
428       Now = Parent[Now]->Source;
429     }
430     // Reverse the path, since it is extracted from Target to Source
431     std::reverse(Result.begin(), Result.end());
432     return Result;
433   }
434 
435   /// A distance of a path for a given jump.
436   /// In order to incite the path to use blocks/jumps with large positive flow,
437   /// and avoid changing branch probability of outgoing edges drastically,
438   /// set the distance as follows:
439   ///   if Jump.Flow > 0, then distance = max(100 - Jump->Flow, 0)
440   ///   if Block.Weight > 0, then distance = 1
441   ///   otherwise distance >> 1
442   int64_t jumpDistance(FlowJump *Jump) const {
443     int64_t BaseDistance = 100;
444     if (Jump->IsUnlikely)
445       return MinCostMaxFlow::AuxCostUnlikely;
446     if (Jump->Flow > 0)
447       return std::max(BaseDistance - (int64_t)Jump->Flow, (int64_t)0);
448     if (Func.Blocks[Jump->Target].Weight > 0)
449       return BaseDistance;
450     return BaseDistance * (NumBlocks() + 1);
451   };
452 
453   uint64_t NumBlocks() const { return Func.Blocks.size(); }
454 
455   /// Rebalance unknown subgraphs so as each branch splits with probabilities
456   /// UnknownFirstSuccProbability and 1 - UnknownFirstSuccProbability
457   void rebalanceUnknownSubgraphs() {
458     assert(UnknownFirstSuccProbability >= 0.0 &&
459            UnknownFirstSuccProbability <= 1.0 &&
460            "the share of the unknown successor should be between 0 and 1");
461     // Try to find unknown subgraphs from each non-unknown block
462     for (uint64_t I = 0; I < Func.Blocks.size(); I++) {
463       auto SrcBlock = &Func.Blocks[I];
464       // Do not attempt to find unknown successors from a unknown or a
465       // zero-flow block
466       if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0)
467         continue;
468 
469       std::vector<FlowBlock *> UnknownSuccs;
470       FlowBlock *DstBlock = nullptr;
471       // Find a unknown subgraphs starting at block SrcBlock
472       if (!findUnknownSubgraph(SrcBlock, DstBlock, UnknownSuccs))
473         continue;
474       // At the moment, we do not rebalance subgraphs containing cycles among
475       // unknown blocks
476       if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownSuccs))
477         continue;
478 
479       // Rebalance the flow
480       rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownSuccs);
481     }
482   }
483 
484   /// Find a unknown subgraph starting at block SrcBlock.
485   /// If the search is successful, the method sets DstBlock and UnknownSuccs.
486   bool findUnknownSubgraph(FlowBlock *SrcBlock, FlowBlock *&DstBlock,
487                            std::vector<FlowBlock *> &UnknownSuccs) {
488     // Run BFS from SrcBlock and make sure all paths are going through unknown
489     // blocks and end at a non-unknown DstBlock
490     auto Visited = std::vector<bool>(NumBlocks(), false);
491     std::queue<uint64_t> Queue;
492     DstBlock = nullptr;
493 
494     Queue.push(SrcBlock->Index);
495     Visited[SrcBlock->Index] = true;
496     while (!Queue.empty()) {
497       auto &Block = Func.Blocks[Queue.front()];
498       Queue.pop();
499       // Process blocks reachable from Block
500       for (auto Jump : Block.SuccJumps) {
501         uint64_t Dst = Jump->Target;
502         if (Visited[Dst])
503           continue;
504         Visited[Dst] = true;
505         if (!Func.Blocks[Dst].UnknownWeight) {
506           // If we see non-unique non-unknown block reachable from SrcBlock,
507           // stop processing and skip rebalancing
508           FlowBlock *CandidateDstBlock = &Func.Blocks[Dst];
509           if (DstBlock != nullptr && DstBlock != CandidateDstBlock)
510             return false;
511           DstBlock = CandidateDstBlock;
512         } else {
513           Queue.push(Dst);
514           UnknownSuccs.push_back(&Func.Blocks[Dst]);
515         }
516       }
517     }
518 
519     // If the list of unknown blocks is empty, we don't need rebalancing
520     if (UnknownSuccs.empty())
521       return false;
522     // If all reachable nodes from SrcBlock are unknown, skip rebalancing
523     if (DstBlock == nullptr)
524       return false;
525     // If any of the unknown blocks is an exit block, skip rebalancing
526     for (auto Block : UnknownSuccs) {
527       if (Block->isExit())
528         return false;
529     }
530 
531     return true;
532   }
533 
534   /// Verify if the given unknown subgraph is acyclic, and if yes, reorder
535   /// UnknownSuccs in the topological order (so that all jumps are "forward").
536   bool isAcyclicSubgraph(FlowBlock *SrcBlock, FlowBlock *DstBlock,
537                          std::vector<FlowBlock *> &UnknownSuccs) {
538     // Extract local in-degrees in the considered subgraph
539     auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
540     for (auto Jump : SrcBlock->SuccJumps) {
541       LocalInDegree[Jump->Target]++;
542     }
543     for (uint64_t I = 0; I < UnknownSuccs.size(); I++) {
544       for (auto Jump : UnknownSuccs[I]->SuccJumps) {
545         LocalInDegree[Jump->Target]++;
546       }
547     }
548     // A loop containing SrcBlock
549     if (LocalInDegree[SrcBlock->Index] > 0)
550       return false;
551 
552     std::vector<FlowBlock *> AcyclicOrder;
553     std::queue<uint64_t> Queue;
554     Queue.push(SrcBlock->Index);
555     while (!Queue.empty()) {
556       auto &Block = Func.Blocks[Queue.front()];
557       Queue.pop();
558       // Stop propagation once we reach DstBlock
559       if (Block.Index == DstBlock->Index)
560         break;
561 
562       AcyclicOrder.push_back(&Block);
563       // Add to the queue all successors with zero local in-degree
564       for (auto Jump : Block.SuccJumps) {
565         uint64_t Dst = Jump->Target;
566         LocalInDegree[Dst]--;
567         if (LocalInDegree[Dst] == 0) {
568           Queue.push(Dst);
569         }
570       }
571     }
572 
573     // If there is a cycle in the subgraph, AcyclicOrder contains only a subset
574     // of all blocks
575     if (UnknownSuccs.size() + 1 != AcyclicOrder.size())
576       return false;
577     UnknownSuccs = AcyclicOrder;
578     return true;
579   }
580 
581   /// Rebalance a given subgraph.
582   void rebalanceUnknownSubgraph(FlowBlock *SrcBlock, FlowBlock *DstBlock,
583                                 std::vector<FlowBlock *> &UnknownSuccs) {
584     assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");
585     assert(UnknownSuccs.front() == SrcBlock && "incorrect order of unknowns");
586 
587     for (auto Block : UnknownSuccs) {
588       // Block's flow is the sum of incoming flows
589       uint64_t TotalFlow = 0;
590       if (Block == SrcBlock) {
591         TotalFlow = Block->Flow;
592       } else {
593         for (auto Jump : Block->PredJumps) {
594           TotalFlow += Jump->Flow;
595         }
596         Block->Flow = TotalFlow;
597       }
598 
599       // Process all successor jumps and update corresponding flow values
600       for (uint64_t I = 0; I < Block->SuccJumps.size(); I++) {
601         auto Jump = Block->SuccJumps[I];
602         if (I + 1 == Block->SuccJumps.size()) {
603           Jump->Flow = TotalFlow;
604           continue;
605         }
606         uint64_t Flow = uint64_t(TotalFlow * UnknownFirstSuccProbability);
607         Jump->Flow = Flow;
608         TotalFlow -= Flow;
609       }
610     }
611   }
612 
613   /// A constant indicating an arbitrary exit block of a function.
614   static constexpr uint64_t AnyExitBlock = uint64_t(-1);
615 
616   /// The function.
617   FlowFunction &Func;
618 };
619 
620 /// Initializing flow network for a given function.
621 ///
622 /// Every block is split into three nodes that are responsible for (i) an
623 /// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or
624 /// reduction of the block weight.
625 void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) {
626   uint64_t NumBlocks = Func.Blocks.size();
627   assert(NumBlocks > 1 && "Too few blocks in a function");
628   LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n");
629 
630   // Pre-process data: make sure the entry weight is at least 1
631   if (Func.Blocks[Func.Entry].Weight == 0) {
632     Func.Blocks[Func.Entry].Weight = 1;
633   }
634   // Introducing dummy source/sink pairs to allow flow circulation.
635   // The nodes corresponding to blocks of Func have indicies in the range
636   // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values.
637   uint64_t S = 3 * NumBlocks;
638   uint64_t T = S + 1;
639   uint64_t S1 = S + 2;
640   uint64_t T1 = S + 3;
641 
642   Network.initialize(3 * NumBlocks + 4, S1, T1);
643 
644   // Create three nodes for every block of the function
645   for (uint64_t B = 0; B < NumBlocks; B++) {
646     auto &Block = Func.Blocks[B];
647     assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
648            "non-zero weight of a block w/o weight except for an entry");
649 
650     // Split every block into two nodes
651     uint64_t Bin = 3 * B;
652     uint64_t Bout = 3 * B + 1;
653     uint64_t Baux = 3 * B + 2;
654     if (Block.Weight > 0) {
655       Network.addEdge(S1, Bout, Block.Weight, 0);
656       Network.addEdge(Bin, T1, Block.Weight, 0);
657     }
658 
659     // Edges from S and to T
660     assert((!Block.isEntry() || !Block.isExit()) &&
661            "a block cannot be an entry and an exit");
662     if (Block.isEntry()) {
663       Network.addEdge(S, Bin, 0);
664     } else if (Block.isExit()) {
665       Network.addEdge(Bout, T, 0);
666     }
667 
668     // An auxiliary node to allow increase/reduction of block counts:
669     // We assume that decreasing block counts is more expensive than increasing,
670     // and thus, setting separate costs here. In the future we may want to tune
671     // the relative costs so as to maximize the quality of generated profiles.
672     int64_t AuxCostInc = MinCostMaxFlow::AuxCostInc;
673     int64_t AuxCostDec = MinCostMaxFlow::AuxCostDec;
674     if (Block.UnknownWeight) {
675       // Do not penalize changing weights of blocks w/o known profile count
676       AuxCostInc = 0;
677       AuxCostDec = 0;
678     } else {
679       // Increasing the count for "cold" blocks with zero initial count is more
680       // expensive than for "hot" ones
681       if (Block.Weight == 0) {
682         AuxCostInc = MinCostMaxFlow::AuxCostIncZero;
683       }
684       // Modifying the count of the entry block is expensive
685       if (Block.isEntry()) {
686         AuxCostInc = MinCostMaxFlow::AuxCostIncEntry;
687         AuxCostDec = MinCostMaxFlow::AuxCostDecEntry;
688       }
689     }
690     // For blocks with self-edges, do not penalize a reduction of the count,
691     // as all of the increase can be attributed to the self-edge
692     if (Block.HasSelfEdge) {
693       AuxCostDec = 0;
694     }
695 
696     Network.addEdge(Bin, Baux, AuxCostInc);
697     Network.addEdge(Baux, Bout, AuxCostInc);
698     if (Block.Weight > 0) {
699       Network.addEdge(Bout, Baux, AuxCostDec);
700       Network.addEdge(Baux, Bin, AuxCostDec);
701     }
702   }
703 
704   // Creating edges for every jump
705   for (auto &Jump : Func.Jumps) {
706     uint64_t Src = Jump.Source;
707     uint64_t Dst = Jump.Target;
708     if (Src != Dst) {
709       uint64_t SrcOut = 3 * Src + 1;
710       uint64_t DstIn = 3 * Dst;
711       uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0;
712       Network.addEdge(SrcOut, DstIn, Cost);
713     }
714   }
715 
716   // Make sure we have a valid flow circulation
717   Network.addEdge(T, S, 0);
718 }
719 
720 /// Extract resulting block and edge counts from the flow network.
721 void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) {
722   uint64_t NumBlocks = Func.Blocks.size();
723 
724   // Extract resulting block counts
725   for (uint64_t Src = 0; Src < NumBlocks; Src++) {
726     auto &Block = Func.Blocks[Src];
727     uint64_t SrcOut = 3 * Src + 1;
728     int64_t Flow = 0;
729     for (auto &Adj : Network.getFlow(SrcOut)) {
730       uint64_t DstIn = Adj.first;
731       int64_t DstFlow = Adj.second;
732       bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2);
733       if (!IsAuxNode || Block.HasSelfEdge) {
734         Flow += DstFlow;
735       }
736     }
737     Block.Flow = Flow;
738     assert(Flow >= 0 && "negative block flow");
739   }
740 
741   // Extract resulting jump counts
742   for (auto &Jump : Func.Jumps) {
743     uint64_t Src = Jump.Source;
744     uint64_t Dst = Jump.Target;
745     int64_t Flow = 0;
746     if (Src != Dst) {
747       uint64_t SrcOut = 3 * Src + 1;
748       uint64_t DstIn = 3 * Dst;
749       Flow = Network.getFlow(SrcOut, DstIn);
750     } else {
751       uint64_t SrcOut = 3 * Src + 1;
752       uint64_t SrcAux = 3 * Src + 2;
753       int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux);
754       if (AuxFlow > 0)
755         Flow = AuxFlow;
756     }
757     Jump.Flow = Flow;
758     assert(Flow >= 0 && "negative jump flow");
759   }
760 }
761 
762 #ifndef NDEBUG
763 /// Verify that the computed flow values satisfy flow conservation rules
764 void verifyWeights(const FlowFunction &Func) {
765   const uint64_t NumBlocks = Func.Blocks.size();
766   auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
767   auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
768   for (auto &Jump : Func.Jumps) {
769     InFlow[Jump.Target] += Jump.Flow;
770     OutFlow[Jump.Source] += Jump.Flow;
771   }
772 
773   uint64_t TotalInFlow = 0;
774   uint64_t TotalOutFlow = 0;
775   for (uint64_t I = 0; I < NumBlocks; I++) {
776     auto &Block = Func.Blocks[I];
777     if (Block.isEntry()) {
778       TotalInFlow += Block.Flow;
779       assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
780     } else if (Block.isExit()) {
781       TotalOutFlow += Block.Flow;
782       assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
783     } else {
784       assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
785       assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
786     }
787   }
788   assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");
789 
790   // Verify that there are no isolated flow components
791   // One could modify FlowFunction to hold edges indexed by the sources, which
792   // will avoid a creation of the object
793   auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
794   for (auto &Jump : Func.Jumps) {
795     if (Jump.Flow > 0) {
796       PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
797     }
798   }
799 
800   // Run BFS from the source along edges with positive flow
801   std::queue<uint64_t> Queue;
802   auto Visited = std::vector<bool>(NumBlocks, false);
803   Queue.push(Func.Entry);
804   Visited[Func.Entry] = true;
805   while (!Queue.empty()) {
806     uint64_t Src = Queue.front();
807     Queue.pop();
808     for (uint64_t Dst : PositiveFlowEdges[Src]) {
809       if (!Visited[Dst]) {
810         Queue.push(Dst);
811         Visited[Dst] = true;
812       }
813     }
814   }
815 
816   // Verify that every block that has a positive flow is reached from the source
817   // along edges with a positive flow
818   for (uint64_t I = 0; I < NumBlocks; I++) {
819     auto &Block = Func.Blocks[I];
820     assert((Visited[I] || Block.Flow == 0) && "an isolated flow component");
821   }
822 }
823 #endif
824 
825 } // end of anonymous namespace
826 
827 /// Apply the profile inference algorithm for a given flow function
828 void llvm::applyFlowInference(FlowFunction &Func) {
829   // Create and apply an inference network model
830   auto InferenceNetwork = MinCostMaxFlow();
831   initializeNetwork(InferenceNetwork, Func);
832   InferenceNetwork.run();
833 
834   // Extract flow values for every block and every edge
835   extractWeights(InferenceNetwork, Func);
836 
837   // Post-processing adjustments to the flow
838   auto Adjuster = FlowAdjuster(Func);
839   Adjuster.run();
840 
841 #ifndef NDEBUG
842   // Verify the result
843   verifyWeights(Func);
844 #endif
845 }
846