xref: /freebsd-src/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp (revision 4824e7fd18a1223177218d4aec1b3c6c5c4a444e)
1 //===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a profile inference algorithm. Given an incomplete and
10 // possibly imprecise block counts, the algorithm reconstructs realistic block
11 // and edge counts that satisfy flow conservation rules, while minimally modify
12 // input block counts.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/Transforms/Utils/SampleProfileInference.h"
17 #include "llvm/Support/Debug.h"
18 #include <queue>
19 #include <set>
20 
21 using namespace llvm;
22 #define DEBUG_TYPE "sample-profile-inference"
23 
24 namespace {
25 
26 /// A value indicating an infinite flow/capacity/weight of a block/edge.
27 /// Not using numeric_limits<int64_t>::max(), as the values can be summed up
28 /// during the execution.
29 static constexpr int64_t INF = ((int64_t)1) << 50;
30 
31 /// The minimum-cost maximum flow algorithm.
32 ///
33 /// The algorithm finds the maximum flow of minimum cost on a given (directed)
34 /// network using a modified version of the classical Moore-Bellman-Ford
35 /// approach. The algorithm applies a number of augmentation iterations in which
36 /// flow is sent along paths of positive capacity from the source to the sink.
37 /// The worst-case time complexity of the implementation is O(v(f)*m*n), where
38 /// where m is the number of edges, n is the number of vertices, and v(f) is the
39 /// value of the maximum flow. However, the observed running time on typical
40 /// instances is sub-quadratic, that is, o(n^2).
41 ///
42 /// The input is a set of edges with specified costs and capacities, and a pair
43 /// of nodes (source and sink). The output is the flow along each edge of the
44 /// minimum total cost respecting the given edge capacities.
45 class MinCostMaxFlow {
46 public:
47   // Initialize algorithm's data structures for a network of a given size.
48   void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
49     Source = SourceNode;
50     Target = SinkNode;
51 
52     Nodes = std::vector<Node>(NodeCount);
53     Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
54   }
55 
56   // Run the algorithm.
57   int64_t run() {
58     // Find an augmenting path and update the flow along the path
59     size_t AugmentationIters = 0;
60     while (findAugmentingPath()) {
61       augmentFlowAlongPath();
62       AugmentationIters++;
63     }
64 
65     // Compute the total flow and its cost
66     int64_t TotalCost = 0;
67     int64_t TotalFlow = 0;
68     for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
69       for (auto &Edge : Edges[Src]) {
70         if (Edge.Flow > 0) {
71           TotalCost += Edge.Cost * Edge.Flow;
72           if (Src == Source)
73             TotalFlow += Edge.Flow;
74         }
75       }
76     }
77     LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
78                       << " iterations with " << TotalFlow << " total flow"
79                       << " of " << TotalCost << " cost\n");
80     (void)TotalFlow;
81     return TotalCost;
82   }
83 
84   /// Adding an edge to the network with a specified capacity and a cost.
85   /// Multiple edges between a pair of nodes are allowed but self-edges
86   /// are not supported.
87   void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) {
88     assert(Capacity > 0 && "adding an edge of zero capacity");
89     assert(Src != Dst && "loop edge are not supported");
90 
91     Edge SrcEdge;
92     SrcEdge.Dst = Dst;
93     SrcEdge.Cost = Cost;
94     SrcEdge.Capacity = Capacity;
95     SrcEdge.Flow = 0;
96     SrcEdge.RevEdgeIndex = Edges[Dst].size();
97 
98     Edge DstEdge;
99     DstEdge.Dst = Src;
100     DstEdge.Cost = -Cost;
101     DstEdge.Capacity = 0;
102     DstEdge.Flow = 0;
103     DstEdge.RevEdgeIndex = Edges[Src].size();
104 
105     Edges[Src].push_back(SrcEdge);
106     Edges[Dst].push_back(DstEdge);
107   }
108 
109   /// Adding an edge to the network of infinite capacity and a given cost.
110   void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) {
111     addEdge(Src, Dst, INF, Cost);
112   }
113 
114   /// Get the total flow from a given source node.
115   /// Returns a list of pairs (target node, amount of flow to the target).
116   const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
117     std::vector<std::pair<uint64_t, int64_t>> Flow;
118     for (auto &Edge : Edges[Src]) {
119       if (Edge.Flow > 0)
120         Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
121     }
122     return Flow;
123   }
124 
125   /// Get the total flow between a pair of nodes.
126   int64_t getFlow(uint64_t Src, uint64_t Dst) const {
127     int64_t Flow = 0;
128     for (auto &Edge : Edges[Src]) {
129       if (Edge.Dst == Dst) {
130         Flow += Edge.Flow;
131       }
132     }
133     return Flow;
134   }
135 
136   /// A cost of increasing a block's count by one.
137   static constexpr int64_t AuxCostInc = 10;
138   /// A cost of decreasing a block's count by one.
139   static constexpr int64_t AuxCostDec = 20;
140   /// A cost of increasing a count of zero-weight block by one.
141   static constexpr int64_t AuxCostIncZero = 11;
142   /// A cost of increasing the entry block's count by one.
143   static constexpr int64_t AuxCostIncEntry = 40;
144   /// A cost of decreasing the entry block's count by one.
145   static constexpr int64_t AuxCostDecEntry = 10;
146   /// A cost of taking an unlikely jump.
147   static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 20;
148 
149 private:
150   /// Check for existence of an augmenting path with a positive capacity.
151   bool findAugmentingPath() {
152     // Initialize data structures
153     for (auto &Node : Nodes) {
154       Node.Distance = INF;
155       Node.ParentNode = uint64_t(-1);
156       Node.ParentEdgeIndex = uint64_t(-1);
157       Node.Taken = false;
158     }
159 
160     std::queue<uint64_t> Queue;
161     Queue.push(Source);
162     Nodes[Source].Distance = 0;
163     Nodes[Source].Taken = true;
164     while (!Queue.empty()) {
165       uint64_t Src = Queue.front();
166       Queue.pop();
167       Nodes[Src].Taken = false;
168       // Although the residual network contains edges with negative costs
169       // (in particular, backward edges), it can be shown that there are no
170       // negative-weight cycles and the following two invariants are maintained:
171       // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V,
172       // where Dist is the length of the shortest path between two nodes. This
173       // allows to prune the search-space of the path-finding algorithm using
174       // the following early-stop criteria:
175       // -- If we find a path with zero-distance from Source to Target, stop the
176       //    search, as the path is the shortest since Dist[Source, Target] >= 0;
177       // -- If we have Dist[Source, V] > Dist[Source, Target], then do not
178       //    process node V, as it is guaranteed _not_ to be on a shortest path
179       //    from Source to Target; it follows from inequalities
180       //    Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
181       //                         >= Dist[Source, V]
182       if (Nodes[Target].Distance == 0)
183         break;
184       if (Nodes[Src].Distance > Nodes[Target].Distance)
185         continue;
186 
187       // Process adjacent edges
188       for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
189         auto &Edge = Edges[Src][EdgeIdx];
190         if (Edge.Flow < Edge.Capacity) {
191           uint64_t Dst = Edge.Dst;
192           int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
193           if (Nodes[Dst].Distance > NewDistance) {
194             // Update the distance and the parent node/edge
195             Nodes[Dst].Distance = NewDistance;
196             Nodes[Dst].ParentNode = Src;
197             Nodes[Dst].ParentEdgeIndex = EdgeIdx;
198             // Add the node to the queue, if it is not there yet
199             if (!Nodes[Dst].Taken) {
200               Queue.push(Dst);
201               Nodes[Dst].Taken = true;
202             }
203           }
204         }
205       }
206     }
207 
208     return Nodes[Target].Distance != INF;
209   }
210 
211   /// Update the current flow along the augmenting path.
212   void augmentFlowAlongPath() {
213     // Find path capacity
214     int64_t PathCapacity = INF;
215     uint64_t Now = Target;
216     while (Now != Source) {
217       uint64_t Pred = Nodes[Now].ParentNode;
218       auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
219       PathCapacity = std::min(PathCapacity, Edge.Capacity - Edge.Flow);
220       Now = Pred;
221     }
222 
223     assert(PathCapacity > 0 && "found incorrect augmenting path");
224 
225     // Update the flow along the path
226     Now = Target;
227     while (Now != Source) {
228       uint64_t Pred = Nodes[Now].ParentNode;
229       auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
230       auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];
231 
232       Edge.Flow += PathCapacity;
233       RevEdge.Flow -= PathCapacity;
234 
235       Now = Pred;
236     }
237   }
238 
239   /// An node in a flow network.
240   struct Node {
241     /// The cost of the cheapest path from the source to the current node.
242     int64_t Distance;
243     /// The node preceding the current one in the path.
244     uint64_t ParentNode;
245     /// The index of the edge between ParentNode and the current node.
246     uint64_t ParentEdgeIndex;
247     /// An indicator of whether the current node is in a queue.
248     bool Taken;
249   };
250   /// An edge in a flow network.
251   struct Edge {
252     /// The cost of the edge.
253     int64_t Cost;
254     /// The capacity of the edge.
255     int64_t Capacity;
256     /// The current flow on the edge.
257     int64_t Flow;
258     /// The destination node of the edge.
259     uint64_t Dst;
260     /// The index of the reverse edge between Dst and the current node.
261     uint64_t RevEdgeIndex;
262   };
263 
264   /// The set of network nodes.
265   std::vector<Node> Nodes;
266   /// The set of network edges.
267   std::vector<std::vector<Edge>> Edges;
268   /// Source node of the flow.
269   uint64_t Source;
270   /// Target (sink) node of the flow.
271   uint64_t Target;
272 };
273 
274 /// Initializing flow network for a given function.
275 ///
276 /// Every block is split into three nodes that are responsible for (i) an
277 /// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or
278 /// reduction of the block weight.
279 void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) {
280   uint64_t NumBlocks = Func.Blocks.size();
281   assert(NumBlocks > 1 && "Too few blocks in a function");
282   LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n");
283 
284   // Pre-process data: make sure the entry weight is at least 1
285   if (Func.Blocks[Func.Entry].Weight == 0) {
286     Func.Blocks[Func.Entry].Weight = 1;
287   }
288   // Introducing dummy source/sink pairs to allow flow circulation.
289   // The nodes corresponding to blocks of Func have indicies in the range
290   // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values.
291   uint64_t S = 3 * NumBlocks;
292   uint64_t T = S + 1;
293   uint64_t S1 = S + 2;
294   uint64_t T1 = S + 3;
295 
296   Network.initialize(3 * NumBlocks + 4, S1, T1);
297 
298   // Create three nodes for every block of the function
299   for (uint64_t B = 0; B < NumBlocks; B++) {
300     auto &Block = Func.Blocks[B];
301     assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
302            "non-zero weight of a block w/o weight except for an entry");
303 
304     // Split every block into two nodes
305     uint64_t Bin = 3 * B;
306     uint64_t Bout = 3 * B + 1;
307     uint64_t Baux = 3 * B + 2;
308     if (Block.Weight > 0) {
309       Network.addEdge(S1, Bout, Block.Weight, 0);
310       Network.addEdge(Bin, T1, Block.Weight, 0);
311     }
312 
313     // Edges from S and to T
314     assert((!Block.isEntry() || !Block.isExit()) &&
315            "a block cannot be an entry and an exit");
316     if (Block.isEntry()) {
317       Network.addEdge(S, Bin, 0);
318     } else if (Block.isExit()) {
319       Network.addEdge(Bout, T, 0);
320     }
321 
322     // An auxiliary node to allow increase/reduction of block counts:
323     // We assume that decreasing block counts is more expensive than increasing,
324     // and thus, setting separate costs here. In the future we may want to tune
325     // the relative costs so as to maximize the quality of generated profiles.
326     int64_t AuxCostInc = MinCostMaxFlow::AuxCostInc;
327     int64_t AuxCostDec = MinCostMaxFlow::AuxCostDec;
328     if (Block.UnknownWeight) {
329       // Do not penalize changing weights of blocks w/o known profile count
330       AuxCostInc = 0;
331       AuxCostDec = 0;
332     } else {
333       // Increasing the count for "cold" blocks with zero initial count is more
334       // expensive than for "hot" ones
335       if (Block.Weight == 0) {
336         AuxCostInc = MinCostMaxFlow::AuxCostIncZero;
337       }
338       // Modifying the count of the entry block is expensive
339       if (Block.isEntry()) {
340         AuxCostInc = MinCostMaxFlow::AuxCostIncEntry;
341         AuxCostDec = MinCostMaxFlow::AuxCostDecEntry;
342       }
343     }
344     // For blocks with self-edges, do not penalize a reduction of the count,
345     // as all of the increase can be attributed to the self-edge
346     if (Block.HasSelfEdge) {
347       AuxCostDec = 0;
348     }
349 
350     Network.addEdge(Bin, Baux, AuxCostInc);
351     Network.addEdge(Baux, Bout, AuxCostInc);
352     if (Block.Weight > 0) {
353       Network.addEdge(Bout, Baux, AuxCostDec);
354       Network.addEdge(Baux, Bin, AuxCostDec);
355     }
356   }
357 
358   // Creating edges for every jump
359   for (auto &Jump : Func.Jumps) {
360     uint64_t Src = Jump.Source;
361     uint64_t Dst = Jump.Target;
362     if (Src != Dst) {
363       uint64_t SrcOut = 3 * Src + 1;
364       uint64_t DstIn = 3 * Dst;
365       uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0;
366       Network.addEdge(SrcOut, DstIn, Cost);
367     }
368   }
369 
370   // Make sure we have a valid flow circulation
371   Network.addEdge(T, S, 0);
372 }
373 
374 /// Extract resulting block and edge counts from the flow network.
375 void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) {
376   uint64_t NumBlocks = Func.Blocks.size();
377 
378   // Extract resulting block counts
379   for (uint64_t Src = 0; Src < NumBlocks; Src++) {
380     auto &Block = Func.Blocks[Src];
381     uint64_t SrcOut = 3 * Src + 1;
382     int64_t Flow = 0;
383     for (auto &Adj : Network.getFlow(SrcOut)) {
384       uint64_t DstIn = Adj.first;
385       int64_t DstFlow = Adj.second;
386       bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2);
387       if (!IsAuxNode || Block.HasSelfEdge) {
388         Flow += DstFlow;
389       }
390     }
391     Block.Flow = Flow;
392     assert(Flow >= 0 && "negative block flow");
393   }
394 
395   // Extract resulting jump counts
396   for (auto &Jump : Func.Jumps) {
397     uint64_t Src = Jump.Source;
398     uint64_t Dst = Jump.Target;
399     int64_t Flow = 0;
400     if (Src != Dst) {
401       uint64_t SrcOut = 3 * Src + 1;
402       uint64_t DstIn = 3 * Dst;
403       Flow = Network.getFlow(SrcOut, DstIn);
404     } else {
405       uint64_t SrcOut = 3 * Src + 1;
406       uint64_t SrcAux = 3 * Src + 2;
407       int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux);
408       if (AuxFlow > 0)
409         Flow = AuxFlow;
410     }
411     Jump.Flow = Flow;
412     assert(Flow >= 0 && "negative jump flow");
413   }
414 }
415 
416 #ifndef NDEBUG
417 /// Verify that the computed flow values satisfy flow conservation rules
418 void verifyWeights(const FlowFunction &Func) {
419   const uint64_t NumBlocks = Func.Blocks.size();
420   auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
421   auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
422   for (auto &Jump : Func.Jumps) {
423     InFlow[Jump.Target] += Jump.Flow;
424     OutFlow[Jump.Source] += Jump.Flow;
425   }
426 
427   uint64_t TotalInFlow = 0;
428   uint64_t TotalOutFlow = 0;
429   for (uint64_t I = 0; I < NumBlocks; I++) {
430     auto &Block = Func.Blocks[I];
431     if (Block.isEntry()) {
432       TotalInFlow += Block.Flow;
433       assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
434     } else if (Block.isExit()) {
435       TotalOutFlow += Block.Flow;
436       assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
437     } else {
438       assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
439       assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
440     }
441   }
442   assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");
443 }
444 #endif
445 
446 } // end of anonymous namespace
447 
448 /// Apply the profile inference algorithm for a given flow function
449 void llvm::applyFlowInference(FlowFunction &Func) {
450   // Create and apply an inference network model
451   auto InferenceNetwork = MinCostMaxFlow();
452   initializeNetwork(InferenceNetwork, Func);
453   InferenceNetwork.run();
454 
455   // Extract flow values for every block and every edge
456   extractWeights(InferenceNetwork, Func);
457 
458 #ifndef NDEBUG
459   // Verify the result
460   verifyWeights(Func);
461 #endif
462 }
463