xref: /freebsd-src/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp (revision 0eae32dcef82f6f06de6419a0d623d7def0cc8f6)
14824e7fdSDimitry Andric //===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
24824e7fdSDimitry Andric //
34824e7fdSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44824e7fdSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
54824e7fdSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
64824e7fdSDimitry Andric //
74824e7fdSDimitry Andric //===----------------------------------------------------------------------===//
84824e7fdSDimitry Andric //
94824e7fdSDimitry Andric // This file implements a profile inference algorithm. Given an incomplete and
104824e7fdSDimitry Andric // possibly imprecise block counts, the algorithm reconstructs realistic block
114824e7fdSDimitry Andric // and edge counts that satisfy flow conservation rules, while minimally modify
124824e7fdSDimitry Andric // input block counts.
134824e7fdSDimitry Andric //
144824e7fdSDimitry Andric //===----------------------------------------------------------------------===//
154824e7fdSDimitry Andric 
164824e7fdSDimitry Andric #include "llvm/Transforms/Utils/SampleProfileInference.h"
174824e7fdSDimitry Andric #include "llvm/Support/Debug.h"
184824e7fdSDimitry Andric #include <queue>
194824e7fdSDimitry Andric #include <set>
204824e7fdSDimitry Andric 
214824e7fdSDimitry Andric using namespace llvm;
224824e7fdSDimitry Andric #define DEBUG_TYPE "sample-profile-inference"
234824e7fdSDimitry Andric 
244824e7fdSDimitry Andric namespace {
254824e7fdSDimitry Andric 
264824e7fdSDimitry Andric /// A value indicating an infinite flow/capacity/weight of a block/edge.
274824e7fdSDimitry Andric /// Not using numeric_limits<int64_t>::max(), as the values can be summed up
284824e7fdSDimitry Andric /// during the execution.
294824e7fdSDimitry Andric static constexpr int64_t INF = ((int64_t)1) << 50;
304824e7fdSDimitry Andric 
314824e7fdSDimitry Andric /// The minimum-cost maximum flow algorithm.
324824e7fdSDimitry Andric ///
334824e7fdSDimitry Andric /// The algorithm finds the maximum flow of minimum cost on a given (directed)
344824e7fdSDimitry Andric /// network using a modified version of the classical Moore-Bellman-Ford
354824e7fdSDimitry Andric /// approach. The algorithm applies a number of augmentation iterations in which
364824e7fdSDimitry Andric /// flow is sent along paths of positive capacity from the source to the sink.
374824e7fdSDimitry Andric /// The worst-case time complexity of the implementation is O(v(f)*m*n), where
384824e7fdSDimitry Andric /// where m is the number of edges, n is the number of vertices, and v(f) is the
394824e7fdSDimitry Andric /// value of the maximum flow. However, the observed running time on typical
404824e7fdSDimitry Andric /// instances is sub-quadratic, that is, o(n^2).
414824e7fdSDimitry Andric ///
424824e7fdSDimitry Andric /// The input is a set of edges with specified costs and capacities, and a pair
434824e7fdSDimitry Andric /// of nodes (source and sink). The output is the flow along each edge of the
444824e7fdSDimitry Andric /// minimum total cost respecting the given edge capacities.
454824e7fdSDimitry Andric class MinCostMaxFlow {
464824e7fdSDimitry Andric public:
474824e7fdSDimitry Andric   // Initialize algorithm's data structures for a network of a given size.
484824e7fdSDimitry Andric   void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
494824e7fdSDimitry Andric     Source = SourceNode;
504824e7fdSDimitry Andric     Target = SinkNode;
514824e7fdSDimitry Andric 
524824e7fdSDimitry Andric     Nodes = std::vector<Node>(NodeCount);
534824e7fdSDimitry Andric     Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
544824e7fdSDimitry Andric   }
554824e7fdSDimitry Andric 
564824e7fdSDimitry Andric   // Run the algorithm.
574824e7fdSDimitry Andric   int64_t run() {
584824e7fdSDimitry Andric     // Find an augmenting path and update the flow along the path
594824e7fdSDimitry Andric     size_t AugmentationIters = 0;
604824e7fdSDimitry Andric     while (findAugmentingPath()) {
614824e7fdSDimitry Andric       augmentFlowAlongPath();
624824e7fdSDimitry Andric       AugmentationIters++;
634824e7fdSDimitry Andric     }
644824e7fdSDimitry Andric 
654824e7fdSDimitry Andric     // Compute the total flow and its cost
664824e7fdSDimitry Andric     int64_t TotalCost = 0;
674824e7fdSDimitry Andric     int64_t TotalFlow = 0;
684824e7fdSDimitry Andric     for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
694824e7fdSDimitry Andric       for (auto &Edge : Edges[Src]) {
704824e7fdSDimitry Andric         if (Edge.Flow > 0) {
714824e7fdSDimitry Andric           TotalCost += Edge.Cost * Edge.Flow;
724824e7fdSDimitry Andric           if (Src == Source)
734824e7fdSDimitry Andric             TotalFlow += Edge.Flow;
744824e7fdSDimitry Andric         }
754824e7fdSDimitry Andric       }
764824e7fdSDimitry Andric     }
774824e7fdSDimitry Andric     LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
784824e7fdSDimitry Andric                       << " iterations with " << TotalFlow << " total flow"
794824e7fdSDimitry Andric                       << " of " << TotalCost << " cost\n");
804824e7fdSDimitry Andric     (void)TotalFlow;
814824e7fdSDimitry Andric     return TotalCost;
824824e7fdSDimitry Andric   }
834824e7fdSDimitry Andric 
844824e7fdSDimitry Andric   /// Adding an edge to the network with a specified capacity and a cost.
854824e7fdSDimitry Andric   /// Multiple edges between a pair of nodes are allowed but self-edges
864824e7fdSDimitry Andric   /// are not supported.
874824e7fdSDimitry Andric   void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) {
884824e7fdSDimitry Andric     assert(Capacity > 0 && "adding an edge of zero capacity");
894824e7fdSDimitry Andric     assert(Src != Dst && "loop edge are not supported");
904824e7fdSDimitry Andric 
914824e7fdSDimitry Andric     Edge SrcEdge;
924824e7fdSDimitry Andric     SrcEdge.Dst = Dst;
934824e7fdSDimitry Andric     SrcEdge.Cost = Cost;
944824e7fdSDimitry Andric     SrcEdge.Capacity = Capacity;
954824e7fdSDimitry Andric     SrcEdge.Flow = 0;
964824e7fdSDimitry Andric     SrcEdge.RevEdgeIndex = Edges[Dst].size();
974824e7fdSDimitry Andric 
984824e7fdSDimitry Andric     Edge DstEdge;
994824e7fdSDimitry Andric     DstEdge.Dst = Src;
1004824e7fdSDimitry Andric     DstEdge.Cost = -Cost;
1014824e7fdSDimitry Andric     DstEdge.Capacity = 0;
1024824e7fdSDimitry Andric     DstEdge.Flow = 0;
1034824e7fdSDimitry Andric     DstEdge.RevEdgeIndex = Edges[Src].size();
1044824e7fdSDimitry Andric 
1054824e7fdSDimitry Andric     Edges[Src].push_back(SrcEdge);
1064824e7fdSDimitry Andric     Edges[Dst].push_back(DstEdge);
1074824e7fdSDimitry Andric   }
1084824e7fdSDimitry Andric 
1094824e7fdSDimitry Andric   /// Adding an edge to the network of infinite capacity and a given cost.
1104824e7fdSDimitry Andric   void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) {
1114824e7fdSDimitry Andric     addEdge(Src, Dst, INF, Cost);
1124824e7fdSDimitry Andric   }
1134824e7fdSDimitry Andric 
1144824e7fdSDimitry Andric   /// Get the total flow from a given source node.
1154824e7fdSDimitry Andric   /// Returns a list of pairs (target node, amount of flow to the target).
1164824e7fdSDimitry Andric   const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
1174824e7fdSDimitry Andric     std::vector<std::pair<uint64_t, int64_t>> Flow;
1184824e7fdSDimitry Andric     for (auto &Edge : Edges[Src]) {
1194824e7fdSDimitry Andric       if (Edge.Flow > 0)
1204824e7fdSDimitry Andric         Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
1214824e7fdSDimitry Andric     }
1224824e7fdSDimitry Andric     return Flow;
1234824e7fdSDimitry Andric   }
1244824e7fdSDimitry Andric 
1254824e7fdSDimitry Andric   /// Get the total flow between a pair of nodes.
1264824e7fdSDimitry Andric   int64_t getFlow(uint64_t Src, uint64_t Dst) const {
1274824e7fdSDimitry Andric     int64_t Flow = 0;
1284824e7fdSDimitry Andric     for (auto &Edge : Edges[Src]) {
1294824e7fdSDimitry Andric       if (Edge.Dst == Dst) {
1304824e7fdSDimitry Andric         Flow += Edge.Flow;
1314824e7fdSDimitry Andric       }
1324824e7fdSDimitry Andric     }
1334824e7fdSDimitry Andric     return Flow;
1344824e7fdSDimitry Andric   }
1354824e7fdSDimitry Andric 
1364824e7fdSDimitry Andric   /// A cost of increasing a block's count by one.
1374824e7fdSDimitry Andric   static constexpr int64_t AuxCostInc = 10;
1384824e7fdSDimitry Andric   /// A cost of decreasing a block's count by one.
1394824e7fdSDimitry Andric   static constexpr int64_t AuxCostDec = 20;
1404824e7fdSDimitry Andric   /// A cost of increasing a count of zero-weight block by one.
1414824e7fdSDimitry Andric   static constexpr int64_t AuxCostIncZero = 11;
1424824e7fdSDimitry Andric   /// A cost of increasing the entry block's count by one.
1434824e7fdSDimitry Andric   static constexpr int64_t AuxCostIncEntry = 40;
1444824e7fdSDimitry Andric   /// A cost of decreasing the entry block's count by one.
1454824e7fdSDimitry Andric   static constexpr int64_t AuxCostDecEntry = 10;
1464824e7fdSDimitry Andric   /// A cost of taking an unlikely jump.
1474824e7fdSDimitry Andric   static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 20;
1484824e7fdSDimitry Andric 
1494824e7fdSDimitry Andric private:
1504824e7fdSDimitry Andric   /// Check for existence of an augmenting path with a positive capacity.
1514824e7fdSDimitry Andric   bool findAugmentingPath() {
1524824e7fdSDimitry Andric     // Initialize data structures
1534824e7fdSDimitry Andric     for (auto &Node : Nodes) {
1544824e7fdSDimitry Andric       Node.Distance = INF;
1554824e7fdSDimitry Andric       Node.ParentNode = uint64_t(-1);
1564824e7fdSDimitry Andric       Node.ParentEdgeIndex = uint64_t(-1);
1574824e7fdSDimitry Andric       Node.Taken = false;
1584824e7fdSDimitry Andric     }
1594824e7fdSDimitry Andric 
1604824e7fdSDimitry Andric     std::queue<uint64_t> Queue;
1614824e7fdSDimitry Andric     Queue.push(Source);
1624824e7fdSDimitry Andric     Nodes[Source].Distance = 0;
1634824e7fdSDimitry Andric     Nodes[Source].Taken = true;
1644824e7fdSDimitry Andric     while (!Queue.empty()) {
1654824e7fdSDimitry Andric       uint64_t Src = Queue.front();
1664824e7fdSDimitry Andric       Queue.pop();
1674824e7fdSDimitry Andric       Nodes[Src].Taken = false;
1684824e7fdSDimitry Andric       // Although the residual network contains edges with negative costs
1694824e7fdSDimitry Andric       // (in particular, backward edges), it can be shown that there are no
1704824e7fdSDimitry Andric       // negative-weight cycles and the following two invariants are maintained:
1714824e7fdSDimitry Andric       // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V,
1724824e7fdSDimitry Andric       // where Dist is the length of the shortest path between two nodes. This
1734824e7fdSDimitry Andric       // allows to prune the search-space of the path-finding algorithm using
1744824e7fdSDimitry Andric       // the following early-stop criteria:
1754824e7fdSDimitry Andric       // -- If we find a path with zero-distance from Source to Target, stop the
1764824e7fdSDimitry Andric       //    search, as the path is the shortest since Dist[Source, Target] >= 0;
1774824e7fdSDimitry Andric       // -- If we have Dist[Source, V] > Dist[Source, Target], then do not
1784824e7fdSDimitry Andric       //    process node V, as it is guaranteed _not_ to be on a shortest path
1794824e7fdSDimitry Andric       //    from Source to Target; it follows from inequalities
1804824e7fdSDimitry Andric       //    Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
1814824e7fdSDimitry Andric       //                         >= Dist[Source, V]
1824824e7fdSDimitry Andric       if (Nodes[Target].Distance == 0)
1834824e7fdSDimitry Andric         break;
1844824e7fdSDimitry Andric       if (Nodes[Src].Distance > Nodes[Target].Distance)
1854824e7fdSDimitry Andric         continue;
1864824e7fdSDimitry Andric 
1874824e7fdSDimitry Andric       // Process adjacent edges
1884824e7fdSDimitry Andric       for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
1894824e7fdSDimitry Andric         auto &Edge = Edges[Src][EdgeIdx];
1904824e7fdSDimitry Andric         if (Edge.Flow < Edge.Capacity) {
1914824e7fdSDimitry Andric           uint64_t Dst = Edge.Dst;
1924824e7fdSDimitry Andric           int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
1934824e7fdSDimitry Andric           if (Nodes[Dst].Distance > NewDistance) {
1944824e7fdSDimitry Andric             // Update the distance and the parent node/edge
1954824e7fdSDimitry Andric             Nodes[Dst].Distance = NewDistance;
1964824e7fdSDimitry Andric             Nodes[Dst].ParentNode = Src;
1974824e7fdSDimitry Andric             Nodes[Dst].ParentEdgeIndex = EdgeIdx;
1984824e7fdSDimitry Andric             // Add the node to the queue, if it is not there yet
1994824e7fdSDimitry Andric             if (!Nodes[Dst].Taken) {
2004824e7fdSDimitry Andric               Queue.push(Dst);
2014824e7fdSDimitry Andric               Nodes[Dst].Taken = true;
2024824e7fdSDimitry Andric             }
2034824e7fdSDimitry Andric           }
2044824e7fdSDimitry Andric         }
2054824e7fdSDimitry Andric       }
2064824e7fdSDimitry Andric     }
2074824e7fdSDimitry Andric 
2084824e7fdSDimitry Andric     return Nodes[Target].Distance != INF;
2094824e7fdSDimitry Andric   }
2104824e7fdSDimitry Andric 
2114824e7fdSDimitry Andric   /// Update the current flow along the augmenting path.
2124824e7fdSDimitry Andric   void augmentFlowAlongPath() {
2134824e7fdSDimitry Andric     // Find path capacity
2144824e7fdSDimitry Andric     int64_t PathCapacity = INF;
2154824e7fdSDimitry Andric     uint64_t Now = Target;
2164824e7fdSDimitry Andric     while (Now != Source) {
2174824e7fdSDimitry Andric       uint64_t Pred = Nodes[Now].ParentNode;
2184824e7fdSDimitry Andric       auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
2194824e7fdSDimitry Andric       PathCapacity = std::min(PathCapacity, Edge.Capacity - Edge.Flow);
2204824e7fdSDimitry Andric       Now = Pred;
2214824e7fdSDimitry Andric     }
2224824e7fdSDimitry Andric 
223*0eae32dcSDimitry Andric     assert(PathCapacity > 0 && "found an incorrect augmenting path");
2244824e7fdSDimitry Andric 
2254824e7fdSDimitry Andric     // Update the flow along the path
2264824e7fdSDimitry Andric     Now = Target;
2274824e7fdSDimitry Andric     while (Now != Source) {
2284824e7fdSDimitry Andric       uint64_t Pred = Nodes[Now].ParentNode;
2294824e7fdSDimitry Andric       auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
2304824e7fdSDimitry Andric       auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];
2314824e7fdSDimitry Andric 
2324824e7fdSDimitry Andric       Edge.Flow += PathCapacity;
2334824e7fdSDimitry Andric       RevEdge.Flow -= PathCapacity;
2344824e7fdSDimitry Andric 
2354824e7fdSDimitry Andric       Now = Pred;
2364824e7fdSDimitry Andric     }
2374824e7fdSDimitry Andric   }
2384824e7fdSDimitry Andric 
2394824e7fdSDimitry Andric   /// An node in a flow network.
2404824e7fdSDimitry Andric   struct Node {
2414824e7fdSDimitry Andric     /// The cost of the cheapest path from the source to the current node.
2424824e7fdSDimitry Andric     int64_t Distance;
2434824e7fdSDimitry Andric     /// The node preceding the current one in the path.
2444824e7fdSDimitry Andric     uint64_t ParentNode;
2454824e7fdSDimitry Andric     /// The index of the edge between ParentNode and the current node.
2464824e7fdSDimitry Andric     uint64_t ParentEdgeIndex;
2474824e7fdSDimitry Andric     /// An indicator of whether the current node is in a queue.
2484824e7fdSDimitry Andric     bool Taken;
2494824e7fdSDimitry Andric   };
2504824e7fdSDimitry Andric   /// An edge in a flow network.
2514824e7fdSDimitry Andric   struct Edge {
2524824e7fdSDimitry Andric     /// The cost of the edge.
2534824e7fdSDimitry Andric     int64_t Cost;
2544824e7fdSDimitry Andric     /// The capacity of the edge.
2554824e7fdSDimitry Andric     int64_t Capacity;
2564824e7fdSDimitry Andric     /// The current flow on the edge.
2574824e7fdSDimitry Andric     int64_t Flow;
2584824e7fdSDimitry Andric     /// The destination node of the edge.
2594824e7fdSDimitry Andric     uint64_t Dst;
2604824e7fdSDimitry Andric     /// The index of the reverse edge between Dst and the current node.
2614824e7fdSDimitry Andric     uint64_t RevEdgeIndex;
2624824e7fdSDimitry Andric   };
2634824e7fdSDimitry Andric 
2644824e7fdSDimitry Andric   /// The set of network nodes.
2654824e7fdSDimitry Andric   std::vector<Node> Nodes;
2664824e7fdSDimitry Andric   /// The set of network edges.
2674824e7fdSDimitry Andric   std::vector<std::vector<Edge>> Edges;
2684824e7fdSDimitry Andric   /// Source node of the flow.
2694824e7fdSDimitry Andric   uint64_t Source;
2704824e7fdSDimitry Andric   /// Target (sink) node of the flow.
2714824e7fdSDimitry Andric   uint64_t Target;
2724824e7fdSDimitry Andric };
2734824e7fdSDimitry Andric 
274*0eae32dcSDimitry Andric /// A post-processing adjustment of control flow. It applies two steps by
275*0eae32dcSDimitry Andric /// rerouting some flow and making it more realistic:
276*0eae32dcSDimitry Andric ///
277*0eae32dcSDimitry Andric /// - First, it removes all isolated components ("islands") with a positive flow
278*0eae32dcSDimitry Andric ///   that are unreachable from the entry block. For every such component, we
279*0eae32dcSDimitry Andric ///   find the shortest from the entry to an exit passing through the component,
280*0eae32dcSDimitry Andric ///   and increase the flow by one unit along the path.
281*0eae32dcSDimitry Andric ///
282*0eae32dcSDimitry Andric /// - Second, it identifies all "unknown subgraphs" consisting of basic blocks
283*0eae32dcSDimitry Andric ///   with no sampled counts. Then it rebalnces the flow that goes through such
284*0eae32dcSDimitry Andric ///   a subgraph so that each branch is taken with probability 50%.
285*0eae32dcSDimitry Andric ///   An unknown subgraph is such that for every two nodes u and v:
286*0eae32dcSDimitry Andric ///     - u dominates v and u is not unknown;
287*0eae32dcSDimitry Andric ///     - v post-dominates u; and
288*0eae32dcSDimitry Andric ///     - all inner-nodes of all (u,v)-paths are unknown.
289*0eae32dcSDimitry Andric ///
290*0eae32dcSDimitry Andric class FlowAdjuster {
291*0eae32dcSDimitry Andric public:
292*0eae32dcSDimitry Andric   FlowAdjuster(FlowFunction &Func) : Func(Func) {
293*0eae32dcSDimitry Andric     assert(Func.Blocks[Func.Entry].isEntry() &&
294*0eae32dcSDimitry Andric            "incorrect index of the entry block");
295*0eae32dcSDimitry Andric   }
296*0eae32dcSDimitry Andric 
297*0eae32dcSDimitry Andric   // Run the post-processing
298*0eae32dcSDimitry Andric   void run() {
299*0eae32dcSDimitry Andric     /// Adjust the flow to get rid of isolated components.
300*0eae32dcSDimitry Andric     joinIsolatedComponents();
301*0eae32dcSDimitry Andric 
302*0eae32dcSDimitry Andric     /// Rebalance the flow inside unknown subgraphs.
303*0eae32dcSDimitry Andric     rebalanceUnknownSubgraphs();
304*0eae32dcSDimitry Andric   }
305*0eae32dcSDimitry Andric 
306*0eae32dcSDimitry Andric   /// The probability for the first successor of a unknown subgraph
307*0eae32dcSDimitry Andric   static constexpr double UnknownFirstSuccProbability = 0.5;
308*0eae32dcSDimitry Andric 
309*0eae32dcSDimitry Andric private:
310*0eae32dcSDimitry Andric   void joinIsolatedComponents() {
311*0eae32dcSDimitry Andric     // Find blocks that are reachable from the source
312*0eae32dcSDimitry Andric     auto Visited = std::vector<bool>(NumBlocks(), false);
313*0eae32dcSDimitry Andric     findReachable(Func.Entry, Visited);
314*0eae32dcSDimitry Andric 
315*0eae32dcSDimitry Andric     // Iterate over all non-reachable blocks and adjust their weights
316*0eae32dcSDimitry Andric     for (uint64_t I = 0; I < NumBlocks(); I++) {
317*0eae32dcSDimitry Andric       auto &Block = Func.Blocks[I];
318*0eae32dcSDimitry Andric       if (Block.Flow > 0 && !Visited[I]) {
319*0eae32dcSDimitry Andric         // Find a path from the entry to an exit passing through the block I
320*0eae32dcSDimitry Andric         auto Path = findShortestPath(I);
321*0eae32dcSDimitry Andric         // Increase the flow along the path
322*0eae32dcSDimitry Andric         assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
323*0eae32dcSDimitry Andric                "incorrectly computed path adjusting control flow");
324*0eae32dcSDimitry Andric         Func.Blocks[Func.Entry].Flow += 1;
325*0eae32dcSDimitry Andric         for (auto &Jump : Path) {
326*0eae32dcSDimitry Andric           Jump->Flow += 1;
327*0eae32dcSDimitry Andric           Func.Blocks[Jump->Target].Flow += 1;
328*0eae32dcSDimitry Andric           // Update reachability
329*0eae32dcSDimitry Andric           findReachable(Jump->Target, Visited);
330*0eae32dcSDimitry Andric         }
331*0eae32dcSDimitry Andric       }
332*0eae32dcSDimitry Andric     }
333*0eae32dcSDimitry Andric   }
334*0eae32dcSDimitry Andric 
335*0eae32dcSDimitry Andric   /// Run BFS from a given block along the jumps with a positive flow and mark
336*0eae32dcSDimitry Andric   /// all reachable blocks.
337*0eae32dcSDimitry Andric   void findReachable(uint64_t Src, std::vector<bool> &Visited) {
338*0eae32dcSDimitry Andric     if (Visited[Src])
339*0eae32dcSDimitry Andric       return;
340*0eae32dcSDimitry Andric     std::queue<uint64_t> Queue;
341*0eae32dcSDimitry Andric     Queue.push(Src);
342*0eae32dcSDimitry Andric     Visited[Src] = true;
343*0eae32dcSDimitry Andric     while (!Queue.empty()) {
344*0eae32dcSDimitry Andric       Src = Queue.front();
345*0eae32dcSDimitry Andric       Queue.pop();
346*0eae32dcSDimitry Andric       for (auto Jump : Func.Blocks[Src].SuccJumps) {
347*0eae32dcSDimitry Andric         uint64_t Dst = Jump->Target;
348*0eae32dcSDimitry Andric         if (Jump->Flow > 0 && !Visited[Dst]) {
349*0eae32dcSDimitry Andric           Queue.push(Dst);
350*0eae32dcSDimitry Andric           Visited[Dst] = true;
351*0eae32dcSDimitry Andric         }
352*0eae32dcSDimitry Andric       }
353*0eae32dcSDimitry Andric     }
354*0eae32dcSDimitry Andric   }
355*0eae32dcSDimitry Andric 
356*0eae32dcSDimitry Andric   /// Find the shortest path from the entry block to an exit block passing
357*0eae32dcSDimitry Andric   /// through a given block.
358*0eae32dcSDimitry Andric   std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {
359*0eae32dcSDimitry Andric     // A path from the entry block to BlockIdx
360*0eae32dcSDimitry Andric     auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
361*0eae32dcSDimitry Andric     // A path from BlockIdx to an exit block
362*0eae32dcSDimitry Andric     auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);
363*0eae32dcSDimitry Andric 
364*0eae32dcSDimitry Andric     // Concatenate the two paths
365*0eae32dcSDimitry Andric     std::vector<FlowJump *> Result;
366*0eae32dcSDimitry Andric     Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end());
367*0eae32dcSDimitry Andric     Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end());
368*0eae32dcSDimitry Andric     return Result;
369*0eae32dcSDimitry Andric   }
370*0eae32dcSDimitry Andric 
371*0eae32dcSDimitry Andric   /// Apply the Dijkstra algorithm to find the shortest path from a given
372*0eae32dcSDimitry Andric   /// Source to a given Target block.
373*0eae32dcSDimitry Andric   /// If Target == -1, then the path ends at an exit block.
374*0eae32dcSDimitry Andric   std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) {
375*0eae32dcSDimitry Andric     // Quit early, if possible
376*0eae32dcSDimitry Andric     if (Source == Target)
377*0eae32dcSDimitry Andric       return std::vector<FlowJump *>();
378*0eae32dcSDimitry Andric     if (Func.Blocks[Source].isExit() && Target == AnyExitBlock)
379*0eae32dcSDimitry Andric       return std::vector<FlowJump *>();
380*0eae32dcSDimitry Andric 
381*0eae32dcSDimitry Andric     // Initialize data structures
382*0eae32dcSDimitry Andric     auto Distance = std::vector<int64_t>(NumBlocks(), INF);
383*0eae32dcSDimitry Andric     auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr);
384*0eae32dcSDimitry Andric     Distance[Source] = 0;
385*0eae32dcSDimitry Andric     std::set<std::pair<uint64_t, uint64_t>> Queue;
386*0eae32dcSDimitry Andric     Queue.insert(std::make_pair(Distance[Source], Source));
387*0eae32dcSDimitry Andric 
388*0eae32dcSDimitry Andric     // Run the Dijkstra algorithm
389*0eae32dcSDimitry Andric     while (!Queue.empty()) {
390*0eae32dcSDimitry Andric       uint64_t Src = Queue.begin()->second;
391*0eae32dcSDimitry Andric       Queue.erase(Queue.begin());
392*0eae32dcSDimitry Andric       // If we found a solution, quit early
393*0eae32dcSDimitry Andric       if (Src == Target ||
394*0eae32dcSDimitry Andric           (Func.Blocks[Src].isExit() && Target == AnyExitBlock))
395*0eae32dcSDimitry Andric         break;
396*0eae32dcSDimitry Andric 
397*0eae32dcSDimitry Andric       for (auto Jump : Func.Blocks[Src].SuccJumps) {
398*0eae32dcSDimitry Andric         uint64_t Dst = Jump->Target;
399*0eae32dcSDimitry Andric         int64_t JumpDist = jumpDistance(Jump);
400*0eae32dcSDimitry Andric         if (Distance[Dst] > Distance[Src] + JumpDist) {
401*0eae32dcSDimitry Andric           Queue.erase(std::make_pair(Distance[Dst], Dst));
402*0eae32dcSDimitry Andric 
403*0eae32dcSDimitry Andric           Distance[Dst] = Distance[Src] + JumpDist;
404*0eae32dcSDimitry Andric           Parent[Dst] = Jump;
405*0eae32dcSDimitry Andric 
406*0eae32dcSDimitry Andric           Queue.insert(std::make_pair(Distance[Dst], Dst));
407*0eae32dcSDimitry Andric         }
408*0eae32dcSDimitry Andric       }
409*0eae32dcSDimitry Andric     }
410*0eae32dcSDimitry Andric     // If Target is not provided, find the closest exit block
411*0eae32dcSDimitry Andric     if (Target == AnyExitBlock) {
412*0eae32dcSDimitry Andric       for (uint64_t I = 0; I < NumBlocks(); I++) {
413*0eae32dcSDimitry Andric         if (Func.Blocks[I].isExit() && Parent[I] != nullptr) {
414*0eae32dcSDimitry Andric           if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {
415*0eae32dcSDimitry Andric             Target = I;
416*0eae32dcSDimitry Andric           }
417*0eae32dcSDimitry Andric         }
418*0eae32dcSDimitry Andric       }
419*0eae32dcSDimitry Andric     }
420*0eae32dcSDimitry Andric     assert(Parent[Target] != nullptr && "a path does not exist");
421*0eae32dcSDimitry Andric 
422*0eae32dcSDimitry Andric     // Extract the constructed path
423*0eae32dcSDimitry Andric     std::vector<FlowJump *> Result;
424*0eae32dcSDimitry Andric     uint64_t Now = Target;
425*0eae32dcSDimitry Andric     while (Now != Source) {
426*0eae32dcSDimitry Andric       assert(Now == Parent[Now]->Target && "incorrect parent jump");
427*0eae32dcSDimitry Andric       Result.push_back(Parent[Now]);
428*0eae32dcSDimitry Andric       Now = Parent[Now]->Source;
429*0eae32dcSDimitry Andric     }
430*0eae32dcSDimitry Andric     // Reverse the path, since it is extracted from Target to Source
431*0eae32dcSDimitry Andric     std::reverse(Result.begin(), Result.end());
432*0eae32dcSDimitry Andric     return Result;
433*0eae32dcSDimitry Andric   }
434*0eae32dcSDimitry Andric 
435*0eae32dcSDimitry Andric   /// A distance of a path for a given jump.
436*0eae32dcSDimitry Andric   /// In order to incite the path to use blocks/jumps with large positive flow,
437*0eae32dcSDimitry Andric   /// and avoid changing branch probability of outgoing edges drastically,
438*0eae32dcSDimitry Andric   /// set the distance as follows:
439*0eae32dcSDimitry Andric   ///   if Jump.Flow > 0, then distance = max(100 - Jump->Flow, 0)
440*0eae32dcSDimitry Andric   ///   if Block.Weight > 0, then distance = 1
441*0eae32dcSDimitry Andric   ///   otherwise distance >> 1
442*0eae32dcSDimitry Andric   int64_t jumpDistance(FlowJump *Jump) const {
443*0eae32dcSDimitry Andric     int64_t BaseDistance = 100;
444*0eae32dcSDimitry Andric     if (Jump->IsUnlikely)
445*0eae32dcSDimitry Andric       return MinCostMaxFlow::AuxCostUnlikely;
446*0eae32dcSDimitry Andric     if (Jump->Flow > 0)
447*0eae32dcSDimitry Andric       return std::max(BaseDistance - (int64_t)Jump->Flow, (int64_t)0);
448*0eae32dcSDimitry Andric     if (Func.Blocks[Jump->Target].Weight > 0)
449*0eae32dcSDimitry Andric       return BaseDistance;
450*0eae32dcSDimitry Andric     return BaseDistance * (NumBlocks() + 1);
451*0eae32dcSDimitry Andric   };
452*0eae32dcSDimitry Andric 
453*0eae32dcSDimitry Andric   uint64_t NumBlocks() const { return Func.Blocks.size(); }
454*0eae32dcSDimitry Andric 
455*0eae32dcSDimitry Andric   /// Rebalance unknown subgraphs so as each branch splits with probabilities
456*0eae32dcSDimitry Andric   /// UnknownFirstSuccProbability and 1 - UnknownFirstSuccProbability
457*0eae32dcSDimitry Andric   void rebalanceUnknownSubgraphs() {
458*0eae32dcSDimitry Andric     assert(UnknownFirstSuccProbability >= 0.0 &&
459*0eae32dcSDimitry Andric            UnknownFirstSuccProbability <= 1.0 &&
460*0eae32dcSDimitry Andric            "the share of the unknown successor should be between 0 and 1");
461*0eae32dcSDimitry Andric     // Try to find unknown subgraphs from each non-unknown block
462*0eae32dcSDimitry Andric     for (uint64_t I = 0; I < Func.Blocks.size(); I++) {
463*0eae32dcSDimitry Andric       auto SrcBlock = &Func.Blocks[I];
464*0eae32dcSDimitry Andric       // Do not attempt to find unknown successors from a unknown or a
465*0eae32dcSDimitry Andric       // zero-flow block
466*0eae32dcSDimitry Andric       if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0)
467*0eae32dcSDimitry Andric         continue;
468*0eae32dcSDimitry Andric 
469*0eae32dcSDimitry Andric       std::vector<FlowBlock *> UnknownSuccs;
470*0eae32dcSDimitry Andric       FlowBlock *DstBlock = nullptr;
471*0eae32dcSDimitry Andric       // Find a unknown subgraphs starting at block SrcBlock
472*0eae32dcSDimitry Andric       if (!findUnknownSubgraph(SrcBlock, DstBlock, UnknownSuccs))
473*0eae32dcSDimitry Andric         continue;
474*0eae32dcSDimitry Andric       // At the moment, we do not rebalance subgraphs containing cycles among
475*0eae32dcSDimitry Andric       // unknown blocks
476*0eae32dcSDimitry Andric       if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownSuccs))
477*0eae32dcSDimitry Andric         continue;
478*0eae32dcSDimitry Andric 
479*0eae32dcSDimitry Andric       // Rebalance the flow
480*0eae32dcSDimitry Andric       rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownSuccs);
481*0eae32dcSDimitry Andric     }
482*0eae32dcSDimitry Andric   }
483*0eae32dcSDimitry Andric 
484*0eae32dcSDimitry Andric   /// Find a unknown subgraph starting at block SrcBlock.
485*0eae32dcSDimitry Andric   /// If the search is successful, the method sets DstBlock and UnknownSuccs.
486*0eae32dcSDimitry Andric   bool findUnknownSubgraph(FlowBlock *SrcBlock, FlowBlock *&DstBlock,
487*0eae32dcSDimitry Andric                            std::vector<FlowBlock *> &UnknownSuccs) {
488*0eae32dcSDimitry Andric     // Run BFS from SrcBlock and make sure all paths are going through unknown
489*0eae32dcSDimitry Andric     // blocks and end at a non-unknown DstBlock
490*0eae32dcSDimitry Andric     auto Visited = std::vector<bool>(NumBlocks(), false);
491*0eae32dcSDimitry Andric     std::queue<uint64_t> Queue;
492*0eae32dcSDimitry Andric     DstBlock = nullptr;
493*0eae32dcSDimitry Andric 
494*0eae32dcSDimitry Andric     Queue.push(SrcBlock->Index);
495*0eae32dcSDimitry Andric     Visited[SrcBlock->Index] = true;
496*0eae32dcSDimitry Andric     while (!Queue.empty()) {
497*0eae32dcSDimitry Andric       auto &Block = Func.Blocks[Queue.front()];
498*0eae32dcSDimitry Andric       Queue.pop();
499*0eae32dcSDimitry Andric       // Process blocks reachable from Block
500*0eae32dcSDimitry Andric       for (auto Jump : Block.SuccJumps) {
501*0eae32dcSDimitry Andric         uint64_t Dst = Jump->Target;
502*0eae32dcSDimitry Andric         if (Visited[Dst])
503*0eae32dcSDimitry Andric           continue;
504*0eae32dcSDimitry Andric         Visited[Dst] = true;
505*0eae32dcSDimitry Andric         if (!Func.Blocks[Dst].UnknownWeight) {
506*0eae32dcSDimitry Andric           // If we see non-unique non-unknown block reachable from SrcBlock,
507*0eae32dcSDimitry Andric           // stop processing and skip rebalancing
508*0eae32dcSDimitry Andric           FlowBlock *CandidateDstBlock = &Func.Blocks[Dst];
509*0eae32dcSDimitry Andric           if (DstBlock != nullptr && DstBlock != CandidateDstBlock)
510*0eae32dcSDimitry Andric             return false;
511*0eae32dcSDimitry Andric           DstBlock = CandidateDstBlock;
512*0eae32dcSDimitry Andric         } else {
513*0eae32dcSDimitry Andric           Queue.push(Dst);
514*0eae32dcSDimitry Andric           UnknownSuccs.push_back(&Func.Blocks[Dst]);
515*0eae32dcSDimitry Andric         }
516*0eae32dcSDimitry Andric       }
517*0eae32dcSDimitry Andric     }
518*0eae32dcSDimitry Andric 
519*0eae32dcSDimitry Andric     // If the list of unknown blocks is empty, we don't need rebalancing
520*0eae32dcSDimitry Andric     if (UnknownSuccs.empty())
521*0eae32dcSDimitry Andric       return false;
522*0eae32dcSDimitry Andric     // If all reachable nodes from SrcBlock are unknown, skip rebalancing
523*0eae32dcSDimitry Andric     if (DstBlock == nullptr)
524*0eae32dcSDimitry Andric       return false;
525*0eae32dcSDimitry Andric     // If any of the unknown blocks is an exit block, skip rebalancing
526*0eae32dcSDimitry Andric     for (auto Block : UnknownSuccs) {
527*0eae32dcSDimitry Andric       if (Block->isExit())
528*0eae32dcSDimitry Andric         return false;
529*0eae32dcSDimitry Andric     }
530*0eae32dcSDimitry Andric 
531*0eae32dcSDimitry Andric     return true;
532*0eae32dcSDimitry Andric   }
533*0eae32dcSDimitry Andric 
534*0eae32dcSDimitry Andric   /// Verify if the given unknown subgraph is acyclic, and if yes, reorder
535*0eae32dcSDimitry Andric   /// UnknownSuccs in the topological order (so that all jumps are "forward").
536*0eae32dcSDimitry Andric   bool isAcyclicSubgraph(FlowBlock *SrcBlock, FlowBlock *DstBlock,
537*0eae32dcSDimitry Andric                          std::vector<FlowBlock *> &UnknownSuccs) {
538*0eae32dcSDimitry Andric     // Extract local in-degrees in the considered subgraph
539*0eae32dcSDimitry Andric     auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
540*0eae32dcSDimitry Andric     for (auto Jump : SrcBlock->SuccJumps) {
541*0eae32dcSDimitry Andric       LocalInDegree[Jump->Target]++;
542*0eae32dcSDimitry Andric     }
543*0eae32dcSDimitry Andric     for (uint64_t I = 0; I < UnknownSuccs.size(); I++) {
544*0eae32dcSDimitry Andric       for (auto Jump : UnknownSuccs[I]->SuccJumps) {
545*0eae32dcSDimitry Andric         LocalInDegree[Jump->Target]++;
546*0eae32dcSDimitry Andric       }
547*0eae32dcSDimitry Andric     }
548*0eae32dcSDimitry Andric     // A loop containing SrcBlock
549*0eae32dcSDimitry Andric     if (LocalInDegree[SrcBlock->Index] > 0)
550*0eae32dcSDimitry Andric       return false;
551*0eae32dcSDimitry Andric 
552*0eae32dcSDimitry Andric     std::vector<FlowBlock *> AcyclicOrder;
553*0eae32dcSDimitry Andric     std::queue<uint64_t> Queue;
554*0eae32dcSDimitry Andric     Queue.push(SrcBlock->Index);
555*0eae32dcSDimitry Andric     while (!Queue.empty()) {
556*0eae32dcSDimitry Andric       auto &Block = Func.Blocks[Queue.front()];
557*0eae32dcSDimitry Andric       Queue.pop();
558*0eae32dcSDimitry Andric       // Stop propagation once we reach DstBlock
559*0eae32dcSDimitry Andric       if (Block.Index == DstBlock->Index)
560*0eae32dcSDimitry Andric         break;
561*0eae32dcSDimitry Andric 
562*0eae32dcSDimitry Andric       AcyclicOrder.push_back(&Block);
563*0eae32dcSDimitry Andric       // Add to the queue all successors with zero local in-degree
564*0eae32dcSDimitry Andric       for (auto Jump : Block.SuccJumps) {
565*0eae32dcSDimitry Andric         uint64_t Dst = Jump->Target;
566*0eae32dcSDimitry Andric         LocalInDegree[Dst]--;
567*0eae32dcSDimitry Andric         if (LocalInDegree[Dst] == 0) {
568*0eae32dcSDimitry Andric           Queue.push(Dst);
569*0eae32dcSDimitry Andric         }
570*0eae32dcSDimitry Andric       }
571*0eae32dcSDimitry Andric     }
572*0eae32dcSDimitry Andric 
573*0eae32dcSDimitry Andric     // If there is a cycle in the subgraph, AcyclicOrder contains only a subset
574*0eae32dcSDimitry Andric     // of all blocks
575*0eae32dcSDimitry Andric     if (UnknownSuccs.size() + 1 != AcyclicOrder.size())
576*0eae32dcSDimitry Andric       return false;
577*0eae32dcSDimitry Andric     UnknownSuccs = AcyclicOrder;
578*0eae32dcSDimitry Andric     return true;
579*0eae32dcSDimitry Andric   }
580*0eae32dcSDimitry Andric 
581*0eae32dcSDimitry Andric   /// Rebalance a given subgraph.
582*0eae32dcSDimitry Andric   void rebalanceUnknownSubgraph(FlowBlock *SrcBlock, FlowBlock *DstBlock,
583*0eae32dcSDimitry Andric                                 std::vector<FlowBlock *> &UnknownSuccs) {
584*0eae32dcSDimitry Andric     assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");
585*0eae32dcSDimitry Andric     assert(UnknownSuccs.front() == SrcBlock && "incorrect order of unknowns");
586*0eae32dcSDimitry Andric 
587*0eae32dcSDimitry Andric     for (auto Block : UnknownSuccs) {
588*0eae32dcSDimitry Andric       // Block's flow is the sum of incoming flows
589*0eae32dcSDimitry Andric       uint64_t TotalFlow = 0;
590*0eae32dcSDimitry Andric       if (Block == SrcBlock) {
591*0eae32dcSDimitry Andric         TotalFlow = Block->Flow;
592*0eae32dcSDimitry Andric       } else {
593*0eae32dcSDimitry Andric         for (auto Jump : Block->PredJumps) {
594*0eae32dcSDimitry Andric           TotalFlow += Jump->Flow;
595*0eae32dcSDimitry Andric         }
596*0eae32dcSDimitry Andric         Block->Flow = TotalFlow;
597*0eae32dcSDimitry Andric       }
598*0eae32dcSDimitry Andric 
599*0eae32dcSDimitry Andric       // Process all successor jumps and update corresponding flow values
600*0eae32dcSDimitry Andric       for (uint64_t I = 0; I < Block->SuccJumps.size(); I++) {
601*0eae32dcSDimitry Andric         auto Jump = Block->SuccJumps[I];
602*0eae32dcSDimitry Andric         if (I + 1 == Block->SuccJumps.size()) {
603*0eae32dcSDimitry Andric           Jump->Flow = TotalFlow;
604*0eae32dcSDimitry Andric           continue;
605*0eae32dcSDimitry Andric         }
606*0eae32dcSDimitry Andric         uint64_t Flow = uint64_t(TotalFlow * UnknownFirstSuccProbability);
607*0eae32dcSDimitry Andric         Jump->Flow = Flow;
608*0eae32dcSDimitry Andric         TotalFlow -= Flow;
609*0eae32dcSDimitry Andric       }
610*0eae32dcSDimitry Andric     }
611*0eae32dcSDimitry Andric   }
612*0eae32dcSDimitry Andric 
613*0eae32dcSDimitry Andric   /// A constant indicating an arbitrary exit block of a function.
614*0eae32dcSDimitry Andric   static constexpr uint64_t AnyExitBlock = uint64_t(-1);
615*0eae32dcSDimitry Andric 
616*0eae32dcSDimitry Andric   /// The function.
617*0eae32dcSDimitry Andric   FlowFunction &Func;
618*0eae32dcSDimitry Andric };
619*0eae32dcSDimitry Andric 
6204824e7fdSDimitry Andric /// Initializing flow network for a given function.
6214824e7fdSDimitry Andric ///
6224824e7fdSDimitry Andric /// Every block is split into three nodes that are responsible for (i) an
6234824e7fdSDimitry Andric /// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or
6244824e7fdSDimitry Andric /// reduction of the block weight.
6254824e7fdSDimitry Andric void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) {
6264824e7fdSDimitry Andric   uint64_t NumBlocks = Func.Blocks.size();
6274824e7fdSDimitry Andric   assert(NumBlocks > 1 && "Too few blocks in a function");
6284824e7fdSDimitry Andric   LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n");
6294824e7fdSDimitry Andric 
6304824e7fdSDimitry Andric   // Pre-process data: make sure the entry weight is at least 1
6314824e7fdSDimitry Andric   if (Func.Blocks[Func.Entry].Weight == 0) {
6324824e7fdSDimitry Andric     Func.Blocks[Func.Entry].Weight = 1;
6334824e7fdSDimitry Andric   }
6344824e7fdSDimitry Andric   // Introducing dummy source/sink pairs to allow flow circulation.
6354824e7fdSDimitry Andric   // The nodes corresponding to blocks of Func have indicies in the range
6364824e7fdSDimitry Andric   // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values.
6374824e7fdSDimitry Andric   uint64_t S = 3 * NumBlocks;
6384824e7fdSDimitry Andric   uint64_t T = S + 1;
6394824e7fdSDimitry Andric   uint64_t S1 = S + 2;
6404824e7fdSDimitry Andric   uint64_t T1 = S + 3;
6414824e7fdSDimitry Andric 
6424824e7fdSDimitry Andric   Network.initialize(3 * NumBlocks + 4, S1, T1);
6434824e7fdSDimitry Andric 
6444824e7fdSDimitry Andric   // Create three nodes for every block of the function
6454824e7fdSDimitry Andric   for (uint64_t B = 0; B < NumBlocks; B++) {
6464824e7fdSDimitry Andric     auto &Block = Func.Blocks[B];
6474824e7fdSDimitry Andric     assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
6484824e7fdSDimitry Andric            "non-zero weight of a block w/o weight except for an entry");
6494824e7fdSDimitry Andric 
6504824e7fdSDimitry Andric     // Split every block into two nodes
6514824e7fdSDimitry Andric     uint64_t Bin = 3 * B;
6524824e7fdSDimitry Andric     uint64_t Bout = 3 * B + 1;
6534824e7fdSDimitry Andric     uint64_t Baux = 3 * B + 2;
6544824e7fdSDimitry Andric     if (Block.Weight > 0) {
6554824e7fdSDimitry Andric       Network.addEdge(S1, Bout, Block.Weight, 0);
6564824e7fdSDimitry Andric       Network.addEdge(Bin, T1, Block.Weight, 0);
6574824e7fdSDimitry Andric     }
6584824e7fdSDimitry Andric 
6594824e7fdSDimitry Andric     // Edges from S and to T
6604824e7fdSDimitry Andric     assert((!Block.isEntry() || !Block.isExit()) &&
6614824e7fdSDimitry Andric            "a block cannot be an entry and an exit");
6624824e7fdSDimitry Andric     if (Block.isEntry()) {
6634824e7fdSDimitry Andric       Network.addEdge(S, Bin, 0);
6644824e7fdSDimitry Andric     } else if (Block.isExit()) {
6654824e7fdSDimitry Andric       Network.addEdge(Bout, T, 0);
6664824e7fdSDimitry Andric     }
6674824e7fdSDimitry Andric 
6684824e7fdSDimitry Andric     // An auxiliary node to allow increase/reduction of block counts:
6694824e7fdSDimitry Andric     // We assume that decreasing block counts is more expensive than increasing,
6704824e7fdSDimitry Andric     // and thus, setting separate costs here. In the future we may want to tune
6714824e7fdSDimitry Andric     // the relative costs so as to maximize the quality of generated profiles.
6724824e7fdSDimitry Andric     int64_t AuxCostInc = MinCostMaxFlow::AuxCostInc;
6734824e7fdSDimitry Andric     int64_t AuxCostDec = MinCostMaxFlow::AuxCostDec;
6744824e7fdSDimitry Andric     if (Block.UnknownWeight) {
6754824e7fdSDimitry Andric       // Do not penalize changing weights of blocks w/o known profile count
6764824e7fdSDimitry Andric       AuxCostInc = 0;
6774824e7fdSDimitry Andric       AuxCostDec = 0;
6784824e7fdSDimitry Andric     } else {
6794824e7fdSDimitry Andric       // Increasing the count for "cold" blocks with zero initial count is more
6804824e7fdSDimitry Andric       // expensive than for "hot" ones
6814824e7fdSDimitry Andric       if (Block.Weight == 0) {
6824824e7fdSDimitry Andric         AuxCostInc = MinCostMaxFlow::AuxCostIncZero;
6834824e7fdSDimitry Andric       }
6844824e7fdSDimitry Andric       // Modifying the count of the entry block is expensive
6854824e7fdSDimitry Andric       if (Block.isEntry()) {
6864824e7fdSDimitry Andric         AuxCostInc = MinCostMaxFlow::AuxCostIncEntry;
6874824e7fdSDimitry Andric         AuxCostDec = MinCostMaxFlow::AuxCostDecEntry;
6884824e7fdSDimitry Andric       }
6894824e7fdSDimitry Andric     }
6904824e7fdSDimitry Andric     // For blocks with self-edges, do not penalize a reduction of the count,
6914824e7fdSDimitry Andric     // as all of the increase can be attributed to the self-edge
6924824e7fdSDimitry Andric     if (Block.HasSelfEdge) {
6934824e7fdSDimitry Andric       AuxCostDec = 0;
6944824e7fdSDimitry Andric     }
6954824e7fdSDimitry Andric 
6964824e7fdSDimitry Andric     Network.addEdge(Bin, Baux, AuxCostInc);
6974824e7fdSDimitry Andric     Network.addEdge(Baux, Bout, AuxCostInc);
6984824e7fdSDimitry Andric     if (Block.Weight > 0) {
6994824e7fdSDimitry Andric       Network.addEdge(Bout, Baux, AuxCostDec);
7004824e7fdSDimitry Andric       Network.addEdge(Baux, Bin, AuxCostDec);
7014824e7fdSDimitry Andric     }
7024824e7fdSDimitry Andric   }
7034824e7fdSDimitry Andric 
7044824e7fdSDimitry Andric   // Creating edges for every jump
7054824e7fdSDimitry Andric   for (auto &Jump : Func.Jumps) {
7064824e7fdSDimitry Andric     uint64_t Src = Jump.Source;
7074824e7fdSDimitry Andric     uint64_t Dst = Jump.Target;
7084824e7fdSDimitry Andric     if (Src != Dst) {
7094824e7fdSDimitry Andric       uint64_t SrcOut = 3 * Src + 1;
7104824e7fdSDimitry Andric       uint64_t DstIn = 3 * Dst;
7114824e7fdSDimitry Andric       uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0;
7124824e7fdSDimitry Andric       Network.addEdge(SrcOut, DstIn, Cost);
7134824e7fdSDimitry Andric     }
7144824e7fdSDimitry Andric   }
7154824e7fdSDimitry Andric 
7164824e7fdSDimitry Andric   // Make sure we have a valid flow circulation
7174824e7fdSDimitry Andric   Network.addEdge(T, S, 0);
7184824e7fdSDimitry Andric }
7194824e7fdSDimitry Andric 
7204824e7fdSDimitry Andric /// Extract resulting block and edge counts from the flow network.
7214824e7fdSDimitry Andric void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) {
7224824e7fdSDimitry Andric   uint64_t NumBlocks = Func.Blocks.size();
7234824e7fdSDimitry Andric 
7244824e7fdSDimitry Andric   // Extract resulting block counts
7254824e7fdSDimitry Andric   for (uint64_t Src = 0; Src < NumBlocks; Src++) {
7264824e7fdSDimitry Andric     auto &Block = Func.Blocks[Src];
7274824e7fdSDimitry Andric     uint64_t SrcOut = 3 * Src + 1;
7284824e7fdSDimitry Andric     int64_t Flow = 0;
7294824e7fdSDimitry Andric     for (auto &Adj : Network.getFlow(SrcOut)) {
7304824e7fdSDimitry Andric       uint64_t DstIn = Adj.first;
7314824e7fdSDimitry Andric       int64_t DstFlow = Adj.second;
7324824e7fdSDimitry Andric       bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2);
7334824e7fdSDimitry Andric       if (!IsAuxNode || Block.HasSelfEdge) {
7344824e7fdSDimitry Andric         Flow += DstFlow;
7354824e7fdSDimitry Andric       }
7364824e7fdSDimitry Andric     }
7374824e7fdSDimitry Andric     Block.Flow = Flow;
7384824e7fdSDimitry Andric     assert(Flow >= 0 && "negative block flow");
7394824e7fdSDimitry Andric   }
7404824e7fdSDimitry Andric 
7414824e7fdSDimitry Andric   // Extract resulting jump counts
7424824e7fdSDimitry Andric   for (auto &Jump : Func.Jumps) {
7434824e7fdSDimitry Andric     uint64_t Src = Jump.Source;
7444824e7fdSDimitry Andric     uint64_t Dst = Jump.Target;
7454824e7fdSDimitry Andric     int64_t Flow = 0;
7464824e7fdSDimitry Andric     if (Src != Dst) {
7474824e7fdSDimitry Andric       uint64_t SrcOut = 3 * Src + 1;
7484824e7fdSDimitry Andric       uint64_t DstIn = 3 * Dst;
7494824e7fdSDimitry Andric       Flow = Network.getFlow(SrcOut, DstIn);
7504824e7fdSDimitry Andric     } else {
7514824e7fdSDimitry Andric       uint64_t SrcOut = 3 * Src + 1;
7524824e7fdSDimitry Andric       uint64_t SrcAux = 3 * Src + 2;
7534824e7fdSDimitry Andric       int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux);
7544824e7fdSDimitry Andric       if (AuxFlow > 0)
7554824e7fdSDimitry Andric         Flow = AuxFlow;
7564824e7fdSDimitry Andric     }
7574824e7fdSDimitry Andric     Jump.Flow = Flow;
7584824e7fdSDimitry Andric     assert(Flow >= 0 && "negative jump flow");
7594824e7fdSDimitry Andric   }
7604824e7fdSDimitry Andric }
7614824e7fdSDimitry Andric 
7624824e7fdSDimitry Andric #ifndef NDEBUG
7634824e7fdSDimitry Andric /// Verify that the computed flow values satisfy flow conservation rules
7644824e7fdSDimitry Andric void verifyWeights(const FlowFunction &Func) {
7654824e7fdSDimitry Andric   const uint64_t NumBlocks = Func.Blocks.size();
7664824e7fdSDimitry Andric   auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
7674824e7fdSDimitry Andric   auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
7684824e7fdSDimitry Andric   for (auto &Jump : Func.Jumps) {
7694824e7fdSDimitry Andric     InFlow[Jump.Target] += Jump.Flow;
7704824e7fdSDimitry Andric     OutFlow[Jump.Source] += Jump.Flow;
7714824e7fdSDimitry Andric   }
7724824e7fdSDimitry Andric 
7734824e7fdSDimitry Andric   uint64_t TotalInFlow = 0;
7744824e7fdSDimitry Andric   uint64_t TotalOutFlow = 0;
7754824e7fdSDimitry Andric   for (uint64_t I = 0; I < NumBlocks; I++) {
7764824e7fdSDimitry Andric     auto &Block = Func.Blocks[I];
7774824e7fdSDimitry Andric     if (Block.isEntry()) {
7784824e7fdSDimitry Andric       TotalInFlow += Block.Flow;
7794824e7fdSDimitry Andric       assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
7804824e7fdSDimitry Andric     } else if (Block.isExit()) {
7814824e7fdSDimitry Andric       TotalOutFlow += Block.Flow;
7824824e7fdSDimitry Andric       assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
7834824e7fdSDimitry Andric     } else {
7844824e7fdSDimitry Andric       assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
7854824e7fdSDimitry Andric       assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
7864824e7fdSDimitry Andric     }
7874824e7fdSDimitry Andric   }
7884824e7fdSDimitry Andric   assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");
789*0eae32dcSDimitry Andric 
790*0eae32dcSDimitry Andric   // Verify that there are no isolated flow components
791*0eae32dcSDimitry Andric   // One could modify FlowFunction to hold edges indexed by the sources, which
792*0eae32dcSDimitry Andric   // will avoid a creation of the object
793*0eae32dcSDimitry Andric   auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
794*0eae32dcSDimitry Andric   for (auto &Jump : Func.Jumps) {
795*0eae32dcSDimitry Andric     if (Jump.Flow > 0) {
796*0eae32dcSDimitry Andric       PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
797*0eae32dcSDimitry Andric     }
798*0eae32dcSDimitry Andric   }
799*0eae32dcSDimitry Andric 
800*0eae32dcSDimitry Andric   // Run BFS from the source along edges with positive flow
801*0eae32dcSDimitry Andric   std::queue<uint64_t> Queue;
802*0eae32dcSDimitry Andric   auto Visited = std::vector<bool>(NumBlocks, false);
803*0eae32dcSDimitry Andric   Queue.push(Func.Entry);
804*0eae32dcSDimitry Andric   Visited[Func.Entry] = true;
805*0eae32dcSDimitry Andric   while (!Queue.empty()) {
806*0eae32dcSDimitry Andric     uint64_t Src = Queue.front();
807*0eae32dcSDimitry Andric     Queue.pop();
808*0eae32dcSDimitry Andric     for (uint64_t Dst : PositiveFlowEdges[Src]) {
809*0eae32dcSDimitry Andric       if (!Visited[Dst]) {
810*0eae32dcSDimitry Andric         Queue.push(Dst);
811*0eae32dcSDimitry Andric         Visited[Dst] = true;
812*0eae32dcSDimitry Andric       }
813*0eae32dcSDimitry Andric     }
814*0eae32dcSDimitry Andric   }
815*0eae32dcSDimitry Andric 
816*0eae32dcSDimitry Andric   // Verify that every block that has a positive flow is reached from the source
817*0eae32dcSDimitry Andric   // along edges with a positive flow
818*0eae32dcSDimitry Andric   for (uint64_t I = 0; I < NumBlocks; I++) {
819*0eae32dcSDimitry Andric     auto &Block = Func.Blocks[I];
820*0eae32dcSDimitry Andric     assert((Visited[I] || Block.Flow == 0) && "an isolated flow component");
821*0eae32dcSDimitry Andric   }
8224824e7fdSDimitry Andric }
8234824e7fdSDimitry Andric #endif
8244824e7fdSDimitry Andric 
8254824e7fdSDimitry Andric } // end of anonymous namespace
8264824e7fdSDimitry Andric 
8274824e7fdSDimitry Andric /// Apply the profile inference algorithm for a given flow function
8284824e7fdSDimitry Andric void llvm::applyFlowInference(FlowFunction &Func) {
8294824e7fdSDimitry Andric   // Create and apply an inference network model
8304824e7fdSDimitry Andric   auto InferenceNetwork = MinCostMaxFlow();
8314824e7fdSDimitry Andric   initializeNetwork(InferenceNetwork, Func);
8324824e7fdSDimitry Andric   InferenceNetwork.run();
8334824e7fdSDimitry Andric 
8344824e7fdSDimitry Andric   // Extract flow values for every block and every edge
8354824e7fdSDimitry Andric   extractWeights(InferenceNetwork, Func);
8364824e7fdSDimitry Andric 
837*0eae32dcSDimitry Andric   // Post-processing adjustments to the flow
838*0eae32dcSDimitry Andric   auto Adjuster = FlowAdjuster(Func);
839*0eae32dcSDimitry Andric   Adjuster.run();
840*0eae32dcSDimitry Andric 
8414824e7fdSDimitry Andric #ifndef NDEBUG
8424824e7fdSDimitry Andric   // Verify the result
8434824e7fdSDimitry Andric   verifyWeights(Func);
8444824e7fdSDimitry Andric #endif
8454824e7fdSDimitry Andric }
846