xref: /freebsd-src/contrib/llvm-project/llvm/lib/Transforms/Utils/SampleProfileInference.cpp (revision 04eeddc0aa8e0a417a16eaf9d7d095207f4a8623)
14824e7fdSDimitry Andric //===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
24824e7fdSDimitry Andric //
34824e7fdSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44824e7fdSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
54824e7fdSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
64824e7fdSDimitry Andric //
74824e7fdSDimitry Andric //===----------------------------------------------------------------------===//
84824e7fdSDimitry Andric //
94824e7fdSDimitry Andric // This file implements a profile inference algorithm. Given an incomplete and
104824e7fdSDimitry Andric // possibly imprecise block counts, the algorithm reconstructs realistic block
114824e7fdSDimitry Andric // and edge counts that satisfy flow conservation rules, while minimally modify
124824e7fdSDimitry Andric // input block counts.
134824e7fdSDimitry Andric //
144824e7fdSDimitry Andric //===----------------------------------------------------------------------===//
154824e7fdSDimitry Andric 
164824e7fdSDimitry Andric #include "llvm/Transforms/Utils/SampleProfileInference.h"
17*04eeddc0SDimitry Andric #include "llvm/ADT/BitVector.h"
184824e7fdSDimitry Andric #include "llvm/Support/Debug.h"
194824e7fdSDimitry Andric #include <queue>
204824e7fdSDimitry Andric #include <set>
214824e7fdSDimitry Andric 
224824e7fdSDimitry Andric using namespace llvm;
234824e7fdSDimitry Andric #define DEBUG_TYPE "sample-profile-inference"
244824e7fdSDimitry Andric 
254824e7fdSDimitry Andric namespace {
264824e7fdSDimitry Andric 
274824e7fdSDimitry Andric /// A value indicating an infinite flow/capacity/weight of a block/edge.
284824e7fdSDimitry Andric /// Not using numeric_limits<int64_t>::max(), as the values can be summed up
294824e7fdSDimitry Andric /// during the execution.
304824e7fdSDimitry Andric static constexpr int64_t INF = ((int64_t)1) << 50;
314824e7fdSDimitry Andric 
324824e7fdSDimitry Andric /// The minimum-cost maximum flow algorithm.
334824e7fdSDimitry Andric ///
344824e7fdSDimitry Andric /// The algorithm finds the maximum flow of minimum cost on a given (directed)
354824e7fdSDimitry Andric /// network using a modified version of the classical Moore-Bellman-Ford
364824e7fdSDimitry Andric /// approach. The algorithm applies a number of augmentation iterations in which
374824e7fdSDimitry Andric /// flow is sent along paths of positive capacity from the source to the sink.
384824e7fdSDimitry Andric /// The worst-case time complexity of the implementation is O(v(f)*m*n), where
394824e7fdSDimitry Andric /// where m is the number of edges, n is the number of vertices, and v(f) is the
404824e7fdSDimitry Andric /// value of the maximum flow. However, the observed running time on typical
414824e7fdSDimitry Andric /// instances is sub-quadratic, that is, o(n^2).
424824e7fdSDimitry Andric ///
434824e7fdSDimitry Andric /// The input is a set of edges with specified costs and capacities, and a pair
444824e7fdSDimitry Andric /// of nodes (source and sink). The output is the flow along each edge of the
454824e7fdSDimitry Andric /// minimum total cost respecting the given edge capacities.
464824e7fdSDimitry Andric class MinCostMaxFlow {
474824e7fdSDimitry Andric public:
484824e7fdSDimitry Andric   // Initialize algorithm's data structures for a network of a given size.
494824e7fdSDimitry Andric   void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
504824e7fdSDimitry Andric     Source = SourceNode;
514824e7fdSDimitry Andric     Target = SinkNode;
524824e7fdSDimitry Andric 
534824e7fdSDimitry Andric     Nodes = std::vector<Node>(NodeCount);
544824e7fdSDimitry Andric     Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
554824e7fdSDimitry Andric   }
564824e7fdSDimitry Andric 
574824e7fdSDimitry Andric   // Run the algorithm.
584824e7fdSDimitry Andric   int64_t run() {
594824e7fdSDimitry Andric     // Find an augmenting path and update the flow along the path
604824e7fdSDimitry Andric     size_t AugmentationIters = 0;
614824e7fdSDimitry Andric     while (findAugmentingPath()) {
624824e7fdSDimitry Andric       augmentFlowAlongPath();
634824e7fdSDimitry Andric       AugmentationIters++;
644824e7fdSDimitry Andric     }
654824e7fdSDimitry Andric 
664824e7fdSDimitry Andric     // Compute the total flow and its cost
674824e7fdSDimitry Andric     int64_t TotalCost = 0;
684824e7fdSDimitry Andric     int64_t TotalFlow = 0;
694824e7fdSDimitry Andric     for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
704824e7fdSDimitry Andric       for (auto &Edge : Edges[Src]) {
714824e7fdSDimitry Andric         if (Edge.Flow > 0) {
724824e7fdSDimitry Andric           TotalCost += Edge.Cost * Edge.Flow;
734824e7fdSDimitry Andric           if (Src == Source)
744824e7fdSDimitry Andric             TotalFlow += Edge.Flow;
754824e7fdSDimitry Andric         }
764824e7fdSDimitry Andric       }
774824e7fdSDimitry Andric     }
784824e7fdSDimitry Andric     LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
794824e7fdSDimitry Andric                       << " iterations with " << TotalFlow << " total flow"
804824e7fdSDimitry Andric                       << " of " << TotalCost << " cost\n");
814824e7fdSDimitry Andric     (void)TotalFlow;
824824e7fdSDimitry Andric     return TotalCost;
834824e7fdSDimitry Andric   }
844824e7fdSDimitry Andric 
854824e7fdSDimitry Andric   /// Adding an edge to the network with a specified capacity and a cost.
864824e7fdSDimitry Andric   /// Multiple edges between a pair of nodes are allowed but self-edges
874824e7fdSDimitry Andric   /// are not supported.
884824e7fdSDimitry Andric   void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) {
894824e7fdSDimitry Andric     assert(Capacity > 0 && "adding an edge of zero capacity");
904824e7fdSDimitry Andric     assert(Src != Dst && "loop edge are not supported");
914824e7fdSDimitry Andric 
924824e7fdSDimitry Andric     Edge SrcEdge;
934824e7fdSDimitry Andric     SrcEdge.Dst = Dst;
944824e7fdSDimitry Andric     SrcEdge.Cost = Cost;
954824e7fdSDimitry Andric     SrcEdge.Capacity = Capacity;
964824e7fdSDimitry Andric     SrcEdge.Flow = 0;
974824e7fdSDimitry Andric     SrcEdge.RevEdgeIndex = Edges[Dst].size();
984824e7fdSDimitry Andric 
994824e7fdSDimitry Andric     Edge DstEdge;
1004824e7fdSDimitry Andric     DstEdge.Dst = Src;
1014824e7fdSDimitry Andric     DstEdge.Cost = -Cost;
1024824e7fdSDimitry Andric     DstEdge.Capacity = 0;
1034824e7fdSDimitry Andric     DstEdge.Flow = 0;
1044824e7fdSDimitry Andric     DstEdge.RevEdgeIndex = Edges[Src].size();
1054824e7fdSDimitry Andric 
1064824e7fdSDimitry Andric     Edges[Src].push_back(SrcEdge);
1074824e7fdSDimitry Andric     Edges[Dst].push_back(DstEdge);
1084824e7fdSDimitry Andric   }
1094824e7fdSDimitry Andric 
1104824e7fdSDimitry Andric   /// Adding an edge to the network of infinite capacity and a given cost.
1114824e7fdSDimitry Andric   void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) {
1124824e7fdSDimitry Andric     addEdge(Src, Dst, INF, Cost);
1134824e7fdSDimitry Andric   }
1144824e7fdSDimitry Andric 
1154824e7fdSDimitry Andric   /// Get the total flow from a given source node.
1164824e7fdSDimitry Andric   /// Returns a list of pairs (target node, amount of flow to the target).
1174824e7fdSDimitry Andric   const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
1184824e7fdSDimitry Andric     std::vector<std::pair<uint64_t, int64_t>> Flow;
1194824e7fdSDimitry Andric     for (auto &Edge : Edges[Src]) {
1204824e7fdSDimitry Andric       if (Edge.Flow > 0)
1214824e7fdSDimitry Andric         Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
1224824e7fdSDimitry Andric     }
1234824e7fdSDimitry Andric     return Flow;
1244824e7fdSDimitry Andric   }
1254824e7fdSDimitry Andric 
1264824e7fdSDimitry Andric   /// Get the total flow between a pair of nodes.
1274824e7fdSDimitry Andric   int64_t getFlow(uint64_t Src, uint64_t Dst) const {
1284824e7fdSDimitry Andric     int64_t Flow = 0;
1294824e7fdSDimitry Andric     for (auto &Edge : Edges[Src]) {
1304824e7fdSDimitry Andric       if (Edge.Dst == Dst) {
1314824e7fdSDimitry Andric         Flow += Edge.Flow;
1324824e7fdSDimitry Andric       }
1334824e7fdSDimitry Andric     }
1344824e7fdSDimitry Andric     return Flow;
1354824e7fdSDimitry Andric   }
1364824e7fdSDimitry Andric 
1374824e7fdSDimitry Andric   /// A cost of increasing a block's count by one.
1384824e7fdSDimitry Andric   static constexpr int64_t AuxCostInc = 10;
1394824e7fdSDimitry Andric   /// A cost of decreasing a block's count by one.
1404824e7fdSDimitry Andric   static constexpr int64_t AuxCostDec = 20;
1414824e7fdSDimitry Andric   /// A cost of increasing a count of zero-weight block by one.
1424824e7fdSDimitry Andric   static constexpr int64_t AuxCostIncZero = 11;
1434824e7fdSDimitry Andric   /// A cost of increasing the entry block's count by one.
1444824e7fdSDimitry Andric   static constexpr int64_t AuxCostIncEntry = 40;
1454824e7fdSDimitry Andric   /// A cost of decreasing the entry block's count by one.
1464824e7fdSDimitry Andric   static constexpr int64_t AuxCostDecEntry = 10;
1474824e7fdSDimitry Andric   /// A cost of taking an unlikely jump.
148*04eeddc0SDimitry Andric   static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 30;
1494824e7fdSDimitry Andric 
1504824e7fdSDimitry Andric private:
1514824e7fdSDimitry Andric   /// Check for existence of an augmenting path with a positive capacity.
1524824e7fdSDimitry Andric   bool findAugmentingPath() {
1534824e7fdSDimitry Andric     // Initialize data structures
1544824e7fdSDimitry Andric     for (auto &Node : Nodes) {
1554824e7fdSDimitry Andric       Node.Distance = INF;
1564824e7fdSDimitry Andric       Node.ParentNode = uint64_t(-1);
1574824e7fdSDimitry Andric       Node.ParentEdgeIndex = uint64_t(-1);
1584824e7fdSDimitry Andric       Node.Taken = false;
1594824e7fdSDimitry Andric     }
1604824e7fdSDimitry Andric 
1614824e7fdSDimitry Andric     std::queue<uint64_t> Queue;
1624824e7fdSDimitry Andric     Queue.push(Source);
1634824e7fdSDimitry Andric     Nodes[Source].Distance = 0;
1644824e7fdSDimitry Andric     Nodes[Source].Taken = true;
1654824e7fdSDimitry Andric     while (!Queue.empty()) {
1664824e7fdSDimitry Andric       uint64_t Src = Queue.front();
1674824e7fdSDimitry Andric       Queue.pop();
1684824e7fdSDimitry Andric       Nodes[Src].Taken = false;
1694824e7fdSDimitry Andric       // Although the residual network contains edges with negative costs
1704824e7fdSDimitry Andric       // (in particular, backward edges), it can be shown that there are no
1714824e7fdSDimitry Andric       // negative-weight cycles and the following two invariants are maintained:
1724824e7fdSDimitry Andric       // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V,
1734824e7fdSDimitry Andric       // where Dist is the length of the shortest path between two nodes. This
1744824e7fdSDimitry Andric       // allows to prune the search-space of the path-finding algorithm using
1754824e7fdSDimitry Andric       // the following early-stop criteria:
1764824e7fdSDimitry Andric       // -- If we find a path with zero-distance from Source to Target, stop the
1774824e7fdSDimitry Andric       //    search, as the path is the shortest since Dist[Source, Target] >= 0;
1784824e7fdSDimitry Andric       // -- If we have Dist[Source, V] > Dist[Source, Target], then do not
1794824e7fdSDimitry Andric       //    process node V, as it is guaranteed _not_ to be on a shortest path
1804824e7fdSDimitry Andric       //    from Source to Target; it follows from inequalities
1814824e7fdSDimitry Andric       //    Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
1824824e7fdSDimitry Andric       //                         >= Dist[Source, V]
1834824e7fdSDimitry Andric       if (Nodes[Target].Distance == 0)
1844824e7fdSDimitry Andric         break;
1854824e7fdSDimitry Andric       if (Nodes[Src].Distance > Nodes[Target].Distance)
1864824e7fdSDimitry Andric         continue;
1874824e7fdSDimitry Andric 
1884824e7fdSDimitry Andric       // Process adjacent edges
1894824e7fdSDimitry Andric       for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
1904824e7fdSDimitry Andric         auto &Edge = Edges[Src][EdgeIdx];
1914824e7fdSDimitry Andric         if (Edge.Flow < Edge.Capacity) {
1924824e7fdSDimitry Andric           uint64_t Dst = Edge.Dst;
1934824e7fdSDimitry Andric           int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
1944824e7fdSDimitry Andric           if (Nodes[Dst].Distance > NewDistance) {
1954824e7fdSDimitry Andric             // Update the distance and the parent node/edge
1964824e7fdSDimitry Andric             Nodes[Dst].Distance = NewDistance;
1974824e7fdSDimitry Andric             Nodes[Dst].ParentNode = Src;
1984824e7fdSDimitry Andric             Nodes[Dst].ParentEdgeIndex = EdgeIdx;
1994824e7fdSDimitry Andric             // Add the node to the queue, if it is not there yet
2004824e7fdSDimitry Andric             if (!Nodes[Dst].Taken) {
2014824e7fdSDimitry Andric               Queue.push(Dst);
2024824e7fdSDimitry Andric               Nodes[Dst].Taken = true;
2034824e7fdSDimitry Andric             }
2044824e7fdSDimitry Andric           }
2054824e7fdSDimitry Andric         }
2064824e7fdSDimitry Andric       }
2074824e7fdSDimitry Andric     }
2084824e7fdSDimitry Andric 
2094824e7fdSDimitry Andric     return Nodes[Target].Distance != INF;
2104824e7fdSDimitry Andric   }
2114824e7fdSDimitry Andric 
2124824e7fdSDimitry Andric   /// Update the current flow along the augmenting path.
2134824e7fdSDimitry Andric   void augmentFlowAlongPath() {
2144824e7fdSDimitry Andric     // Find path capacity
2154824e7fdSDimitry Andric     int64_t PathCapacity = INF;
2164824e7fdSDimitry Andric     uint64_t Now = Target;
2174824e7fdSDimitry Andric     while (Now != Source) {
2184824e7fdSDimitry Andric       uint64_t Pred = Nodes[Now].ParentNode;
2194824e7fdSDimitry Andric       auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
2204824e7fdSDimitry Andric       PathCapacity = std::min(PathCapacity, Edge.Capacity - Edge.Flow);
2214824e7fdSDimitry Andric       Now = Pred;
2224824e7fdSDimitry Andric     }
2234824e7fdSDimitry Andric 
2240eae32dcSDimitry Andric     assert(PathCapacity > 0 && "found an incorrect augmenting path");
2254824e7fdSDimitry Andric 
2264824e7fdSDimitry Andric     // Update the flow along the path
2274824e7fdSDimitry Andric     Now = Target;
2284824e7fdSDimitry Andric     while (Now != Source) {
2294824e7fdSDimitry Andric       uint64_t Pred = Nodes[Now].ParentNode;
2304824e7fdSDimitry Andric       auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
2314824e7fdSDimitry Andric       auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];
2324824e7fdSDimitry Andric 
2334824e7fdSDimitry Andric       Edge.Flow += PathCapacity;
2344824e7fdSDimitry Andric       RevEdge.Flow -= PathCapacity;
2354824e7fdSDimitry Andric 
2364824e7fdSDimitry Andric       Now = Pred;
2374824e7fdSDimitry Andric     }
2384824e7fdSDimitry Andric   }
2394824e7fdSDimitry Andric 
240*04eeddc0SDimitry Andric   /// A node in a flow network.
2414824e7fdSDimitry Andric   struct Node {
2424824e7fdSDimitry Andric     /// The cost of the cheapest path from the source to the current node.
2434824e7fdSDimitry Andric     int64_t Distance;
2444824e7fdSDimitry Andric     /// The node preceding the current one in the path.
2454824e7fdSDimitry Andric     uint64_t ParentNode;
2464824e7fdSDimitry Andric     /// The index of the edge between ParentNode and the current node.
2474824e7fdSDimitry Andric     uint64_t ParentEdgeIndex;
2484824e7fdSDimitry Andric     /// An indicator of whether the current node is in a queue.
2494824e7fdSDimitry Andric     bool Taken;
2504824e7fdSDimitry Andric   };
2514824e7fdSDimitry Andric   /// An edge in a flow network.
2524824e7fdSDimitry Andric   struct Edge {
2534824e7fdSDimitry Andric     /// The cost of the edge.
2544824e7fdSDimitry Andric     int64_t Cost;
2554824e7fdSDimitry Andric     /// The capacity of the edge.
2564824e7fdSDimitry Andric     int64_t Capacity;
2574824e7fdSDimitry Andric     /// The current flow on the edge.
2584824e7fdSDimitry Andric     int64_t Flow;
2594824e7fdSDimitry Andric     /// The destination node of the edge.
2604824e7fdSDimitry Andric     uint64_t Dst;
2614824e7fdSDimitry Andric     /// The index of the reverse edge between Dst and the current node.
2624824e7fdSDimitry Andric     uint64_t RevEdgeIndex;
2634824e7fdSDimitry Andric   };
2644824e7fdSDimitry Andric 
2654824e7fdSDimitry Andric   /// The set of network nodes.
2664824e7fdSDimitry Andric   std::vector<Node> Nodes;
2674824e7fdSDimitry Andric   /// The set of network edges.
2684824e7fdSDimitry Andric   std::vector<std::vector<Edge>> Edges;
2694824e7fdSDimitry Andric   /// Source node of the flow.
2704824e7fdSDimitry Andric   uint64_t Source;
2714824e7fdSDimitry Andric   /// Target (sink) node of the flow.
2724824e7fdSDimitry Andric   uint64_t Target;
2734824e7fdSDimitry Andric };
2744824e7fdSDimitry Andric 
2750eae32dcSDimitry Andric /// A post-processing adjustment of control flow. It applies two steps by
2760eae32dcSDimitry Andric /// rerouting some flow and making it more realistic:
2770eae32dcSDimitry Andric ///
2780eae32dcSDimitry Andric /// - First, it removes all isolated components ("islands") with a positive flow
2790eae32dcSDimitry Andric ///   that are unreachable from the entry block. For every such component, we
2800eae32dcSDimitry Andric ///   find the shortest from the entry to an exit passing through the component,
2810eae32dcSDimitry Andric ///   and increase the flow by one unit along the path.
2820eae32dcSDimitry Andric ///
2830eae32dcSDimitry Andric /// - Second, it identifies all "unknown subgraphs" consisting of basic blocks
2840eae32dcSDimitry Andric ///   with no sampled counts. Then it rebalnces the flow that goes through such
2850eae32dcSDimitry Andric ///   a subgraph so that each branch is taken with probability 50%.
2860eae32dcSDimitry Andric ///   An unknown subgraph is such that for every two nodes u and v:
2870eae32dcSDimitry Andric ///     - u dominates v and u is not unknown;
2880eae32dcSDimitry Andric ///     - v post-dominates u; and
2890eae32dcSDimitry Andric ///     - all inner-nodes of all (u,v)-paths are unknown.
2900eae32dcSDimitry Andric ///
2910eae32dcSDimitry Andric class FlowAdjuster {
2920eae32dcSDimitry Andric public:
2930eae32dcSDimitry Andric   FlowAdjuster(FlowFunction &Func) : Func(Func) {
2940eae32dcSDimitry Andric     assert(Func.Blocks[Func.Entry].isEntry() &&
2950eae32dcSDimitry Andric            "incorrect index of the entry block");
2960eae32dcSDimitry Andric   }
2970eae32dcSDimitry Andric 
2980eae32dcSDimitry Andric   // Run the post-processing
2990eae32dcSDimitry Andric   void run() {
3000eae32dcSDimitry Andric     /// Adjust the flow to get rid of isolated components.
3010eae32dcSDimitry Andric     joinIsolatedComponents();
3020eae32dcSDimitry Andric 
3030eae32dcSDimitry Andric     /// Rebalance the flow inside unknown subgraphs.
3040eae32dcSDimitry Andric     rebalanceUnknownSubgraphs();
3050eae32dcSDimitry Andric   }
3060eae32dcSDimitry Andric 
3070eae32dcSDimitry Andric private:
3080eae32dcSDimitry Andric   void joinIsolatedComponents() {
3090eae32dcSDimitry Andric     // Find blocks that are reachable from the source
310*04eeddc0SDimitry Andric     auto Visited = BitVector(NumBlocks(), false);
3110eae32dcSDimitry Andric     findReachable(Func.Entry, Visited);
3120eae32dcSDimitry Andric 
3130eae32dcSDimitry Andric     // Iterate over all non-reachable blocks and adjust their weights
3140eae32dcSDimitry Andric     for (uint64_t I = 0; I < NumBlocks(); I++) {
3150eae32dcSDimitry Andric       auto &Block = Func.Blocks[I];
3160eae32dcSDimitry Andric       if (Block.Flow > 0 && !Visited[I]) {
3170eae32dcSDimitry Andric         // Find a path from the entry to an exit passing through the block I
3180eae32dcSDimitry Andric         auto Path = findShortestPath(I);
3190eae32dcSDimitry Andric         // Increase the flow along the path
3200eae32dcSDimitry Andric         assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
3210eae32dcSDimitry Andric                "incorrectly computed path adjusting control flow");
3220eae32dcSDimitry Andric         Func.Blocks[Func.Entry].Flow += 1;
3230eae32dcSDimitry Andric         for (auto &Jump : Path) {
3240eae32dcSDimitry Andric           Jump->Flow += 1;
3250eae32dcSDimitry Andric           Func.Blocks[Jump->Target].Flow += 1;
3260eae32dcSDimitry Andric           // Update reachability
3270eae32dcSDimitry Andric           findReachable(Jump->Target, Visited);
3280eae32dcSDimitry Andric         }
3290eae32dcSDimitry Andric       }
3300eae32dcSDimitry Andric     }
3310eae32dcSDimitry Andric   }
3320eae32dcSDimitry Andric 
3330eae32dcSDimitry Andric   /// Run BFS from a given block along the jumps with a positive flow and mark
3340eae32dcSDimitry Andric   /// all reachable blocks.
335*04eeddc0SDimitry Andric   void findReachable(uint64_t Src, BitVector &Visited) {
3360eae32dcSDimitry Andric     if (Visited[Src])
3370eae32dcSDimitry Andric       return;
3380eae32dcSDimitry Andric     std::queue<uint64_t> Queue;
3390eae32dcSDimitry Andric     Queue.push(Src);
3400eae32dcSDimitry Andric     Visited[Src] = true;
3410eae32dcSDimitry Andric     while (!Queue.empty()) {
3420eae32dcSDimitry Andric       Src = Queue.front();
3430eae32dcSDimitry Andric       Queue.pop();
3440eae32dcSDimitry Andric       for (auto Jump : Func.Blocks[Src].SuccJumps) {
3450eae32dcSDimitry Andric         uint64_t Dst = Jump->Target;
3460eae32dcSDimitry Andric         if (Jump->Flow > 0 && !Visited[Dst]) {
3470eae32dcSDimitry Andric           Queue.push(Dst);
3480eae32dcSDimitry Andric           Visited[Dst] = true;
3490eae32dcSDimitry Andric         }
3500eae32dcSDimitry Andric       }
3510eae32dcSDimitry Andric     }
3520eae32dcSDimitry Andric   }
3530eae32dcSDimitry Andric 
3540eae32dcSDimitry Andric   /// Find the shortest path from the entry block to an exit block passing
3550eae32dcSDimitry Andric   /// through a given block.
3560eae32dcSDimitry Andric   std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {
3570eae32dcSDimitry Andric     // A path from the entry block to BlockIdx
3580eae32dcSDimitry Andric     auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
3590eae32dcSDimitry Andric     // A path from BlockIdx to an exit block
3600eae32dcSDimitry Andric     auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);
3610eae32dcSDimitry Andric 
3620eae32dcSDimitry Andric     // Concatenate the two paths
3630eae32dcSDimitry Andric     std::vector<FlowJump *> Result;
3640eae32dcSDimitry Andric     Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end());
3650eae32dcSDimitry Andric     Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end());
3660eae32dcSDimitry Andric     return Result;
3670eae32dcSDimitry Andric   }
3680eae32dcSDimitry Andric 
3690eae32dcSDimitry Andric   /// Apply the Dijkstra algorithm to find the shortest path from a given
3700eae32dcSDimitry Andric   /// Source to a given Target block.
3710eae32dcSDimitry Andric   /// If Target == -1, then the path ends at an exit block.
3720eae32dcSDimitry Andric   std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) {
3730eae32dcSDimitry Andric     // Quit early, if possible
3740eae32dcSDimitry Andric     if (Source == Target)
3750eae32dcSDimitry Andric       return std::vector<FlowJump *>();
3760eae32dcSDimitry Andric     if (Func.Blocks[Source].isExit() && Target == AnyExitBlock)
3770eae32dcSDimitry Andric       return std::vector<FlowJump *>();
3780eae32dcSDimitry Andric 
3790eae32dcSDimitry Andric     // Initialize data structures
3800eae32dcSDimitry Andric     auto Distance = std::vector<int64_t>(NumBlocks(), INF);
3810eae32dcSDimitry Andric     auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr);
3820eae32dcSDimitry Andric     Distance[Source] = 0;
3830eae32dcSDimitry Andric     std::set<std::pair<uint64_t, uint64_t>> Queue;
3840eae32dcSDimitry Andric     Queue.insert(std::make_pair(Distance[Source], Source));
3850eae32dcSDimitry Andric 
3860eae32dcSDimitry Andric     // Run the Dijkstra algorithm
3870eae32dcSDimitry Andric     while (!Queue.empty()) {
3880eae32dcSDimitry Andric       uint64_t Src = Queue.begin()->second;
3890eae32dcSDimitry Andric       Queue.erase(Queue.begin());
3900eae32dcSDimitry Andric       // If we found a solution, quit early
3910eae32dcSDimitry Andric       if (Src == Target ||
3920eae32dcSDimitry Andric           (Func.Blocks[Src].isExit() && Target == AnyExitBlock))
3930eae32dcSDimitry Andric         break;
3940eae32dcSDimitry Andric 
3950eae32dcSDimitry Andric       for (auto Jump : Func.Blocks[Src].SuccJumps) {
3960eae32dcSDimitry Andric         uint64_t Dst = Jump->Target;
3970eae32dcSDimitry Andric         int64_t JumpDist = jumpDistance(Jump);
3980eae32dcSDimitry Andric         if (Distance[Dst] > Distance[Src] + JumpDist) {
3990eae32dcSDimitry Andric           Queue.erase(std::make_pair(Distance[Dst], Dst));
4000eae32dcSDimitry Andric 
4010eae32dcSDimitry Andric           Distance[Dst] = Distance[Src] + JumpDist;
4020eae32dcSDimitry Andric           Parent[Dst] = Jump;
4030eae32dcSDimitry Andric 
4040eae32dcSDimitry Andric           Queue.insert(std::make_pair(Distance[Dst], Dst));
4050eae32dcSDimitry Andric         }
4060eae32dcSDimitry Andric       }
4070eae32dcSDimitry Andric     }
4080eae32dcSDimitry Andric     // If Target is not provided, find the closest exit block
4090eae32dcSDimitry Andric     if (Target == AnyExitBlock) {
4100eae32dcSDimitry Andric       for (uint64_t I = 0; I < NumBlocks(); I++) {
4110eae32dcSDimitry Andric         if (Func.Blocks[I].isExit() && Parent[I] != nullptr) {
4120eae32dcSDimitry Andric           if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {
4130eae32dcSDimitry Andric             Target = I;
4140eae32dcSDimitry Andric           }
4150eae32dcSDimitry Andric         }
4160eae32dcSDimitry Andric       }
4170eae32dcSDimitry Andric     }
4180eae32dcSDimitry Andric     assert(Parent[Target] != nullptr && "a path does not exist");
4190eae32dcSDimitry Andric 
4200eae32dcSDimitry Andric     // Extract the constructed path
4210eae32dcSDimitry Andric     std::vector<FlowJump *> Result;
4220eae32dcSDimitry Andric     uint64_t Now = Target;
4230eae32dcSDimitry Andric     while (Now != Source) {
4240eae32dcSDimitry Andric       assert(Now == Parent[Now]->Target && "incorrect parent jump");
4250eae32dcSDimitry Andric       Result.push_back(Parent[Now]);
4260eae32dcSDimitry Andric       Now = Parent[Now]->Source;
4270eae32dcSDimitry Andric     }
4280eae32dcSDimitry Andric     // Reverse the path, since it is extracted from Target to Source
4290eae32dcSDimitry Andric     std::reverse(Result.begin(), Result.end());
4300eae32dcSDimitry Andric     return Result;
4310eae32dcSDimitry Andric   }
4320eae32dcSDimitry Andric 
4330eae32dcSDimitry Andric   /// A distance of a path for a given jump.
4340eae32dcSDimitry Andric   /// In order to incite the path to use blocks/jumps with large positive flow,
4350eae32dcSDimitry Andric   /// and avoid changing branch probability of outgoing edges drastically,
4360eae32dcSDimitry Andric   /// set the distance as follows:
4370eae32dcSDimitry Andric   ///   if Jump.Flow > 0, then distance = max(100 - Jump->Flow, 0)
4380eae32dcSDimitry Andric   ///   if Block.Weight > 0, then distance = 1
4390eae32dcSDimitry Andric   ///   otherwise distance >> 1
4400eae32dcSDimitry Andric   int64_t jumpDistance(FlowJump *Jump) const {
4410eae32dcSDimitry Andric     int64_t BaseDistance = 100;
4420eae32dcSDimitry Andric     if (Jump->IsUnlikely)
4430eae32dcSDimitry Andric       return MinCostMaxFlow::AuxCostUnlikely;
4440eae32dcSDimitry Andric     if (Jump->Flow > 0)
4450eae32dcSDimitry Andric       return std::max(BaseDistance - (int64_t)Jump->Flow, (int64_t)0);
4460eae32dcSDimitry Andric     if (Func.Blocks[Jump->Target].Weight > 0)
4470eae32dcSDimitry Andric       return BaseDistance;
4480eae32dcSDimitry Andric     return BaseDistance * (NumBlocks() + 1);
4490eae32dcSDimitry Andric   };
4500eae32dcSDimitry Andric 
4510eae32dcSDimitry Andric   uint64_t NumBlocks() const { return Func.Blocks.size(); }
4520eae32dcSDimitry Andric 
453*04eeddc0SDimitry Andric   /// Rebalance unknown subgraphs so that the flow is split evenly across the
454*04eeddc0SDimitry Andric   /// outgoing branches of every block of the subgraph. The method iterates over
455*04eeddc0SDimitry Andric   /// blocks with known weight and identifies unknown subgraphs rooted at the
456*04eeddc0SDimitry Andric   /// blocks. Then it verifies if flow rebalancing is feasible and applies it.
4570eae32dcSDimitry Andric   void rebalanceUnknownSubgraphs() {
458*04eeddc0SDimitry Andric     // Try to find unknown subgraphs from each block
4590eae32dcSDimitry Andric     for (uint64_t I = 0; I < Func.Blocks.size(); I++) {
4600eae32dcSDimitry Andric       auto SrcBlock = &Func.Blocks[I];
461*04eeddc0SDimitry Andric       // Verify if rebalancing rooted at SrcBlock is feasible
462*04eeddc0SDimitry Andric       if (!canRebalanceAtRoot(SrcBlock))
4630eae32dcSDimitry Andric         continue;
4640eae32dcSDimitry Andric 
465*04eeddc0SDimitry Andric       // Find an unknown subgraphs starting at SrcBlock. Along the way,
466*04eeddc0SDimitry Andric       // fill in known destinations and intermediate unknown blocks.
467*04eeddc0SDimitry Andric       std::vector<FlowBlock *> UnknownBlocks;
468*04eeddc0SDimitry Andric       std::vector<FlowBlock *> KnownDstBlocks;
469*04eeddc0SDimitry Andric       findUnknownSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks);
470*04eeddc0SDimitry Andric 
471*04eeddc0SDimitry Andric       // Verify if rebalancing of the subgraph is feasible. If the search is
472*04eeddc0SDimitry Andric       // successful, find the unique destination block (which can be null)
4730eae32dcSDimitry Andric       FlowBlock *DstBlock = nullptr;
474*04eeddc0SDimitry Andric       if (!canRebalanceSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks,
475*04eeddc0SDimitry Andric                                 DstBlock))
4760eae32dcSDimitry Andric         continue;
477*04eeddc0SDimitry Andric 
478*04eeddc0SDimitry Andric       // We cannot rebalance subgraphs containing cycles among unknown blocks
479*04eeddc0SDimitry Andric       if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownBlocks))
4800eae32dcSDimitry Andric         continue;
4810eae32dcSDimitry Andric 
4820eae32dcSDimitry Andric       // Rebalance the flow
483*04eeddc0SDimitry Andric       rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownBlocks);
4840eae32dcSDimitry Andric     }
4850eae32dcSDimitry Andric   }
4860eae32dcSDimitry Andric 
487*04eeddc0SDimitry Andric   /// Verify if rebalancing rooted at a given block is possible.
488*04eeddc0SDimitry Andric   bool canRebalanceAtRoot(const FlowBlock *SrcBlock) {
489*04eeddc0SDimitry Andric     // Do not attempt to find unknown subgraphs from an unknown or a
490*04eeddc0SDimitry Andric     // zero-flow block
491*04eeddc0SDimitry Andric     if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0)
492*04eeddc0SDimitry Andric       return false;
493*04eeddc0SDimitry Andric 
494*04eeddc0SDimitry Andric     // Do not attempt to process subgraphs from a block w/o unknown sucessors
495*04eeddc0SDimitry Andric     bool HasUnknownSuccs = false;
496*04eeddc0SDimitry Andric     for (auto Jump : SrcBlock->SuccJumps) {
497*04eeddc0SDimitry Andric       if (Func.Blocks[Jump->Target].UnknownWeight) {
498*04eeddc0SDimitry Andric         HasUnknownSuccs = true;
499*04eeddc0SDimitry Andric         break;
500*04eeddc0SDimitry Andric       }
501*04eeddc0SDimitry Andric     }
502*04eeddc0SDimitry Andric     if (!HasUnknownSuccs)
503*04eeddc0SDimitry Andric       return false;
504*04eeddc0SDimitry Andric 
505*04eeddc0SDimitry Andric     return true;
506*04eeddc0SDimitry Andric   }
507*04eeddc0SDimitry Andric 
508*04eeddc0SDimitry Andric   /// Find an unknown subgraph starting at block SrcBlock. The method sets
509*04eeddc0SDimitry Andric   /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks.
510*04eeddc0SDimitry Andric   void findUnknownSubgraph(const FlowBlock *SrcBlock,
511*04eeddc0SDimitry Andric                            std::vector<FlowBlock *> &KnownDstBlocks,
512*04eeddc0SDimitry Andric                            std::vector<FlowBlock *> &UnknownBlocks) {
5130eae32dcSDimitry Andric     // Run BFS from SrcBlock and make sure all paths are going through unknown
5140eae32dcSDimitry Andric     // blocks and end at a non-unknown DstBlock
515*04eeddc0SDimitry Andric     auto Visited = BitVector(NumBlocks(), false);
5160eae32dcSDimitry Andric     std::queue<uint64_t> Queue;
5170eae32dcSDimitry Andric 
5180eae32dcSDimitry Andric     Queue.push(SrcBlock->Index);
5190eae32dcSDimitry Andric     Visited[SrcBlock->Index] = true;
5200eae32dcSDimitry Andric     while (!Queue.empty()) {
5210eae32dcSDimitry Andric       auto &Block = Func.Blocks[Queue.front()];
5220eae32dcSDimitry Andric       Queue.pop();
5230eae32dcSDimitry Andric       // Process blocks reachable from Block
5240eae32dcSDimitry Andric       for (auto Jump : Block.SuccJumps) {
525*04eeddc0SDimitry Andric         // If Jump can be ignored, skip it
526*04eeddc0SDimitry Andric         if (ignoreJump(SrcBlock, nullptr, Jump))
527*04eeddc0SDimitry Andric           continue;
528*04eeddc0SDimitry Andric 
5290eae32dcSDimitry Andric         uint64_t Dst = Jump->Target;
530*04eeddc0SDimitry Andric         // If Dst has been visited, skip Jump
5310eae32dcSDimitry Andric         if (Visited[Dst])
5320eae32dcSDimitry Andric           continue;
533*04eeddc0SDimitry Andric         // Process block Dst
5340eae32dcSDimitry Andric         Visited[Dst] = true;
5350eae32dcSDimitry Andric         if (!Func.Blocks[Dst].UnknownWeight) {
536*04eeddc0SDimitry Andric           KnownDstBlocks.push_back(&Func.Blocks[Dst]);
5370eae32dcSDimitry Andric         } else {
5380eae32dcSDimitry Andric           Queue.push(Dst);
539*04eeddc0SDimitry Andric           UnknownBlocks.push_back(&Func.Blocks[Dst]);
540*04eeddc0SDimitry Andric         }
5410eae32dcSDimitry Andric       }
5420eae32dcSDimitry Andric     }
5430eae32dcSDimitry Andric   }
5440eae32dcSDimitry Andric 
545*04eeddc0SDimitry Andric   /// Verify if rebalancing of the subgraph is feasible. If the checks are
546*04eeddc0SDimitry Andric   /// successful, set the unique destination block, DstBlock (can be null).
547*04eeddc0SDimitry Andric   bool canRebalanceSubgraph(const FlowBlock *SrcBlock,
548*04eeddc0SDimitry Andric                             const std::vector<FlowBlock *> &KnownDstBlocks,
549*04eeddc0SDimitry Andric                             const std::vector<FlowBlock *> &UnknownBlocks,
550*04eeddc0SDimitry Andric                             FlowBlock *&DstBlock) {
5510eae32dcSDimitry Andric     // If the list of unknown blocks is empty, we don't need rebalancing
552*04eeddc0SDimitry Andric     if (UnknownBlocks.empty())
5530eae32dcSDimitry Andric       return false;
554*04eeddc0SDimitry Andric 
555*04eeddc0SDimitry Andric     // If there are multiple known sinks, we can't rebalance
556*04eeddc0SDimitry Andric     if (KnownDstBlocks.size() > 1)
5570eae32dcSDimitry Andric       return false;
558*04eeddc0SDimitry Andric     DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();
559*04eeddc0SDimitry Andric 
560*04eeddc0SDimitry Andric     // Verify sinks of the subgraph
561*04eeddc0SDimitry Andric     for (auto Block : UnknownBlocks) {
562*04eeddc0SDimitry Andric       if (Block->SuccJumps.empty()) {
563*04eeddc0SDimitry Andric         // If there are multiple (known and unknown) sinks, we can't rebalance
564*04eeddc0SDimitry Andric         if (DstBlock != nullptr)
565*04eeddc0SDimitry Andric           return false;
566*04eeddc0SDimitry Andric         continue;
567*04eeddc0SDimitry Andric       }
568*04eeddc0SDimitry Andric       size_t NumIgnoredJumps = 0;
569*04eeddc0SDimitry Andric       for (auto Jump : Block->SuccJumps) {
570*04eeddc0SDimitry Andric         if (ignoreJump(SrcBlock, DstBlock, Jump))
571*04eeddc0SDimitry Andric           NumIgnoredJumps++;
572*04eeddc0SDimitry Andric       }
573*04eeddc0SDimitry Andric       // If there is a non-sink block in UnknownBlocks with all jumps ignored,
574*04eeddc0SDimitry Andric       // then we can't rebalance
575*04eeddc0SDimitry Andric       if (NumIgnoredJumps == Block->SuccJumps.size())
5760eae32dcSDimitry Andric         return false;
5770eae32dcSDimitry Andric     }
5780eae32dcSDimitry Andric 
5790eae32dcSDimitry Andric     return true;
5800eae32dcSDimitry Andric   }
5810eae32dcSDimitry Andric 
582*04eeddc0SDimitry Andric   /// Decide whether the Jump is ignored while processing an unknown subgraphs
583*04eeddc0SDimitry Andric   /// rooted at basic block SrcBlock with the destination block, DstBlock.
584*04eeddc0SDimitry Andric   bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
585*04eeddc0SDimitry Andric                   const FlowJump *Jump) {
586*04eeddc0SDimitry Andric     // Ignore unlikely jumps with zero flow
587*04eeddc0SDimitry Andric     if (Jump->IsUnlikely && Jump->Flow == 0)
588*04eeddc0SDimitry Andric       return true;
589*04eeddc0SDimitry Andric 
590*04eeddc0SDimitry Andric     auto JumpSource = &Func.Blocks[Jump->Source];
591*04eeddc0SDimitry Andric     auto JumpTarget = &Func.Blocks[Jump->Target];
592*04eeddc0SDimitry Andric 
593*04eeddc0SDimitry Andric     // Do not ignore jumps coming into DstBlock
594*04eeddc0SDimitry Andric     if (DstBlock != nullptr && JumpTarget == DstBlock)
595*04eeddc0SDimitry Andric       return false;
596*04eeddc0SDimitry Andric 
597*04eeddc0SDimitry Andric     // Ignore jumps out of SrcBlock to known blocks
598*04eeddc0SDimitry Andric     if (!JumpTarget->UnknownWeight && JumpSource == SrcBlock)
599*04eeddc0SDimitry Andric       return true;
600*04eeddc0SDimitry Andric 
601*04eeddc0SDimitry Andric     // Ignore jumps to known blocks with zero flow
602*04eeddc0SDimitry Andric     if (!JumpTarget->UnknownWeight && JumpTarget->Flow == 0)
603*04eeddc0SDimitry Andric       return true;
604*04eeddc0SDimitry Andric 
605*04eeddc0SDimitry Andric     return false;
606*04eeddc0SDimitry Andric   }
607*04eeddc0SDimitry Andric 
6080eae32dcSDimitry Andric   /// Verify if the given unknown subgraph is acyclic, and if yes, reorder
609*04eeddc0SDimitry Andric   /// UnknownBlocks in the topological order (so that all jumps are "forward").
610*04eeddc0SDimitry Andric   bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
611*04eeddc0SDimitry Andric                          std::vector<FlowBlock *> &UnknownBlocks) {
6120eae32dcSDimitry Andric     // Extract local in-degrees in the considered subgraph
6130eae32dcSDimitry Andric     auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
614*04eeddc0SDimitry Andric     auto fillInDegree = [&](const FlowBlock *Block) {
615*04eeddc0SDimitry Andric       for (auto Jump : Block->SuccJumps) {
616*04eeddc0SDimitry Andric         if (ignoreJump(SrcBlock, DstBlock, Jump))
617*04eeddc0SDimitry Andric           continue;
6180eae32dcSDimitry Andric         LocalInDegree[Jump->Target]++;
6190eae32dcSDimitry Andric       }
620*04eeddc0SDimitry Andric     };
621*04eeddc0SDimitry Andric     fillInDegree(SrcBlock);
622*04eeddc0SDimitry Andric     for (auto Block : UnknownBlocks) {
623*04eeddc0SDimitry Andric       fillInDegree(Block);
6240eae32dcSDimitry Andric     }
6250eae32dcSDimitry Andric     // A loop containing SrcBlock
6260eae32dcSDimitry Andric     if (LocalInDegree[SrcBlock->Index] > 0)
6270eae32dcSDimitry Andric       return false;
6280eae32dcSDimitry Andric 
6290eae32dcSDimitry Andric     std::vector<FlowBlock *> AcyclicOrder;
6300eae32dcSDimitry Andric     std::queue<uint64_t> Queue;
6310eae32dcSDimitry Andric     Queue.push(SrcBlock->Index);
6320eae32dcSDimitry Andric     while (!Queue.empty()) {
633*04eeddc0SDimitry Andric       FlowBlock *Block = &Func.Blocks[Queue.front()];
6340eae32dcSDimitry Andric       Queue.pop();
635*04eeddc0SDimitry Andric       // Stop propagation once we reach DstBlock, if any
636*04eeddc0SDimitry Andric       if (DstBlock != nullptr && Block == DstBlock)
6370eae32dcSDimitry Andric         break;
6380eae32dcSDimitry Andric 
639*04eeddc0SDimitry Andric       // Keep an acyclic order of unknown blocks
640*04eeddc0SDimitry Andric       if (Block->UnknownWeight && Block != SrcBlock)
641*04eeddc0SDimitry Andric         AcyclicOrder.push_back(Block);
642*04eeddc0SDimitry Andric 
6430eae32dcSDimitry Andric       // Add to the queue all successors with zero local in-degree
644*04eeddc0SDimitry Andric       for (auto Jump : Block->SuccJumps) {
645*04eeddc0SDimitry Andric         if (ignoreJump(SrcBlock, DstBlock, Jump))
646*04eeddc0SDimitry Andric           continue;
6470eae32dcSDimitry Andric         uint64_t Dst = Jump->Target;
6480eae32dcSDimitry Andric         LocalInDegree[Dst]--;
6490eae32dcSDimitry Andric         if (LocalInDegree[Dst] == 0) {
6500eae32dcSDimitry Andric           Queue.push(Dst);
6510eae32dcSDimitry Andric         }
6520eae32dcSDimitry Andric       }
6530eae32dcSDimitry Andric     }
6540eae32dcSDimitry Andric 
6550eae32dcSDimitry Andric     // If there is a cycle in the subgraph, AcyclicOrder contains only a subset
6560eae32dcSDimitry Andric     // of all blocks
657*04eeddc0SDimitry Andric     if (UnknownBlocks.size() != AcyclicOrder.size())
6580eae32dcSDimitry Andric       return false;
659*04eeddc0SDimitry Andric     UnknownBlocks = AcyclicOrder;
6600eae32dcSDimitry Andric     return true;
6610eae32dcSDimitry Andric   }
6620eae32dcSDimitry Andric 
663*04eeddc0SDimitry Andric   /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and
664*04eeddc0SDimitry Andric   /// having UnknownBlocks intermediate blocks.
665*04eeddc0SDimitry Andric   void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock,
666*04eeddc0SDimitry Andric                                 const FlowBlock *DstBlock,
667*04eeddc0SDimitry Andric                                 const std::vector<FlowBlock *> &UnknownBlocks) {
6680eae32dcSDimitry Andric     assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");
6690eae32dcSDimitry Andric 
670*04eeddc0SDimitry Andric     // Ditribute flow from the source block
671*04eeddc0SDimitry Andric     uint64_t BlockFlow = 0;
672*04eeddc0SDimitry Andric     // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps
673*04eeddc0SDimitry Andric     for (auto Jump : SrcBlock->SuccJumps) {
674*04eeddc0SDimitry Andric       if (ignoreJump(SrcBlock, DstBlock, Jump))
6750eae32dcSDimitry Andric         continue;
676*04eeddc0SDimitry Andric       BlockFlow += Jump->Flow;
6770eae32dcSDimitry Andric     }
678*04eeddc0SDimitry Andric     rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);
679*04eeddc0SDimitry Andric 
680*04eeddc0SDimitry Andric     // Ditribute flow from the remaining blocks
681*04eeddc0SDimitry Andric     for (auto Block : UnknownBlocks) {
682*04eeddc0SDimitry Andric       assert(Block->UnknownWeight && "incorrect unknown subgraph");
683*04eeddc0SDimitry Andric       uint64_t BlockFlow = 0;
684*04eeddc0SDimitry Andric       // Block's flow is the sum of incoming flows
685*04eeddc0SDimitry Andric       for (auto Jump : Block->PredJumps) {
686*04eeddc0SDimitry Andric         BlockFlow += Jump->Flow;
687*04eeddc0SDimitry Andric       }
688*04eeddc0SDimitry Andric       Block->Flow = BlockFlow;
689*04eeddc0SDimitry Andric       rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow);
690*04eeddc0SDimitry Andric     }
691*04eeddc0SDimitry Andric   }
692*04eeddc0SDimitry Andric 
693*04eeddc0SDimitry Andric   /// Redistribute flow for a block in a subgraph rooted at SrcBlock,
694*04eeddc0SDimitry Andric   /// and ending at DstBlock.
695*04eeddc0SDimitry Andric   void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
696*04eeddc0SDimitry Andric                       const FlowBlock *Block, uint64_t BlockFlow) {
697*04eeddc0SDimitry Andric     // Process all successor jumps and update corresponding flow values
698*04eeddc0SDimitry Andric     size_t BlockDegree = 0;
699*04eeddc0SDimitry Andric     for (auto Jump : Block->SuccJumps) {
700*04eeddc0SDimitry Andric       if (ignoreJump(SrcBlock, DstBlock, Jump))
701*04eeddc0SDimitry Andric         continue;
702*04eeddc0SDimitry Andric       BlockDegree++;
703*04eeddc0SDimitry Andric     }
704*04eeddc0SDimitry Andric     // If all successor jumps of the block are ignored, skip it
705*04eeddc0SDimitry Andric     if (DstBlock == nullptr && BlockDegree == 0)
706*04eeddc0SDimitry Andric       return;
707*04eeddc0SDimitry Andric     assert(BlockDegree > 0 && "all outgoing jumps are ignored");
708*04eeddc0SDimitry Andric 
709*04eeddc0SDimitry Andric     // Each of the Block's successors gets the following amount of flow.
710*04eeddc0SDimitry Andric     // Rounding the value up so that all flow is propagated
711*04eeddc0SDimitry Andric     uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
712*04eeddc0SDimitry Andric     for (auto Jump : Block->SuccJumps) {
713*04eeddc0SDimitry Andric       if (ignoreJump(SrcBlock, DstBlock, Jump))
714*04eeddc0SDimitry Andric         continue;
715*04eeddc0SDimitry Andric       uint64_t Flow = std::min(SuccFlow, BlockFlow);
7160eae32dcSDimitry Andric       Jump->Flow = Flow;
717*04eeddc0SDimitry Andric       BlockFlow -= Flow;
7180eae32dcSDimitry Andric     }
719*04eeddc0SDimitry Andric     assert(BlockFlow == 0 && "not all flow is propagated");
7200eae32dcSDimitry Andric   }
7210eae32dcSDimitry Andric 
7220eae32dcSDimitry Andric   /// A constant indicating an arbitrary exit block of a function.
7230eae32dcSDimitry Andric   static constexpr uint64_t AnyExitBlock = uint64_t(-1);
7240eae32dcSDimitry Andric 
7250eae32dcSDimitry Andric   /// The function.
7260eae32dcSDimitry Andric   FlowFunction &Func;
7270eae32dcSDimitry Andric };
7280eae32dcSDimitry Andric 
7294824e7fdSDimitry Andric /// Initializing flow network for a given function.
7304824e7fdSDimitry Andric ///
7314824e7fdSDimitry Andric /// Every block is split into three nodes that are responsible for (i) an
7324824e7fdSDimitry Andric /// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or
7334824e7fdSDimitry Andric /// reduction of the block weight.
7344824e7fdSDimitry Andric void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) {
7354824e7fdSDimitry Andric   uint64_t NumBlocks = Func.Blocks.size();
7364824e7fdSDimitry Andric   assert(NumBlocks > 1 && "Too few blocks in a function");
7374824e7fdSDimitry Andric   LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n");
7384824e7fdSDimitry Andric 
7394824e7fdSDimitry Andric   // Pre-process data: make sure the entry weight is at least 1
7404824e7fdSDimitry Andric   if (Func.Blocks[Func.Entry].Weight == 0) {
7414824e7fdSDimitry Andric     Func.Blocks[Func.Entry].Weight = 1;
7424824e7fdSDimitry Andric   }
7434824e7fdSDimitry Andric   // Introducing dummy source/sink pairs to allow flow circulation.
7444824e7fdSDimitry Andric   // The nodes corresponding to blocks of Func have indicies in the range
7454824e7fdSDimitry Andric   // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values.
7464824e7fdSDimitry Andric   uint64_t S = 3 * NumBlocks;
7474824e7fdSDimitry Andric   uint64_t T = S + 1;
7484824e7fdSDimitry Andric   uint64_t S1 = S + 2;
7494824e7fdSDimitry Andric   uint64_t T1 = S + 3;
7504824e7fdSDimitry Andric 
7514824e7fdSDimitry Andric   Network.initialize(3 * NumBlocks + 4, S1, T1);
7524824e7fdSDimitry Andric 
7534824e7fdSDimitry Andric   // Create three nodes for every block of the function
7544824e7fdSDimitry Andric   for (uint64_t B = 0; B < NumBlocks; B++) {
7554824e7fdSDimitry Andric     auto &Block = Func.Blocks[B];
7564824e7fdSDimitry Andric     assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
7574824e7fdSDimitry Andric            "non-zero weight of a block w/o weight except for an entry");
7584824e7fdSDimitry Andric 
7594824e7fdSDimitry Andric     // Split every block into two nodes
7604824e7fdSDimitry Andric     uint64_t Bin = 3 * B;
7614824e7fdSDimitry Andric     uint64_t Bout = 3 * B + 1;
7624824e7fdSDimitry Andric     uint64_t Baux = 3 * B + 2;
7634824e7fdSDimitry Andric     if (Block.Weight > 0) {
7644824e7fdSDimitry Andric       Network.addEdge(S1, Bout, Block.Weight, 0);
7654824e7fdSDimitry Andric       Network.addEdge(Bin, T1, Block.Weight, 0);
7664824e7fdSDimitry Andric     }
7674824e7fdSDimitry Andric 
7684824e7fdSDimitry Andric     // Edges from S and to T
7694824e7fdSDimitry Andric     assert((!Block.isEntry() || !Block.isExit()) &&
7704824e7fdSDimitry Andric            "a block cannot be an entry and an exit");
7714824e7fdSDimitry Andric     if (Block.isEntry()) {
7724824e7fdSDimitry Andric       Network.addEdge(S, Bin, 0);
7734824e7fdSDimitry Andric     } else if (Block.isExit()) {
7744824e7fdSDimitry Andric       Network.addEdge(Bout, T, 0);
7754824e7fdSDimitry Andric     }
7764824e7fdSDimitry Andric 
7774824e7fdSDimitry Andric     // An auxiliary node to allow increase/reduction of block counts:
7784824e7fdSDimitry Andric     // We assume that decreasing block counts is more expensive than increasing,
7794824e7fdSDimitry Andric     // and thus, setting separate costs here. In the future we may want to tune
7804824e7fdSDimitry Andric     // the relative costs so as to maximize the quality of generated profiles.
7814824e7fdSDimitry Andric     int64_t AuxCostInc = MinCostMaxFlow::AuxCostInc;
7824824e7fdSDimitry Andric     int64_t AuxCostDec = MinCostMaxFlow::AuxCostDec;
7834824e7fdSDimitry Andric     if (Block.UnknownWeight) {
7844824e7fdSDimitry Andric       // Do not penalize changing weights of blocks w/o known profile count
7854824e7fdSDimitry Andric       AuxCostInc = 0;
7864824e7fdSDimitry Andric       AuxCostDec = 0;
7874824e7fdSDimitry Andric     } else {
7884824e7fdSDimitry Andric       // Increasing the count for "cold" blocks with zero initial count is more
7894824e7fdSDimitry Andric       // expensive than for "hot" ones
7904824e7fdSDimitry Andric       if (Block.Weight == 0) {
7914824e7fdSDimitry Andric         AuxCostInc = MinCostMaxFlow::AuxCostIncZero;
7924824e7fdSDimitry Andric       }
7934824e7fdSDimitry Andric       // Modifying the count of the entry block is expensive
7944824e7fdSDimitry Andric       if (Block.isEntry()) {
7954824e7fdSDimitry Andric         AuxCostInc = MinCostMaxFlow::AuxCostIncEntry;
7964824e7fdSDimitry Andric         AuxCostDec = MinCostMaxFlow::AuxCostDecEntry;
7974824e7fdSDimitry Andric       }
7984824e7fdSDimitry Andric     }
7994824e7fdSDimitry Andric     // For blocks with self-edges, do not penalize a reduction of the count,
8004824e7fdSDimitry Andric     // as all of the increase can be attributed to the self-edge
8014824e7fdSDimitry Andric     if (Block.HasSelfEdge) {
8024824e7fdSDimitry Andric       AuxCostDec = 0;
8034824e7fdSDimitry Andric     }
8044824e7fdSDimitry Andric 
8054824e7fdSDimitry Andric     Network.addEdge(Bin, Baux, AuxCostInc);
8064824e7fdSDimitry Andric     Network.addEdge(Baux, Bout, AuxCostInc);
8074824e7fdSDimitry Andric     if (Block.Weight > 0) {
8084824e7fdSDimitry Andric       Network.addEdge(Bout, Baux, AuxCostDec);
8094824e7fdSDimitry Andric       Network.addEdge(Baux, Bin, AuxCostDec);
8104824e7fdSDimitry Andric     }
8114824e7fdSDimitry Andric   }
8124824e7fdSDimitry Andric 
8134824e7fdSDimitry Andric   // Creating edges for every jump
8144824e7fdSDimitry Andric   for (auto &Jump : Func.Jumps) {
8154824e7fdSDimitry Andric     uint64_t Src = Jump.Source;
8164824e7fdSDimitry Andric     uint64_t Dst = Jump.Target;
8174824e7fdSDimitry Andric     if (Src != Dst) {
8184824e7fdSDimitry Andric       uint64_t SrcOut = 3 * Src + 1;
8194824e7fdSDimitry Andric       uint64_t DstIn = 3 * Dst;
8204824e7fdSDimitry Andric       uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0;
8214824e7fdSDimitry Andric       Network.addEdge(SrcOut, DstIn, Cost);
8224824e7fdSDimitry Andric     }
8234824e7fdSDimitry Andric   }
8244824e7fdSDimitry Andric 
8254824e7fdSDimitry Andric   // Make sure we have a valid flow circulation
8264824e7fdSDimitry Andric   Network.addEdge(T, S, 0);
8274824e7fdSDimitry Andric }
8284824e7fdSDimitry Andric 
8294824e7fdSDimitry Andric /// Extract resulting block and edge counts from the flow network.
8304824e7fdSDimitry Andric void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) {
8314824e7fdSDimitry Andric   uint64_t NumBlocks = Func.Blocks.size();
8324824e7fdSDimitry Andric 
8334824e7fdSDimitry Andric   // Extract resulting block counts
8344824e7fdSDimitry Andric   for (uint64_t Src = 0; Src < NumBlocks; Src++) {
8354824e7fdSDimitry Andric     auto &Block = Func.Blocks[Src];
8364824e7fdSDimitry Andric     uint64_t SrcOut = 3 * Src + 1;
8374824e7fdSDimitry Andric     int64_t Flow = 0;
8384824e7fdSDimitry Andric     for (auto &Adj : Network.getFlow(SrcOut)) {
8394824e7fdSDimitry Andric       uint64_t DstIn = Adj.first;
8404824e7fdSDimitry Andric       int64_t DstFlow = Adj.second;
8414824e7fdSDimitry Andric       bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2);
8424824e7fdSDimitry Andric       if (!IsAuxNode || Block.HasSelfEdge) {
8434824e7fdSDimitry Andric         Flow += DstFlow;
8444824e7fdSDimitry Andric       }
8454824e7fdSDimitry Andric     }
8464824e7fdSDimitry Andric     Block.Flow = Flow;
8474824e7fdSDimitry Andric     assert(Flow >= 0 && "negative block flow");
8484824e7fdSDimitry Andric   }
8494824e7fdSDimitry Andric 
8504824e7fdSDimitry Andric   // Extract resulting jump counts
8514824e7fdSDimitry Andric   for (auto &Jump : Func.Jumps) {
8524824e7fdSDimitry Andric     uint64_t Src = Jump.Source;
8534824e7fdSDimitry Andric     uint64_t Dst = Jump.Target;
8544824e7fdSDimitry Andric     int64_t Flow = 0;
8554824e7fdSDimitry Andric     if (Src != Dst) {
8564824e7fdSDimitry Andric       uint64_t SrcOut = 3 * Src + 1;
8574824e7fdSDimitry Andric       uint64_t DstIn = 3 * Dst;
8584824e7fdSDimitry Andric       Flow = Network.getFlow(SrcOut, DstIn);
8594824e7fdSDimitry Andric     } else {
8604824e7fdSDimitry Andric       uint64_t SrcOut = 3 * Src + 1;
8614824e7fdSDimitry Andric       uint64_t SrcAux = 3 * Src + 2;
8624824e7fdSDimitry Andric       int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux);
8634824e7fdSDimitry Andric       if (AuxFlow > 0)
8644824e7fdSDimitry Andric         Flow = AuxFlow;
8654824e7fdSDimitry Andric     }
8664824e7fdSDimitry Andric     Jump.Flow = Flow;
8674824e7fdSDimitry Andric     assert(Flow >= 0 && "negative jump flow");
8684824e7fdSDimitry Andric   }
8694824e7fdSDimitry Andric }
8704824e7fdSDimitry Andric 
8714824e7fdSDimitry Andric #ifndef NDEBUG
8724824e7fdSDimitry Andric /// Verify that the computed flow values satisfy flow conservation rules
8734824e7fdSDimitry Andric void verifyWeights(const FlowFunction &Func) {
8744824e7fdSDimitry Andric   const uint64_t NumBlocks = Func.Blocks.size();
8754824e7fdSDimitry Andric   auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
8764824e7fdSDimitry Andric   auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
8774824e7fdSDimitry Andric   for (auto &Jump : Func.Jumps) {
8784824e7fdSDimitry Andric     InFlow[Jump.Target] += Jump.Flow;
8794824e7fdSDimitry Andric     OutFlow[Jump.Source] += Jump.Flow;
8804824e7fdSDimitry Andric   }
8814824e7fdSDimitry Andric 
8824824e7fdSDimitry Andric   uint64_t TotalInFlow = 0;
8834824e7fdSDimitry Andric   uint64_t TotalOutFlow = 0;
8844824e7fdSDimitry Andric   for (uint64_t I = 0; I < NumBlocks; I++) {
8854824e7fdSDimitry Andric     auto &Block = Func.Blocks[I];
8864824e7fdSDimitry Andric     if (Block.isEntry()) {
8874824e7fdSDimitry Andric       TotalInFlow += Block.Flow;
8884824e7fdSDimitry Andric       assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
8894824e7fdSDimitry Andric     } else if (Block.isExit()) {
8904824e7fdSDimitry Andric       TotalOutFlow += Block.Flow;
8914824e7fdSDimitry Andric       assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
8924824e7fdSDimitry Andric     } else {
8934824e7fdSDimitry Andric       assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
8944824e7fdSDimitry Andric       assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
8954824e7fdSDimitry Andric     }
8964824e7fdSDimitry Andric   }
8974824e7fdSDimitry Andric   assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");
8980eae32dcSDimitry Andric 
8990eae32dcSDimitry Andric   // Verify that there are no isolated flow components
9000eae32dcSDimitry Andric   // One could modify FlowFunction to hold edges indexed by the sources, which
9010eae32dcSDimitry Andric   // will avoid a creation of the object
9020eae32dcSDimitry Andric   auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
9030eae32dcSDimitry Andric   for (auto &Jump : Func.Jumps) {
9040eae32dcSDimitry Andric     if (Jump.Flow > 0) {
9050eae32dcSDimitry Andric       PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
9060eae32dcSDimitry Andric     }
9070eae32dcSDimitry Andric   }
9080eae32dcSDimitry Andric 
9090eae32dcSDimitry Andric   // Run BFS from the source along edges with positive flow
9100eae32dcSDimitry Andric   std::queue<uint64_t> Queue;
911*04eeddc0SDimitry Andric   auto Visited = BitVector(NumBlocks, false);
9120eae32dcSDimitry Andric   Queue.push(Func.Entry);
9130eae32dcSDimitry Andric   Visited[Func.Entry] = true;
9140eae32dcSDimitry Andric   while (!Queue.empty()) {
9150eae32dcSDimitry Andric     uint64_t Src = Queue.front();
9160eae32dcSDimitry Andric     Queue.pop();
9170eae32dcSDimitry Andric     for (uint64_t Dst : PositiveFlowEdges[Src]) {
9180eae32dcSDimitry Andric       if (!Visited[Dst]) {
9190eae32dcSDimitry Andric         Queue.push(Dst);
9200eae32dcSDimitry Andric         Visited[Dst] = true;
9210eae32dcSDimitry Andric       }
9220eae32dcSDimitry Andric     }
9230eae32dcSDimitry Andric   }
9240eae32dcSDimitry Andric 
9250eae32dcSDimitry Andric   // Verify that every block that has a positive flow is reached from the source
9260eae32dcSDimitry Andric   // along edges with a positive flow
9270eae32dcSDimitry Andric   for (uint64_t I = 0; I < NumBlocks; I++) {
9280eae32dcSDimitry Andric     auto &Block = Func.Blocks[I];
9290eae32dcSDimitry Andric     assert((Visited[I] || Block.Flow == 0) && "an isolated flow component");
9300eae32dcSDimitry Andric   }
9314824e7fdSDimitry Andric }
9324824e7fdSDimitry Andric #endif
9334824e7fdSDimitry Andric 
9344824e7fdSDimitry Andric } // end of anonymous namespace
9354824e7fdSDimitry Andric 
9364824e7fdSDimitry Andric /// Apply the profile inference algorithm for a given flow function
9374824e7fdSDimitry Andric void llvm::applyFlowInference(FlowFunction &Func) {
9384824e7fdSDimitry Andric   // Create and apply an inference network model
9394824e7fdSDimitry Andric   auto InferenceNetwork = MinCostMaxFlow();
9404824e7fdSDimitry Andric   initializeNetwork(InferenceNetwork, Func);
9414824e7fdSDimitry Andric   InferenceNetwork.run();
9424824e7fdSDimitry Andric 
9434824e7fdSDimitry Andric   // Extract flow values for every block and every edge
9444824e7fdSDimitry Andric   extractWeights(InferenceNetwork, Func);
9454824e7fdSDimitry Andric 
9460eae32dcSDimitry Andric   // Post-processing adjustments to the flow
9470eae32dcSDimitry Andric   auto Adjuster = FlowAdjuster(Func);
9480eae32dcSDimitry Andric   Adjuster.run();
9490eae32dcSDimitry Andric 
9504824e7fdSDimitry Andric #ifndef NDEBUG
9514824e7fdSDimitry Andric   // Verify the result
9524824e7fdSDimitry Andric   verifyWeights(Func);
9534824e7fdSDimitry Andric #endif
9544824e7fdSDimitry Andric }
955