1 //===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a profile inference algorithm. Given an incomplete and 10 // possibly imprecise block counts, the algorithm reconstructs realistic block 11 // and edge counts that satisfy flow conservation rules, while minimally modify 12 // input block counts. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/Transforms/Utils/SampleProfileInference.h" 17 #include "llvm/ADT/BitVector.h" 18 #include "llvm/Support/CommandLine.h" 19 #include "llvm/Support/Debug.h" 20 #include <queue> 21 #include <set> 22 #include <stack> 23 24 using namespace llvm; 25 #define DEBUG_TYPE "sample-profile-inference" 26 27 namespace { 28 29 static cl::opt<bool> SampleProfileEvenCountDistribution( 30 "sample-profile-even-count-distribution", cl::init(true), cl::Hidden, 31 cl::ZeroOrMore, 32 cl::desc("Try to evenly distribute counts when there are multiple equally " 33 "likely options.")); 34 35 static cl::opt<unsigned> SampleProfileMaxDfsCalls( 36 "sample-profile-max-dfs-calls", cl::init(10), cl::Hidden, 37 cl::desc("Maximum number of dfs iterations for even count distribution.")); 38 39 static cl::opt<unsigned> SampleProfileProfiCostInc( 40 "sample-profile-profi-cost-inc", cl::init(10), cl::Hidden, 41 cl::desc("A cost of increasing a block's count by one.")); 42 43 static cl::opt<unsigned> SampleProfileProfiCostDec( 44 "sample-profile-profi-cost-dec", cl::init(20), cl::Hidden, 45 cl::desc("A cost of decreasing a block's count by one.")); 46 47 static cl::opt<unsigned> SampleProfileProfiCostIncZero( 48 "sample-profile-profi-cost-inc-zero", cl::init(11), cl::Hidden, 49 cl::ZeroOrMore, 50 cl::desc("A cost of increasing a count of zero-weight block by one.")); 51 52 static cl::opt<unsigned> SampleProfileProfiCostIncEntry( 53 "sample-profile-profi-cost-inc-entry", cl::init(40), cl::Hidden, 54 cl::ZeroOrMore, 55 cl::desc("A cost of increasing the entry block's count by one.")); 56 57 static cl::opt<unsigned> SampleProfileProfiCostDecEntry( 58 "sample-profile-profi-cost-dec-entry", cl::init(10), cl::Hidden, 59 cl::ZeroOrMore, 60 cl::desc("A cost of decreasing the entry block's count by one.")); 61 62 /// A value indicating an infinite flow/capacity/weight of a block/edge. 63 /// Not using numeric_limits<int64_t>::max(), as the values can be summed up 64 /// during the execution. 65 static constexpr int64_t INF = ((int64_t)1) << 50; 66 67 /// The minimum-cost maximum flow algorithm. 68 /// 69 /// The algorithm finds the maximum flow of minimum cost on a given (directed) 70 /// network using a modified version of the classical Moore-Bellman-Ford 71 /// approach. The algorithm applies a number of augmentation iterations in which 72 /// flow is sent along paths of positive capacity from the source to the sink. 73 /// The worst-case time complexity of the implementation is O(v(f)*m*n), where 74 /// where m is the number of edges, n is the number of vertices, and v(f) is the 75 /// value of the maximum flow. However, the observed running time on typical 76 /// instances is sub-quadratic, that is, o(n^2). 77 /// 78 /// The input is a set of edges with specified costs and capacities, and a pair 79 /// of nodes (source and sink). The output is the flow along each edge of the 80 /// minimum total cost respecting the given edge capacities. 81 class MinCostMaxFlow { 82 public: 83 // Initialize algorithm's data structures for a network of a given size. 84 void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) { 85 Source = SourceNode; 86 Target = SinkNode; 87 88 Nodes = std::vector<Node>(NodeCount); 89 Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>()); 90 if (SampleProfileEvenCountDistribution) 91 AugmentingEdges = 92 std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>()); 93 } 94 95 // Run the algorithm. 96 int64_t run() { 97 // Iteratively find an augmentation path/dag in the network and send the 98 // flow along its edges 99 size_t AugmentationIters = applyFlowAugmentation(); 100 101 // Compute the total flow and its cost 102 int64_t TotalCost = 0; 103 int64_t TotalFlow = 0; 104 for (uint64_t Src = 0; Src < Nodes.size(); Src++) { 105 for (auto &Edge : Edges[Src]) { 106 if (Edge.Flow > 0) { 107 TotalCost += Edge.Cost * Edge.Flow; 108 if (Src == Source) 109 TotalFlow += Edge.Flow; 110 } 111 } 112 } 113 LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters 114 << " iterations with " << TotalFlow << " total flow" 115 << " of " << TotalCost << " cost\n"); 116 (void)TotalFlow; 117 (void)AugmentationIters; 118 return TotalCost; 119 } 120 121 /// Adding an edge to the network with a specified capacity and a cost. 122 /// Multiple edges between a pair of nodes are allowed but self-edges 123 /// are not supported. 124 void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) { 125 assert(Capacity > 0 && "adding an edge of zero capacity"); 126 assert(Src != Dst && "loop edge are not supported"); 127 128 Edge SrcEdge; 129 SrcEdge.Dst = Dst; 130 SrcEdge.Cost = Cost; 131 SrcEdge.Capacity = Capacity; 132 SrcEdge.Flow = 0; 133 SrcEdge.RevEdgeIndex = Edges[Dst].size(); 134 135 Edge DstEdge; 136 DstEdge.Dst = Src; 137 DstEdge.Cost = -Cost; 138 DstEdge.Capacity = 0; 139 DstEdge.Flow = 0; 140 DstEdge.RevEdgeIndex = Edges[Src].size(); 141 142 Edges[Src].push_back(SrcEdge); 143 Edges[Dst].push_back(DstEdge); 144 } 145 146 /// Adding an edge to the network of infinite capacity and a given cost. 147 void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) { 148 addEdge(Src, Dst, INF, Cost); 149 } 150 151 /// Get the total flow from a given source node. 152 /// Returns a list of pairs (target node, amount of flow to the target). 153 const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const { 154 std::vector<std::pair<uint64_t, int64_t>> Flow; 155 for (auto &Edge : Edges[Src]) { 156 if (Edge.Flow > 0) 157 Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow)); 158 } 159 return Flow; 160 } 161 162 /// Get the total flow between a pair of nodes. 163 int64_t getFlow(uint64_t Src, uint64_t Dst) const { 164 int64_t Flow = 0; 165 for (auto &Edge : Edges[Src]) { 166 if (Edge.Dst == Dst) { 167 Flow += Edge.Flow; 168 } 169 } 170 return Flow; 171 } 172 173 /// A cost of taking an unlikely jump. 174 static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 30; 175 /// Minimum BaseDistance for the jump distance values in island joining. 176 static constexpr uint64_t MinBaseDistance = 10000; 177 178 private: 179 /// Iteratively find an augmentation path/dag in the network and send the 180 /// flow along its edges. The method returns the number of applied iterations. 181 size_t applyFlowAugmentation() { 182 size_t AugmentationIters = 0; 183 while (findAugmentingPath()) { 184 uint64_t PathCapacity = computeAugmentingPathCapacity(); 185 while (PathCapacity > 0) { 186 bool Progress = false; 187 if (SampleProfileEvenCountDistribution) { 188 // Identify node/edge candidates for augmentation 189 identifyShortestEdges(PathCapacity); 190 191 // Find an augmenting DAG 192 auto AugmentingOrder = findAugmentingDAG(); 193 194 // Apply the DAG augmentation 195 Progress = augmentFlowAlongDAG(AugmentingOrder); 196 PathCapacity = computeAugmentingPathCapacity(); 197 } 198 199 if (!Progress) { 200 augmentFlowAlongPath(PathCapacity); 201 PathCapacity = 0; 202 } 203 204 AugmentationIters++; 205 } 206 } 207 return AugmentationIters; 208 } 209 210 /// Compute the capacity of the cannonical augmenting path. If the path is 211 /// saturated (that is, no flow can be sent along the path), then return 0. 212 uint64_t computeAugmentingPathCapacity() { 213 uint64_t PathCapacity = INF; 214 uint64_t Now = Target; 215 while (Now != Source) { 216 uint64_t Pred = Nodes[Now].ParentNode; 217 auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; 218 219 assert(Edge.Capacity >= Edge.Flow && "incorrect edge flow"); 220 uint64_t EdgeCapacity = uint64_t(Edge.Capacity - Edge.Flow); 221 PathCapacity = std::min(PathCapacity, EdgeCapacity); 222 223 Now = Pred; 224 } 225 return PathCapacity; 226 } 227 228 /// Check for existence of an augmenting path with a positive capacity. 229 bool findAugmentingPath() { 230 // Initialize data structures 231 for (auto &Node : Nodes) { 232 Node.Distance = INF; 233 Node.ParentNode = uint64_t(-1); 234 Node.ParentEdgeIndex = uint64_t(-1); 235 Node.Taken = false; 236 } 237 238 std::queue<uint64_t> Queue; 239 Queue.push(Source); 240 Nodes[Source].Distance = 0; 241 Nodes[Source].Taken = true; 242 while (!Queue.empty()) { 243 uint64_t Src = Queue.front(); 244 Queue.pop(); 245 Nodes[Src].Taken = false; 246 // Although the residual network contains edges with negative costs 247 // (in particular, backward edges), it can be shown that there are no 248 // negative-weight cycles and the following two invariants are maintained: 249 // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V, 250 // where Dist is the length of the shortest path between two nodes. This 251 // allows to prune the search-space of the path-finding algorithm using 252 // the following early-stop criteria: 253 // -- If we find a path with zero-distance from Source to Target, stop the 254 // search, as the path is the shortest since Dist[Source, Target] >= 0; 255 // -- If we have Dist[Source, V] > Dist[Source, Target], then do not 256 // process node V, as it is guaranteed _not_ to be on a shortest path 257 // from Source to Target; it follows from inequalities 258 // Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target] 259 // >= Dist[Source, V] 260 if (!SampleProfileEvenCountDistribution && Nodes[Target].Distance == 0) 261 break; 262 if (Nodes[Src].Distance > Nodes[Target].Distance) 263 continue; 264 265 // Process adjacent edges 266 for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) { 267 auto &Edge = Edges[Src][EdgeIdx]; 268 if (Edge.Flow < Edge.Capacity) { 269 uint64_t Dst = Edge.Dst; 270 int64_t NewDistance = Nodes[Src].Distance + Edge.Cost; 271 if (Nodes[Dst].Distance > NewDistance) { 272 // Update the distance and the parent node/edge 273 Nodes[Dst].Distance = NewDistance; 274 Nodes[Dst].ParentNode = Src; 275 Nodes[Dst].ParentEdgeIndex = EdgeIdx; 276 // Add the node to the queue, if it is not there yet 277 if (!Nodes[Dst].Taken) { 278 Queue.push(Dst); 279 Nodes[Dst].Taken = true; 280 } 281 } 282 } 283 } 284 } 285 286 return Nodes[Target].Distance != INF; 287 } 288 289 /// Update the current flow along the augmenting path. 290 void augmentFlowAlongPath(uint64_t PathCapacity) { 291 assert(PathCapacity > 0 && "found an incorrect augmenting path"); 292 uint64_t Now = Target; 293 while (Now != Source) { 294 uint64_t Pred = Nodes[Now].ParentNode; 295 auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; 296 auto &RevEdge = Edges[Now][Edge.RevEdgeIndex]; 297 298 Edge.Flow += PathCapacity; 299 RevEdge.Flow -= PathCapacity; 300 301 Now = Pred; 302 } 303 } 304 305 /// Find an Augmenting DAG order using a modified version of DFS in which we 306 /// can visit a node multiple times. In the DFS search, when scanning each 307 /// edge out of a node, continue search at Edge.Dst endpoint if it has not 308 /// been discovered yet and its NumCalls < MaxDfsCalls. The algorithm 309 /// runs in O(MaxDfsCalls * |Edges| + |Nodes|) time. 310 /// It returns an Augmenting Order (Taken nodes in decreasing Finish time) 311 /// that starts with Source and ends with Target. 312 std::vector<uint64_t> findAugmentingDAG() { 313 // We use a stack based implemenation of DFS to avoid recursion. 314 // Defining DFS data structures: 315 // A pair (NodeIdx, EdgeIdx) at the top of the Stack denotes that 316 // - we are currently visiting Nodes[NodeIdx] and 317 // - the next edge to scan is Edges[NodeIdx][EdgeIdx] 318 typedef std::pair<uint64_t, uint64_t> StackItemType; 319 std::stack<StackItemType> Stack; 320 std::vector<uint64_t> AugmentingOrder; 321 322 // Phase 0: Initialize Node attributes and Time for DFS run 323 for (auto &Node : Nodes) { 324 Node.Discovery = 0; 325 Node.Finish = 0; 326 Node.NumCalls = 0; 327 Node.Taken = false; 328 } 329 uint64_t Time = 0; 330 // Mark Target as Taken 331 // Taken attribute will be propagated backwards from Target towards Source 332 Nodes[Target].Taken = true; 333 334 // Phase 1: Start DFS traversal from Source 335 Stack.emplace(Source, 0); 336 Nodes[Source].Discovery = ++Time; 337 while (!Stack.empty()) { 338 auto NodeIdx = Stack.top().first; 339 auto EdgeIdx = Stack.top().second; 340 341 // If we haven't scanned all edges out of NodeIdx, continue scanning 342 if (EdgeIdx < Edges[NodeIdx].size()) { 343 auto &Edge = Edges[NodeIdx][EdgeIdx]; 344 auto &Dst = Nodes[Edge.Dst]; 345 Stack.top().second++; 346 347 if (Edge.OnShortestPath) { 348 // If we haven't seen Edge.Dst so far, continue DFS search there 349 if (Dst.Discovery == 0 && Dst.NumCalls < SampleProfileMaxDfsCalls) { 350 Dst.Discovery = ++Time; 351 Stack.emplace(Edge.Dst, 0); 352 Dst.NumCalls++; 353 } else if (Dst.Taken && Dst.Finish != 0) { 354 // Else, if Edge.Dst already have a path to Target, so that NodeIdx 355 Nodes[NodeIdx].Taken = true; 356 } 357 } 358 } else { 359 // If we are done scanning all edge out of NodeIdx 360 Stack.pop(); 361 // If we haven't found a path from NodeIdx to Target, forget about it 362 if (!Nodes[NodeIdx].Taken) { 363 Nodes[NodeIdx].Discovery = 0; 364 } else { 365 // If we have found a path from NodeIdx to Target, then finish NodeIdx 366 // and propagate Taken flag to DFS parent unless at the Source 367 Nodes[NodeIdx].Finish = ++Time; 368 // NodeIdx == Source if and only if the stack is empty 369 if (NodeIdx != Source) { 370 assert(!Stack.empty() && "empty stack while running dfs"); 371 Nodes[Stack.top().first].Taken = true; 372 } 373 AugmentingOrder.push_back(NodeIdx); 374 } 375 } 376 } 377 // Nodes are collected decreasing Finish time, so the order is reversed 378 std::reverse(AugmentingOrder.begin(), AugmentingOrder.end()); 379 380 // Phase 2: Extract all forward (DAG) edges and fill in AugmentingEdges 381 for (size_t Src : AugmentingOrder) { 382 AugmentingEdges[Src].clear(); 383 for (auto &Edge : Edges[Src]) { 384 uint64_t Dst = Edge.Dst; 385 if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken && 386 Nodes[Dst].Finish < Nodes[Src].Finish) { 387 AugmentingEdges[Src].push_back(&Edge); 388 } 389 } 390 assert((Src == Target || !AugmentingEdges[Src].empty()) && 391 "incorrectly constructed augmenting edges"); 392 } 393 394 return AugmentingOrder; 395 } 396 397 /// Update the current flow along the given (acyclic) subgraph specified by 398 /// the vertex order, AugmentingOrder. The objective is to send as much flow 399 /// as possible while evenly distributing flow among successors of each node. 400 /// After the update at least one edge is saturated. 401 bool augmentFlowAlongDAG(const std::vector<uint64_t> &AugmentingOrder) { 402 // Phase 0: Initialization 403 for (uint64_t Src : AugmentingOrder) { 404 Nodes[Src].FracFlow = 0; 405 Nodes[Src].IntFlow = 0; 406 for (auto &Edge : AugmentingEdges[Src]) { 407 Edge->AugmentedFlow = 0; 408 } 409 } 410 411 // Phase 1: Send a unit of fractional flow along the DAG 412 uint64_t MaxFlowAmount = INF; 413 Nodes[Source].FracFlow = 1.0; 414 for (uint64_t Src : AugmentingOrder) { 415 assert((Src == Target || Nodes[Src].FracFlow > 0.0) && 416 "incorrectly computed fractional flow"); 417 // Distribute flow evenly among successors of Src 418 uint64_t Degree = AugmentingEdges[Src].size(); 419 for (auto &Edge : AugmentingEdges[Src]) { 420 double EdgeFlow = Nodes[Src].FracFlow / Degree; 421 Nodes[Edge->Dst].FracFlow += EdgeFlow; 422 if (Edge->Capacity == INF) 423 continue; 424 uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow; 425 MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow); 426 } 427 } 428 // Stop early if we cannot send any (integral) flow from Source to Target 429 if (MaxFlowAmount == 0) 430 return false; 431 432 // Phase 2: Send an integral flow of MaxFlowAmount 433 Nodes[Source].IntFlow = MaxFlowAmount; 434 for (uint64_t Src : AugmentingOrder) { 435 if (Src == Target) 436 break; 437 // Distribute flow evenly among successors of Src, rounding up to make 438 // sure all flow is sent 439 uint64_t Degree = AugmentingEdges[Src].size(); 440 // We are guaranteeed that Node[Src].IntFlow <= SuccFlow * Degree 441 uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree; 442 for (auto &Edge : AugmentingEdges[Src]) { 443 uint64_t Dst = Edge->Dst; 444 uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow); 445 EdgeFlow = std::min(EdgeFlow, uint64_t(Edge->Capacity - Edge->Flow)); 446 Nodes[Dst].IntFlow += EdgeFlow; 447 Nodes[Src].IntFlow -= EdgeFlow; 448 Edge->AugmentedFlow += EdgeFlow; 449 } 450 } 451 assert(Nodes[Target].IntFlow <= MaxFlowAmount); 452 Nodes[Target].IntFlow = 0; 453 454 // Phase 3: Send excess flow back traversing the nodes backwards. 455 // Because of rounding, not all flow can be sent along the edges of Src. 456 // Hence, sending the remaining flow back to maintain flow conservation 457 for (size_t Idx = AugmentingOrder.size() - 1; Idx > 0; Idx--) { 458 uint64_t Src = AugmentingOrder[Idx - 1]; 459 // Try to send excess flow back along each edge. 460 // Make sure we only send back flow we just augmented (AugmentedFlow). 461 for (auto &Edge : AugmentingEdges[Src]) { 462 uint64_t Dst = Edge->Dst; 463 if (Nodes[Dst].IntFlow == 0) 464 continue; 465 uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow); 466 Nodes[Dst].IntFlow -= EdgeFlow; 467 Nodes[Src].IntFlow += EdgeFlow; 468 Edge->AugmentedFlow -= EdgeFlow; 469 } 470 } 471 472 // Phase 4: Update flow values along all edges 473 bool HasSaturatedEdges = false; 474 for (uint64_t Src : AugmentingOrder) { 475 // Verify that we have sent all the excess flow from the node 476 assert(Src == Source || Nodes[Src].IntFlow == 0); 477 for (auto &Edge : AugmentingEdges[Src]) { 478 assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow); 479 // Update flow values along the edge and its reverse copy 480 auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex]; 481 Edge->Flow += Edge->AugmentedFlow; 482 RevEdge.Flow -= Edge->AugmentedFlow; 483 if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0) 484 HasSaturatedEdges = true; 485 } 486 } 487 488 // The augmentation is successful iff at least one edge becomes saturated 489 return HasSaturatedEdges; 490 } 491 492 /// Identify candidate (shortest) edges for augmentation. 493 void identifyShortestEdges(uint64_t PathCapacity) { 494 assert(PathCapacity > 0 && "found an incorrect augmenting DAG"); 495 // To make sure the augmentation DAG contains only edges with large residual 496 // capacity, we prune all edges whose capacity is below a fraction of 497 // the capacity of the augmented path. 498 // (All edges of the path itself are always in the DAG) 499 uint64_t MinCapacity = std::max(PathCapacity / 2, uint64_t(1)); 500 501 // Decide which edges are on a shortest path from Source to Target 502 for (size_t Src = 0; Src < Nodes.size(); Src++) { 503 // An edge cannot be augmenting if the endpoint has large distance 504 if (Nodes[Src].Distance > Nodes[Target].Distance) 505 continue; 506 507 for (auto &Edge : Edges[Src]) { 508 uint64_t Dst = Edge.Dst; 509 Edge.OnShortestPath = 510 Src != Target && Dst != Source && 511 Nodes[Dst].Distance <= Nodes[Target].Distance && 512 Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost && 513 Edge.Capacity > Edge.Flow && 514 uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity; 515 } 516 } 517 } 518 519 /// A node in a flow network. 520 struct Node { 521 /// The cost of the cheapest path from the source to the current node. 522 int64_t Distance; 523 /// The node preceding the current one in the path. 524 uint64_t ParentNode; 525 /// The index of the edge between ParentNode and the current node. 526 uint64_t ParentEdgeIndex; 527 /// An indicator of whether the current node is in a queue. 528 bool Taken; 529 530 /// Data fields utilized in DAG-augmentation: 531 /// Fractional flow. 532 double FracFlow; 533 /// Integral flow. 534 uint64_t IntFlow; 535 /// Discovery time. 536 uint64_t Discovery; 537 /// Finish time. 538 uint64_t Finish; 539 /// NumCalls. 540 uint64_t NumCalls; 541 }; 542 543 /// An edge in a flow network. 544 struct Edge { 545 /// The cost of the edge. 546 int64_t Cost; 547 /// The capacity of the edge. 548 int64_t Capacity; 549 /// The current flow on the edge. 550 int64_t Flow; 551 /// The destination node of the edge. 552 uint64_t Dst; 553 /// The index of the reverse edge between Dst and the current node. 554 uint64_t RevEdgeIndex; 555 556 /// Data fields utilized in DAG-augmentation: 557 /// Whether the edge is currently on a shortest path from Source to Target. 558 bool OnShortestPath; 559 /// Extra flow along the edge. 560 uint64_t AugmentedFlow; 561 }; 562 563 /// The set of network nodes. 564 std::vector<Node> Nodes; 565 /// The set of network edges. 566 std::vector<std::vector<Edge>> Edges; 567 /// Source node of the flow. 568 uint64_t Source; 569 /// Target (sink) node of the flow. 570 uint64_t Target; 571 /// Augmenting edges. 572 std::vector<std::vector<Edge *>> AugmentingEdges; 573 }; 574 575 constexpr int64_t MinCostMaxFlow::AuxCostUnlikely; 576 constexpr uint64_t MinCostMaxFlow::MinBaseDistance; 577 578 /// A post-processing adjustment of control flow. It applies two steps by 579 /// rerouting some flow and making it more realistic: 580 /// 581 /// - First, it removes all isolated components ("islands") with a positive flow 582 /// that are unreachable from the entry block. For every such component, we 583 /// find the shortest from the entry to an exit passing through the component, 584 /// and increase the flow by one unit along the path. 585 /// 586 /// - Second, it identifies all "unknown subgraphs" consisting of basic blocks 587 /// with no sampled counts. Then it rebalnces the flow that goes through such 588 /// a subgraph so that each branch is taken with probability 50%. 589 /// An unknown subgraph is such that for every two nodes u and v: 590 /// - u dominates v and u is not unknown; 591 /// - v post-dominates u; and 592 /// - all inner-nodes of all (u,v)-paths are unknown. 593 /// 594 class FlowAdjuster { 595 public: 596 FlowAdjuster(FlowFunction &Func) : Func(Func) { 597 assert(Func.Blocks[Func.Entry].isEntry() && 598 "incorrect index of the entry block"); 599 } 600 601 // Run the post-processing 602 void run() { 603 /// Adjust the flow to get rid of isolated components. 604 joinIsolatedComponents(); 605 606 /// Rebalance the flow inside unknown subgraphs. 607 rebalanceUnknownSubgraphs(); 608 } 609 610 private: 611 void joinIsolatedComponents() { 612 // Find blocks that are reachable from the source 613 auto Visited = BitVector(NumBlocks(), false); 614 findReachable(Func.Entry, Visited); 615 616 // Iterate over all non-reachable blocks and adjust their weights 617 for (uint64_t I = 0; I < NumBlocks(); I++) { 618 auto &Block = Func.Blocks[I]; 619 if (Block.Flow > 0 && !Visited[I]) { 620 // Find a path from the entry to an exit passing through the block I 621 auto Path = findShortestPath(I); 622 // Increase the flow along the path 623 assert(Path.size() > 0 && Path[0]->Source == Func.Entry && 624 "incorrectly computed path adjusting control flow"); 625 Func.Blocks[Func.Entry].Flow += 1; 626 for (auto &Jump : Path) { 627 Jump->Flow += 1; 628 Func.Blocks[Jump->Target].Flow += 1; 629 // Update reachability 630 findReachable(Jump->Target, Visited); 631 } 632 } 633 } 634 } 635 636 /// Run BFS from a given block along the jumps with a positive flow and mark 637 /// all reachable blocks. 638 void findReachable(uint64_t Src, BitVector &Visited) { 639 if (Visited[Src]) 640 return; 641 std::queue<uint64_t> Queue; 642 Queue.push(Src); 643 Visited[Src] = true; 644 while (!Queue.empty()) { 645 Src = Queue.front(); 646 Queue.pop(); 647 for (auto Jump : Func.Blocks[Src].SuccJumps) { 648 uint64_t Dst = Jump->Target; 649 if (Jump->Flow > 0 && !Visited[Dst]) { 650 Queue.push(Dst); 651 Visited[Dst] = true; 652 } 653 } 654 } 655 } 656 657 /// Find the shortest path from the entry block to an exit block passing 658 /// through a given block. 659 std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) { 660 // A path from the entry block to BlockIdx 661 auto ForwardPath = findShortestPath(Func.Entry, BlockIdx); 662 // A path from BlockIdx to an exit block 663 auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock); 664 665 // Concatenate the two paths 666 std::vector<FlowJump *> Result; 667 Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end()); 668 Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end()); 669 return Result; 670 } 671 672 /// Apply the Dijkstra algorithm to find the shortest path from a given 673 /// Source to a given Target block. 674 /// If Target == -1, then the path ends at an exit block. 675 std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) { 676 // Quit early, if possible 677 if (Source == Target) 678 return std::vector<FlowJump *>(); 679 if (Func.Blocks[Source].isExit() && Target == AnyExitBlock) 680 return std::vector<FlowJump *>(); 681 682 // Initialize data structures 683 auto Distance = std::vector<int64_t>(NumBlocks(), INF); 684 auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr); 685 Distance[Source] = 0; 686 std::set<std::pair<uint64_t, uint64_t>> Queue; 687 Queue.insert(std::make_pair(Distance[Source], Source)); 688 689 // Run the Dijkstra algorithm 690 while (!Queue.empty()) { 691 uint64_t Src = Queue.begin()->second; 692 Queue.erase(Queue.begin()); 693 // If we found a solution, quit early 694 if (Src == Target || 695 (Func.Blocks[Src].isExit() && Target == AnyExitBlock)) 696 break; 697 698 for (auto Jump : Func.Blocks[Src].SuccJumps) { 699 uint64_t Dst = Jump->Target; 700 int64_t JumpDist = jumpDistance(Jump); 701 if (Distance[Dst] > Distance[Src] + JumpDist) { 702 Queue.erase(std::make_pair(Distance[Dst], Dst)); 703 704 Distance[Dst] = Distance[Src] + JumpDist; 705 Parent[Dst] = Jump; 706 707 Queue.insert(std::make_pair(Distance[Dst], Dst)); 708 } 709 } 710 } 711 // If Target is not provided, find the closest exit block 712 if (Target == AnyExitBlock) { 713 for (uint64_t I = 0; I < NumBlocks(); I++) { 714 if (Func.Blocks[I].isExit() && Parent[I] != nullptr) { 715 if (Target == AnyExitBlock || Distance[Target] > Distance[I]) { 716 Target = I; 717 } 718 } 719 } 720 } 721 assert(Parent[Target] != nullptr && "a path does not exist"); 722 723 // Extract the constructed path 724 std::vector<FlowJump *> Result; 725 uint64_t Now = Target; 726 while (Now != Source) { 727 assert(Now == Parent[Now]->Target && "incorrect parent jump"); 728 Result.push_back(Parent[Now]); 729 Now = Parent[Now]->Source; 730 } 731 // Reverse the path, since it is extracted from Target to Source 732 std::reverse(Result.begin(), Result.end()); 733 return Result; 734 } 735 736 /// A distance of a path for a given jump. 737 /// In order to incite the path to use blocks/jumps with large positive flow, 738 /// and avoid changing branch probability of outgoing edges drastically, 739 /// set the jump distance so as: 740 /// - to minimize the number of unlikely jumps used and subject to that, 741 /// - to minimize the number of Flow == 0 jumps used and subject to that, 742 /// - minimizes total multiplicative Flow increase for the remaining edges. 743 /// To capture this objective with integer distances, we round off fractional 744 /// parts to a multiple of 1 / BaseDistance. 745 int64_t jumpDistance(FlowJump *Jump) const { 746 uint64_t BaseDistance = 747 std::max(static_cast<uint64_t>(MinCostMaxFlow::MinBaseDistance), 748 std::min(Func.Blocks[Func.Entry].Flow, 749 MinCostMaxFlow::AuxCostUnlikely / NumBlocks())); 750 if (Jump->IsUnlikely) 751 return MinCostMaxFlow::AuxCostUnlikely; 752 if (Jump->Flow > 0) 753 return BaseDistance + BaseDistance / Jump->Flow; 754 return BaseDistance * NumBlocks(); 755 }; 756 757 uint64_t NumBlocks() const { return Func.Blocks.size(); } 758 759 /// Rebalance unknown subgraphs so that the flow is split evenly across the 760 /// outgoing branches of every block of the subgraph. The method iterates over 761 /// blocks with known weight and identifies unknown subgraphs rooted at the 762 /// blocks. Then it verifies if flow rebalancing is feasible and applies it. 763 void rebalanceUnknownSubgraphs() { 764 // Try to find unknown subgraphs from each block 765 for (uint64_t I = 0; I < Func.Blocks.size(); I++) { 766 auto SrcBlock = &Func.Blocks[I]; 767 // Verify if rebalancing rooted at SrcBlock is feasible 768 if (!canRebalanceAtRoot(SrcBlock)) 769 continue; 770 771 // Find an unknown subgraphs starting at SrcBlock. Along the way, 772 // fill in known destinations and intermediate unknown blocks. 773 std::vector<FlowBlock *> UnknownBlocks; 774 std::vector<FlowBlock *> KnownDstBlocks; 775 findUnknownSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks); 776 777 // Verify if rebalancing of the subgraph is feasible. If the search is 778 // successful, find the unique destination block (which can be null) 779 FlowBlock *DstBlock = nullptr; 780 if (!canRebalanceSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks, 781 DstBlock)) 782 continue; 783 784 // We cannot rebalance subgraphs containing cycles among unknown blocks 785 if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownBlocks)) 786 continue; 787 788 // Rebalance the flow 789 rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownBlocks); 790 } 791 } 792 793 /// Verify if rebalancing rooted at a given block is possible. 794 bool canRebalanceAtRoot(const FlowBlock *SrcBlock) { 795 // Do not attempt to find unknown subgraphs from an unknown or a 796 // zero-flow block 797 if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0) 798 return false; 799 800 // Do not attempt to process subgraphs from a block w/o unknown sucessors 801 bool HasUnknownSuccs = false; 802 for (auto Jump : SrcBlock->SuccJumps) { 803 if (Func.Blocks[Jump->Target].UnknownWeight) { 804 HasUnknownSuccs = true; 805 break; 806 } 807 } 808 if (!HasUnknownSuccs) 809 return false; 810 811 return true; 812 } 813 814 /// Find an unknown subgraph starting at block SrcBlock. The method sets 815 /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks. 816 void findUnknownSubgraph(const FlowBlock *SrcBlock, 817 std::vector<FlowBlock *> &KnownDstBlocks, 818 std::vector<FlowBlock *> &UnknownBlocks) { 819 // Run BFS from SrcBlock and make sure all paths are going through unknown 820 // blocks and end at a known DstBlock 821 auto Visited = BitVector(NumBlocks(), false); 822 std::queue<uint64_t> Queue; 823 824 Queue.push(SrcBlock->Index); 825 Visited[SrcBlock->Index] = true; 826 while (!Queue.empty()) { 827 auto &Block = Func.Blocks[Queue.front()]; 828 Queue.pop(); 829 // Process blocks reachable from Block 830 for (auto Jump : Block.SuccJumps) { 831 // If Jump can be ignored, skip it 832 if (ignoreJump(SrcBlock, nullptr, Jump)) 833 continue; 834 835 uint64_t Dst = Jump->Target; 836 // If Dst has been visited, skip Jump 837 if (Visited[Dst]) 838 continue; 839 // Process block Dst 840 Visited[Dst] = true; 841 if (!Func.Blocks[Dst].UnknownWeight) { 842 KnownDstBlocks.push_back(&Func.Blocks[Dst]); 843 } else { 844 Queue.push(Dst); 845 UnknownBlocks.push_back(&Func.Blocks[Dst]); 846 } 847 } 848 } 849 } 850 851 /// Verify if rebalancing of the subgraph is feasible. If the checks are 852 /// successful, set the unique destination block, DstBlock (can be null). 853 bool canRebalanceSubgraph(const FlowBlock *SrcBlock, 854 const std::vector<FlowBlock *> &KnownDstBlocks, 855 const std::vector<FlowBlock *> &UnknownBlocks, 856 FlowBlock *&DstBlock) { 857 // If the list of unknown blocks is empty, we don't need rebalancing 858 if (UnknownBlocks.empty()) 859 return false; 860 861 // If there are multiple known sinks, we can't rebalance 862 if (KnownDstBlocks.size() > 1) 863 return false; 864 DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front(); 865 866 // Verify sinks of the subgraph 867 for (auto Block : UnknownBlocks) { 868 if (Block->SuccJumps.empty()) { 869 // If there are multiple (known and unknown) sinks, we can't rebalance 870 if (DstBlock != nullptr) 871 return false; 872 continue; 873 } 874 size_t NumIgnoredJumps = 0; 875 for (auto Jump : Block->SuccJumps) { 876 if (ignoreJump(SrcBlock, DstBlock, Jump)) 877 NumIgnoredJumps++; 878 } 879 // If there is a non-sink block in UnknownBlocks with all jumps ignored, 880 // then we can't rebalance 881 if (NumIgnoredJumps == Block->SuccJumps.size()) 882 return false; 883 } 884 885 return true; 886 } 887 888 /// Decide whether the Jump is ignored while processing an unknown subgraphs 889 /// rooted at basic block SrcBlock with the destination block, DstBlock. 890 bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock, 891 const FlowJump *Jump) { 892 // Ignore unlikely jumps with zero flow 893 if (Jump->IsUnlikely && Jump->Flow == 0) 894 return true; 895 896 auto JumpSource = &Func.Blocks[Jump->Source]; 897 auto JumpTarget = &Func.Blocks[Jump->Target]; 898 899 // Do not ignore jumps coming into DstBlock 900 if (DstBlock != nullptr && JumpTarget == DstBlock) 901 return false; 902 903 // Ignore jumps out of SrcBlock to known blocks 904 if (!JumpTarget->UnknownWeight && JumpSource == SrcBlock) 905 return true; 906 907 // Ignore jumps to known blocks with zero flow 908 if (!JumpTarget->UnknownWeight && JumpTarget->Flow == 0) 909 return true; 910 911 return false; 912 } 913 914 /// Verify if the given unknown subgraph is acyclic, and if yes, reorder 915 /// UnknownBlocks in the topological order (so that all jumps are "forward"). 916 bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock, 917 std::vector<FlowBlock *> &UnknownBlocks) { 918 // Extract local in-degrees in the considered subgraph 919 auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0); 920 auto fillInDegree = [&](const FlowBlock *Block) { 921 for (auto Jump : Block->SuccJumps) { 922 if (ignoreJump(SrcBlock, DstBlock, Jump)) 923 continue; 924 LocalInDegree[Jump->Target]++; 925 } 926 }; 927 fillInDegree(SrcBlock); 928 for (auto Block : UnknownBlocks) { 929 fillInDegree(Block); 930 } 931 // A loop containing SrcBlock 932 if (LocalInDegree[SrcBlock->Index] > 0) 933 return false; 934 935 std::vector<FlowBlock *> AcyclicOrder; 936 std::queue<uint64_t> Queue; 937 Queue.push(SrcBlock->Index); 938 while (!Queue.empty()) { 939 FlowBlock *Block = &Func.Blocks[Queue.front()]; 940 Queue.pop(); 941 // Stop propagation once we reach DstBlock, if any 942 if (DstBlock != nullptr && Block == DstBlock) 943 break; 944 945 // Keep an acyclic order of unknown blocks 946 if (Block->UnknownWeight && Block != SrcBlock) 947 AcyclicOrder.push_back(Block); 948 949 // Add to the queue all successors with zero local in-degree 950 for (auto Jump : Block->SuccJumps) { 951 if (ignoreJump(SrcBlock, DstBlock, Jump)) 952 continue; 953 uint64_t Dst = Jump->Target; 954 LocalInDegree[Dst]--; 955 if (LocalInDegree[Dst] == 0) { 956 Queue.push(Dst); 957 } 958 } 959 } 960 961 // If there is a cycle in the subgraph, AcyclicOrder contains only a subset 962 // of all blocks 963 if (UnknownBlocks.size() != AcyclicOrder.size()) 964 return false; 965 UnknownBlocks = AcyclicOrder; 966 return true; 967 } 968 969 /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and 970 /// having UnknownBlocks intermediate blocks. 971 void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock, 972 const FlowBlock *DstBlock, 973 const std::vector<FlowBlock *> &UnknownBlocks) { 974 assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph"); 975 976 // Ditribute flow from the source block 977 uint64_t BlockFlow = 0; 978 // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps 979 for (auto Jump : SrcBlock->SuccJumps) { 980 if (ignoreJump(SrcBlock, DstBlock, Jump)) 981 continue; 982 BlockFlow += Jump->Flow; 983 } 984 rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow); 985 986 // Ditribute flow from the remaining blocks 987 for (auto Block : UnknownBlocks) { 988 assert(Block->UnknownWeight && "incorrect unknown subgraph"); 989 uint64_t BlockFlow = 0; 990 // Block's flow is the sum of incoming flows 991 for (auto Jump : Block->PredJumps) { 992 BlockFlow += Jump->Flow; 993 } 994 Block->Flow = BlockFlow; 995 rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow); 996 } 997 } 998 999 /// Redistribute flow for a block in a subgraph rooted at SrcBlock, 1000 /// and ending at DstBlock. 1001 void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock, 1002 const FlowBlock *Block, uint64_t BlockFlow) { 1003 // Process all successor jumps and update corresponding flow values 1004 size_t BlockDegree = 0; 1005 for (auto Jump : Block->SuccJumps) { 1006 if (ignoreJump(SrcBlock, DstBlock, Jump)) 1007 continue; 1008 BlockDegree++; 1009 } 1010 // If all successor jumps of the block are ignored, skip it 1011 if (DstBlock == nullptr && BlockDegree == 0) 1012 return; 1013 assert(BlockDegree > 0 && "all outgoing jumps are ignored"); 1014 1015 // Each of the Block's successors gets the following amount of flow. 1016 // Rounding the value up so that all flow is propagated 1017 uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree; 1018 for (auto Jump : Block->SuccJumps) { 1019 if (ignoreJump(SrcBlock, DstBlock, Jump)) 1020 continue; 1021 uint64_t Flow = std::min(SuccFlow, BlockFlow); 1022 Jump->Flow = Flow; 1023 BlockFlow -= Flow; 1024 } 1025 assert(BlockFlow == 0 && "not all flow is propagated"); 1026 } 1027 1028 /// A constant indicating an arbitrary exit block of a function. 1029 static constexpr uint64_t AnyExitBlock = uint64_t(-1); 1030 1031 /// The function. 1032 FlowFunction &Func; 1033 }; 1034 1035 /// Initializing flow network for a given function. 1036 /// 1037 /// Every block is split into three nodes that are responsible for (i) an 1038 /// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or 1039 /// reduction of the block weight. 1040 void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) { 1041 uint64_t NumBlocks = Func.Blocks.size(); 1042 assert(NumBlocks > 1 && "Too few blocks in a function"); 1043 LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n"); 1044 1045 // Pre-process data: make sure the entry weight is at least 1 1046 if (Func.Blocks[Func.Entry].Weight == 0) { 1047 Func.Blocks[Func.Entry].Weight = 1; 1048 } 1049 // Introducing dummy source/sink pairs to allow flow circulation. 1050 // The nodes corresponding to blocks of Func have indicies in the range 1051 // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values. 1052 uint64_t S = 3 * NumBlocks; 1053 uint64_t T = S + 1; 1054 uint64_t S1 = S + 2; 1055 uint64_t T1 = S + 3; 1056 1057 Network.initialize(3 * NumBlocks + 4, S1, T1); 1058 1059 // Create three nodes for every block of the function 1060 for (uint64_t B = 0; B < NumBlocks; B++) { 1061 auto &Block = Func.Blocks[B]; 1062 assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) && 1063 "non-zero weight of a block w/o weight except for an entry"); 1064 1065 // Split every block into two nodes 1066 uint64_t Bin = 3 * B; 1067 uint64_t Bout = 3 * B + 1; 1068 uint64_t Baux = 3 * B + 2; 1069 if (Block.Weight > 0) { 1070 Network.addEdge(S1, Bout, Block.Weight, 0); 1071 Network.addEdge(Bin, T1, Block.Weight, 0); 1072 } 1073 1074 // Edges from S and to T 1075 assert((!Block.isEntry() || !Block.isExit()) && 1076 "a block cannot be an entry and an exit"); 1077 if (Block.isEntry()) { 1078 Network.addEdge(S, Bin, 0); 1079 } else if (Block.isExit()) { 1080 Network.addEdge(Bout, T, 0); 1081 } 1082 1083 // An auxiliary node to allow increase/reduction of block counts: 1084 // We assume that decreasing block counts is more expensive than increasing, 1085 // and thus, setting separate costs here. In the future we may want to tune 1086 // the relative costs so as to maximize the quality of generated profiles. 1087 int64_t AuxCostInc = SampleProfileProfiCostInc; 1088 int64_t AuxCostDec = SampleProfileProfiCostDec; 1089 if (Block.UnknownWeight) { 1090 // Do not penalize changing weights of blocks w/o known profile count 1091 AuxCostInc = 0; 1092 AuxCostDec = 0; 1093 } else { 1094 // Increasing the count for "cold" blocks with zero initial count is more 1095 // expensive than for "hot" ones 1096 if (Block.Weight == 0) { 1097 AuxCostInc = SampleProfileProfiCostIncZero; 1098 } 1099 // Modifying the count of the entry block is expensive 1100 if (Block.isEntry()) { 1101 AuxCostInc = SampleProfileProfiCostIncEntry; 1102 AuxCostDec = SampleProfileProfiCostDecEntry; 1103 } 1104 } 1105 // For blocks with self-edges, do not penalize a reduction of the count, 1106 // as all of the increase can be attributed to the self-edge 1107 if (Block.HasSelfEdge) { 1108 AuxCostDec = 0; 1109 } 1110 1111 Network.addEdge(Bin, Baux, AuxCostInc); 1112 Network.addEdge(Baux, Bout, AuxCostInc); 1113 if (Block.Weight > 0) { 1114 Network.addEdge(Bout, Baux, AuxCostDec); 1115 Network.addEdge(Baux, Bin, AuxCostDec); 1116 } 1117 } 1118 1119 // Creating edges for every jump 1120 for (auto &Jump : Func.Jumps) { 1121 uint64_t Src = Jump.Source; 1122 uint64_t Dst = Jump.Target; 1123 if (Src != Dst) { 1124 uint64_t SrcOut = 3 * Src + 1; 1125 uint64_t DstIn = 3 * Dst; 1126 uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0; 1127 Network.addEdge(SrcOut, DstIn, Cost); 1128 } 1129 } 1130 1131 // Make sure we have a valid flow circulation 1132 Network.addEdge(T, S, 0); 1133 } 1134 1135 /// Extract resulting block and edge counts from the flow network. 1136 void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) { 1137 uint64_t NumBlocks = Func.Blocks.size(); 1138 1139 // Extract resulting block counts 1140 for (uint64_t Src = 0; Src < NumBlocks; Src++) { 1141 auto &Block = Func.Blocks[Src]; 1142 uint64_t SrcOut = 3 * Src + 1; 1143 int64_t Flow = 0; 1144 for (auto &Adj : Network.getFlow(SrcOut)) { 1145 uint64_t DstIn = Adj.first; 1146 int64_t DstFlow = Adj.second; 1147 bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2); 1148 if (!IsAuxNode || Block.HasSelfEdge) { 1149 Flow += DstFlow; 1150 } 1151 } 1152 Block.Flow = Flow; 1153 assert(Flow >= 0 && "negative block flow"); 1154 } 1155 1156 // Extract resulting jump counts 1157 for (auto &Jump : Func.Jumps) { 1158 uint64_t Src = Jump.Source; 1159 uint64_t Dst = Jump.Target; 1160 int64_t Flow = 0; 1161 if (Src != Dst) { 1162 uint64_t SrcOut = 3 * Src + 1; 1163 uint64_t DstIn = 3 * Dst; 1164 Flow = Network.getFlow(SrcOut, DstIn); 1165 } else { 1166 uint64_t SrcOut = 3 * Src + 1; 1167 uint64_t SrcAux = 3 * Src + 2; 1168 int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux); 1169 if (AuxFlow > 0) 1170 Flow = AuxFlow; 1171 } 1172 Jump.Flow = Flow; 1173 assert(Flow >= 0 && "negative jump flow"); 1174 } 1175 } 1176 1177 #ifndef NDEBUG 1178 /// Verify that the computed flow values satisfy flow conservation rules 1179 void verifyWeights(const FlowFunction &Func) { 1180 const uint64_t NumBlocks = Func.Blocks.size(); 1181 auto InFlow = std::vector<uint64_t>(NumBlocks, 0); 1182 auto OutFlow = std::vector<uint64_t>(NumBlocks, 0); 1183 for (auto &Jump : Func.Jumps) { 1184 InFlow[Jump.Target] += Jump.Flow; 1185 OutFlow[Jump.Source] += Jump.Flow; 1186 } 1187 1188 uint64_t TotalInFlow = 0; 1189 uint64_t TotalOutFlow = 0; 1190 for (uint64_t I = 0; I < NumBlocks; I++) { 1191 auto &Block = Func.Blocks[I]; 1192 if (Block.isEntry()) { 1193 TotalInFlow += Block.Flow; 1194 assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow"); 1195 } else if (Block.isExit()) { 1196 TotalOutFlow += Block.Flow; 1197 assert(Block.Flow == InFlow[I] && "incorrectly computed control flow"); 1198 } else { 1199 assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow"); 1200 assert(Block.Flow == InFlow[I] && "incorrectly computed control flow"); 1201 } 1202 } 1203 assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow"); 1204 1205 // Verify that there are no isolated flow components 1206 // One could modify FlowFunction to hold edges indexed by the sources, which 1207 // will avoid a creation of the object 1208 auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks); 1209 for (auto &Jump : Func.Jumps) { 1210 if (Jump.Flow > 0) { 1211 PositiveFlowEdges[Jump.Source].push_back(Jump.Target); 1212 } 1213 } 1214 1215 // Run BFS from the source along edges with positive flow 1216 std::queue<uint64_t> Queue; 1217 auto Visited = BitVector(NumBlocks, false); 1218 Queue.push(Func.Entry); 1219 Visited[Func.Entry] = true; 1220 while (!Queue.empty()) { 1221 uint64_t Src = Queue.front(); 1222 Queue.pop(); 1223 for (uint64_t Dst : PositiveFlowEdges[Src]) { 1224 if (!Visited[Dst]) { 1225 Queue.push(Dst); 1226 Visited[Dst] = true; 1227 } 1228 } 1229 } 1230 1231 // Verify that every block that has a positive flow is reached from the source 1232 // along edges with a positive flow 1233 for (uint64_t I = 0; I < NumBlocks; I++) { 1234 auto &Block = Func.Blocks[I]; 1235 assert((Visited[I] || Block.Flow == 0) && "an isolated flow component"); 1236 } 1237 } 1238 #endif 1239 1240 } // end of anonymous namespace 1241 1242 /// Apply the profile inference algorithm for a given flow function 1243 void llvm::applyFlowInference(FlowFunction &Func) { 1244 // Create and apply an inference network model 1245 auto InferenceNetwork = MinCostMaxFlow(); 1246 initializeNetwork(InferenceNetwork, Func); 1247 InferenceNetwork.run(); 1248 1249 // Extract flow values for every block and every edge 1250 extractWeights(InferenceNetwork, Func); 1251 1252 // Post-processing adjustments to the flow 1253 auto Adjuster = FlowAdjuster(Func); 1254 Adjuster.run(); 1255 1256 #ifndef NDEBUG 1257 // Verify the result 1258 verifyWeights(Func); 1259 #endif 1260 } 1261