//===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a profile inference algorithm. Given an incomplete and
// possibly imprecise block counts, the algorithm reconstructs realistic block
// and edge counts that satisfy flow conservation rules, while minimally modify
// input block counts.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/SampleProfileInference.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include <queue>
#include <set>
#include <stack>

using namespace llvm;
#define DEBUG_TYPE "sample-profile-inference"

// All helpers live in an anonymous namespace: they are implementation details
// of the inference algorithm and are not visible outside this file.
namespace {

// When enabled, the augmentation step spreads flow across all equally-short
// augmenting paths (a DAG) instead of committing to a single arbitrary path.
static cl::opt<bool> SampleProfileEvenCountDistribution(
    "sample-profile-even-count-distribution", cl::init(true), cl::Hidden,
    cl::ZeroOrMore,
    cl::desc("Try to evenly distribute counts when there are multiple equally "
             "likely options."));

// Bounds how many times the DAG-building DFS may re-enter a node; keeps the
// search linear-ish, see findAugmentingDAG.
static cl::opt<unsigned> SampleProfileMaxDfsCalls(
    "sample-profile-max-dfs-calls", cl::init(10), cl::Hidden, cl::ZeroOrMore,
    cl::desc("Maximum number of dfs iterations for even count distribution."));

/// A value indicating an infinite flow/capacity/weight of a block/edge.
/// Not using numeric_limits<int64_t>::max(), as the values can be summed up
/// during the execution.
static constexpr int64_t INF = ((int64_t)1) << 50;

/// The minimum-cost maximum flow algorithm.
45 /// 46 /// The algorithm finds the maximum flow of minimum cost on a given (directed) 47 /// network using a modified version of the classical Moore-Bellman-Ford 48 /// approach. The algorithm applies a number of augmentation iterations in which 49 /// flow is sent along paths of positive capacity from the source to the sink. 50 /// The worst-case time complexity of the implementation is O(v(f)*m*n), where 51 /// where m is the number of edges, n is the number of vertices, and v(f) is the 52 /// value of the maximum flow. However, the observed running time on typical 53 /// instances is sub-quadratic, that is, o(n^2). 54 /// 55 /// The input is a set of edges with specified costs and capacities, and a pair 56 /// of nodes (source and sink). The output is the flow along each edge of the 57 /// minimum total cost respecting the given edge capacities. 58 class MinCostMaxFlow { 59 public: 60 // Initialize algorithm's data structures for a network of a given size. 61 void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) { 62 Source = SourceNode; 63 Target = SinkNode; 64 65 Nodes = std::vector<Node>(NodeCount); 66 Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>()); 67 if (SampleProfileEvenCountDistribution) 68 AugmentingEdges = 69 std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>()); 70 } 71 72 // Run the algorithm. 
73 int64_t run() { 74 // Iteratively find an augmentation path/dag in the network and send the 75 // flow along its edges 76 size_t AugmentationIters = applyFlowAugmentation(); 77 78 // Compute the total flow and its cost 79 int64_t TotalCost = 0; 80 int64_t TotalFlow = 0; 81 for (uint64_t Src = 0; Src < Nodes.size(); Src++) { 82 for (auto &Edge : Edges[Src]) { 83 if (Edge.Flow > 0) { 84 TotalCost += Edge.Cost * Edge.Flow; 85 if (Src == Source) 86 TotalFlow += Edge.Flow; 87 } 88 } 89 } 90 LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters 91 << " iterations with " << TotalFlow << " total flow" 92 << " of " << TotalCost << " cost\n"); 93 (void)TotalFlow; 94 (void)AugmentationIters; 95 return TotalCost; 96 } 97 98 /// Adding an edge to the network with a specified capacity and a cost. 99 /// Multiple edges between a pair of nodes are allowed but self-edges 100 /// are not supported. 101 void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) { 102 assert(Capacity > 0 && "adding an edge of zero capacity"); 103 assert(Src != Dst && "loop edge are not supported"); 104 105 Edge SrcEdge; 106 SrcEdge.Dst = Dst; 107 SrcEdge.Cost = Cost; 108 SrcEdge.Capacity = Capacity; 109 SrcEdge.Flow = 0; 110 SrcEdge.RevEdgeIndex = Edges[Dst].size(); 111 112 Edge DstEdge; 113 DstEdge.Dst = Src; 114 DstEdge.Cost = -Cost; 115 DstEdge.Capacity = 0; 116 DstEdge.Flow = 0; 117 DstEdge.RevEdgeIndex = Edges[Src].size(); 118 119 Edges[Src].push_back(SrcEdge); 120 Edges[Dst].push_back(DstEdge); 121 } 122 123 /// Adding an edge to the network of infinite capacity and a given cost. 124 void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) { 125 addEdge(Src, Dst, INF, Cost); 126 } 127 128 /// Get the total flow from a given source node. 129 /// Returns a list of pairs (target node, amount of flow to the target). 
130 const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const { 131 std::vector<std::pair<uint64_t, int64_t>> Flow; 132 for (auto &Edge : Edges[Src]) { 133 if (Edge.Flow > 0) 134 Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow)); 135 } 136 return Flow; 137 } 138 139 /// Get the total flow between a pair of nodes. 140 int64_t getFlow(uint64_t Src, uint64_t Dst) const { 141 int64_t Flow = 0; 142 for (auto &Edge : Edges[Src]) { 143 if (Edge.Dst == Dst) { 144 Flow += Edge.Flow; 145 } 146 } 147 return Flow; 148 } 149 150 /// A cost of increasing a block's count by one. 151 static constexpr int64_t AuxCostInc = 10; 152 /// A cost of decreasing a block's count by one. 153 static constexpr int64_t AuxCostDec = 20; 154 /// A cost of increasing a count of zero-weight block by one. 155 static constexpr int64_t AuxCostIncZero = 11; 156 /// A cost of increasing the entry block's count by one. 157 static constexpr int64_t AuxCostIncEntry = 40; 158 /// A cost of decreasing the entry block's count by one. 159 static constexpr int64_t AuxCostDecEntry = 10; 160 /// A cost of taking an unlikely jump. 161 static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 30; 162 163 private: 164 /// Iteratively find an augmentation path/dag in the network and send the 165 /// flow along its edges. The method returns the number of applied iterations. 
  size_t applyFlowAugmentation() {
    size_t AugmentationIters = 0;
    // Outer loop: repeat while a shortest augmenting path with positive
    // residual capacity still exists in the network.
    while (findAugmentingPath()) {
      uint64_t PathCapacity = computeAugmentingPathCapacity();
      while (PathCapacity > 0) {
        bool Progress = false;
        if (SampleProfileEvenCountDistribution) {
          // Identify node/edge candidates for augmentation
          identifyShortestEdges(PathCapacity);

          // Find an augmenting DAG
          auto AugmentingOrder = findAugmentingDAG();

          // Apply the DAG augmentation
          Progress = augmentFlowAlongDAG(AugmentingOrder);
          // DAG augmentation may leave residual capacity on the canonical
          // path; recompute it to decide whether to iterate again.
          PathCapacity = computeAugmentingPathCapacity();
        }

        // Fallback (and the non-even-distribution mode): saturate the single
        // canonical augmenting path in one step.
        if (!Progress) {
          augmentFlowAlongPath(PathCapacity);
          PathCapacity = 0;
        }

        AugmentationIters++;
      }
    }
    return AugmentationIters;
  }

  /// Compute the capacity of the canonical augmenting path. If the path is
  /// saturated (that is, no flow can be sent along the path), then return 0.
  uint64_t computeAugmentingPathCapacity() {
    uint64_t PathCapacity = INF;
    uint64_t Now = Target;
    // Walk the parent links from Target back to Source; the path capacity is
    // the minimum residual capacity over its edges.
    while (Now != Source) {
      uint64_t Pred = Nodes[Now].ParentNode;
      auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];

      assert(Edge.Capacity >= Edge.Flow && "incorrect edge flow");
      uint64_t EdgeCapacity = uint64_t(Edge.Capacity - Edge.Flow);
      PathCapacity = std::min(PathCapacity, EdgeCapacity);

      Now = Pred;
    }
    return PathCapacity;
  }

  /// Check for existence of an augmenting path with a positive capacity.
  bool findAugmentingPath() {
    // Initialize data structures: all distances unknown, no parents, and no
    // node currently queued.
    for (auto &Node : Nodes) {
      Node.Distance = INF;
      Node.ParentNode = uint64_t(-1);
      Node.ParentEdgeIndex = uint64_t(-1);
      Node.Taken = false;
    }

    // Moore-Bellman-Ford style relaxation over the residual network.
    std::queue<uint64_t> Queue;
    Queue.push(Source);
    Nodes[Source].Distance = 0;
    Nodes[Source].Taken = true;
    while (!Queue.empty()) {
      uint64_t Src = Queue.front();
      Queue.pop();
      Nodes[Src].Taken = false;
      // Although the residual network contains edges with negative costs
      // (in particular, backward edges), it can be shown that there are no
      // negative-weight cycles and the following two invariants are maintained:
      // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V,
      // where Dist is the length of the shortest path between two nodes. This
      // allows to prune the search-space of the path-finding algorithm using
      // the following early-stop criteria:
      // -- If we find a path with zero-distance from Source to Target, stop the
      //    search, as the path is the shortest since Dist[Source, Target] >= 0;
      // -- If we have Dist[Source, V] > Dist[Source, Target], then do not
      //    process node V, as it is guaranteed _not_ to be on a shortest path
      //    from Source to Target; it follows from inequalities
      //    Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
      //                         >= Dist[Source, V]
      if (!SampleProfileEvenCountDistribution && Nodes[Target].Distance == 0)
        break;
      if (Nodes[Src].Distance > Nodes[Target].Distance)
        continue;

      // Process adjacent edges; only edges with remaining residual capacity
      // may be relaxed.
      for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
        auto &Edge = Edges[Src][EdgeIdx];
        if (Edge.Flow < Edge.Capacity) {
          uint64_t Dst = Edge.Dst;
          int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
          if (Nodes[Dst].Distance > NewDistance) {
            // Update the distance and the parent node/edge
            Nodes[Dst].Distance = NewDistance;
            Nodes[Dst].ParentNode = Src;
            Nodes[Dst].ParentEdgeIndex = EdgeIdx;
            // Add the node to the queue, if it is not there yet
            if (!Nodes[Dst].Taken) {
              Queue.push(Dst);
              Nodes[Dst].Taken = true;
            }
          }
        }
      }
    }

    // Target is reachable iff its distance was improved from INF.
    return Nodes[Target].Distance != INF;
  }

  /// Update the current flow along the augmenting path.
  void augmentFlowAlongPath(uint64_t PathCapacity) {
    assert(PathCapacity > 0 && "found an incorrect augmenting path");
    uint64_t Now = Target;
    // Walk the path backwards via parent links, pushing PathCapacity units of
    // flow through every edge and withdrawing them from its reverse copy.
    while (Now != Source) {
      uint64_t Pred = Nodes[Now].ParentNode;
      auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
      auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];

      Edge.Flow += PathCapacity;
      RevEdge.Flow -= PathCapacity;

      Now = Pred;
    }
  }

  /// Find an Augmenting DAG order using a modified version of DFS in which we
  /// can visit a node multiple times. In the DFS search, when scanning each
  /// edge out of a node, continue search at Edge.Dst endpoint if it has not
  /// been discovered yet and its NumCalls < MaxDfsCalls. The algorithm
  /// runs in O(MaxDfsCalls * |Edges| + |Nodes|) time.
  /// It returns an Augmenting Order (Taken nodes in decreasing Finish time)
  /// that starts with Source and ends with Target.
  std::vector<uint64_t> findAugmentingDAG() {
    // We use a stack based implementation of DFS to avoid recursion.
    // Defining DFS data structures:
    // A pair (NodeIdx, EdgeIdx) at the top of the Stack denotes that
    //  - we are currently visiting Nodes[NodeIdx] and
    //  - the next edge to scan is Edges[NodeIdx][EdgeIdx]
    typedef std::pair<uint64_t, uint64_t> StackItemType;
    std::stack<StackItemType> Stack;
    std::vector<uint64_t> AugmentingOrder;

    // Phase 0: Initialize Node attributes and Time for DFS run
    for (auto &Node : Nodes) {
      Node.Discovery = 0;
      Node.Finish = 0;
      Node.NumCalls = 0;
      Node.Taken = false;
    }
    uint64_t Time = 0;
    // Mark Target as Taken
    // Taken attribute will be propagated backwards from Target towards Source
    Nodes[Target].Taken = true;

    // Phase 1: Start DFS traversal from Source
    Stack.emplace(Source, 0);
    Nodes[Source].Discovery = ++Time;
    while (!Stack.empty()) {
      auto NodeIdx = Stack.top().first;
      auto EdgeIdx = Stack.top().second;

      // If we haven't scanned all edges out of NodeIdx, continue scanning
      if (EdgeIdx < Edges[NodeIdx].size()) {
        auto &Edge = Edges[NodeIdx][EdgeIdx];
        auto &Dst = Nodes[Edge.Dst];
        // Advance the edge cursor before possibly descending into Dst.
        Stack.top().second++;

        if (Edge.OnShortestPath) {
          // If we haven't seen Edge.Dst so far, continue DFS search there
          if (Dst.Discovery == 0 && Dst.NumCalls < SampleProfileMaxDfsCalls) {
            Dst.Discovery = ++Time;
            Stack.emplace(Edge.Dst, 0);
            Dst.NumCalls++;
          } else if (Dst.Taken && Dst.Finish != 0) {
            // Else, if Edge.Dst already has a (finished) path to Target, then
            // so does NodeIdx
            Nodes[NodeIdx].Taken = true;
          }
        }
      } else {
        // If we are done scanning all edge out of NodeIdx
        Stack.pop();
        // If we haven't found a path from NodeIdx to Target, forget about it
        if (!Nodes[NodeIdx].Taken) {
          // Resetting Discovery allows the node to be re-entered later, up to
          // NumCalls times.
          Nodes[NodeIdx].Discovery = 0;
        } else {
          // If we have found a path from NodeIdx to Target, then finish NodeIdx
          // and propagate Taken flag to DFS parent unless at the Source
          Nodes[NodeIdx].Finish = ++Time;
          // NodeIdx == Source if and only if the stack is empty
          if (NodeIdx != Source) {
            assert(!Stack.empty() && "empty stack while running dfs");
            Nodes[Stack.top().first].Taken = true;
          }
          AugmentingOrder.push_back(NodeIdx);
        }
      }
    }
    // Nodes are collected decreasing Finish time, so the order is reversed
    std::reverse(AugmentingOrder.begin(), AugmentingOrder.end());

    // Phase 2: Extract all forward (DAG) edges and fill in AugmentingEdges.
    // An edge is "forward" when it goes from a later to an earlier Finish
    // time, which makes the extracted subgraph acyclic.
    for (size_t Src : AugmentingOrder) {
      AugmentingEdges[Src].clear();
      for (auto &Edge : Edges[Src]) {
        uint64_t Dst = Edge.Dst;
        if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken &&
            Nodes[Dst].Finish < Nodes[Src].Finish) {
          AugmentingEdges[Src].push_back(&Edge);
        }
      }
      assert((Src == Target || !AugmentingEdges[Src].empty()) &&
             "incorrectly constructed augmenting edges");
    }

    return AugmentingOrder;
  }

  /// Update the current flow along the given (acyclic) subgraph specified by
  /// the vertex order, AugmentingOrder. The objective is to send as much flow
  /// as possible while evenly distributing flow among successors of each node.
  /// After the update at least one edge is saturated.
  bool augmentFlowAlongDAG(const std::vector<uint64_t> &AugmentingOrder) {
    // Phase 0: Initialization of per-node flow accumulators and per-edge
    // augmentation counters.
    for (uint64_t Src : AugmentingOrder) {
      Nodes[Src].FracFlow = 0;
      Nodes[Src].IntFlow = 0;
      for (auto &Edge : AugmentingEdges[Src]) {
        Edge->AugmentedFlow = 0;
      }
    }

    // Phase 1: Send a unit of fractional flow along the DAG to compute how
    // many integral units (MaxFlowAmount) can be pushed before some
    // finite-capacity edge saturates.
    uint64_t MaxFlowAmount = INF;
    Nodes[Source].FracFlow = 1.0;
    for (uint64_t Src : AugmentingOrder) {
      assert((Src == Target || Nodes[Src].FracFlow > 0.0) &&
             "incorrectly computed fractional flow");
      // Distribute flow evenly among successors of Src
      uint64_t Degree = AugmentingEdges[Src].size();
      for (auto &Edge : AugmentingEdges[Src]) {
        double EdgeFlow = Nodes[Src].FracFlow / Degree;
        Nodes[Edge->Dst].FracFlow += EdgeFlow;
        // Infinite-capacity edges never bound the flow amount.
        if (Edge->Capacity == INF)
          continue;
        uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow;
        MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow);
      }
    }
    // Stop early if we cannot send any (integral) flow from Source to Target
    if (MaxFlowAmount == 0)
      return false;

    // Phase 2: Send an integral flow of MaxFlowAmount
    Nodes[Source].IntFlow = MaxFlowAmount;
    for (uint64_t Src : AugmentingOrder) {
      if (Src == Target)
        break;
      // Distribute flow evenly among successors of Src, rounding up to make
      // sure all flow is sent
      uint64_t Degree = AugmentingEdges[Src].size();
      // We are guaranteed that Node[Src].IntFlow <= SuccFlow * Degree
      uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree;
      for (auto &Edge : AugmentingEdges[Src]) {
        uint64_t Dst = Edge->Dst;
        // Each edge takes up to SuccFlow units, clamped by the remaining flow
        // at Src and the edge's residual capacity.
        uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow);
        EdgeFlow = std::min(EdgeFlow, uint64_t(Edge->Capacity - Edge->Flow));
        Nodes[Dst].IntFlow += EdgeFlow;
        Nodes[Src].IntFlow -= EdgeFlow;
        Edge->AugmentedFlow += EdgeFlow;
      }
    }
    assert(Nodes[Target].IntFlow <= MaxFlowAmount);
    // Flow arriving at Target is consumed; anything still stored on inner
    // nodes is excess that must be returned in Phase 3.
    Nodes[Target].IntFlow = 0;

    // Phase 3: Send excess flow back traversing the nodes backwards.
    // Because of rounding, not all flow can be sent along the edges of Src.
    // Hence, sending the remaining flow back to maintain flow conservation
    for (size_t Idx = AugmentingOrder.size() - 1; Idx > 0; Idx--) {
      uint64_t Src = AugmentingOrder[Idx - 1];
      // Try to send excess flow back along each edge.
      // Make sure we only send back flow we just augmented (AugmentedFlow).
      for (auto &Edge : AugmentingEdges[Src]) {
        uint64_t Dst = Edge->Dst;
        if (Nodes[Dst].IntFlow == 0)
          continue;
        uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow);
        Nodes[Dst].IntFlow -= EdgeFlow;
        Nodes[Src].IntFlow += EdgeFlow;
        Edge->AugmentedFlow -= EdgeFlow;
      }
    }

    // Phase 4: Update flow values along all edges
    bool HasSaturatedEdges = false;
    for (uint64_t Src : AugmentingOrder) {
      // Verify that we have sent all the excess flow from the node
      assert(Src == Source || Nodes[Src].IntFlow == 0);
      for (auto &Edge : AugmentingEdges[Src]) {
        assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow);
        // Update flow values along the edge and its reverse copy
        auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex];
        Edge->Flow += Edge->AugmentedFlow;
        RevEdge.Flow -= Edge->AugmentedFlow;
        if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0)
          HasSaturatedEdges = true;
      }
    }

    // The augmentation is successful iff at least one edge becomes saturated
    return HasSaturatedEdges;
  }

  /// Identify candidate (shortest) edges for augmentation.
  void identifyShortestEdges(uint64_t PathCapacity) {
    assert(PathCapacity > 0 && "found an incorrect augmenting DAG");
    // To make sure the augmentation DAG contains only edges with large residual
    // capacity, we prune all edges whose capacity is below a fraction of
    // the capacity of the augmented path.
    // (All edges of the path itself are always in the DAG)
    uint64_t MinCapacity = std::max(PathCapacity / 2, uint64_t(1));

    // Decide which edges are on a shortest path from Source to Target
    for (size_t Src = 0; Src < Nodes.size(); Src++) {
      // An edge cannot be augmenting if the endpoint has large distance
      if (Nodes[Src].Distance > Nodes[Target].Distance)
        continue;

      for (auto &Edge : Edges[Src]) {
        uint64_t Dst = Edge.Dst;
        // An edge is on a shortest path iff relaxing it is tight
        // (Dist[Dst] == Dist[Src] + Cost) and it has enough residual capacity.
        Edge.OnShortestPath =
            Src != Target && Dst != Source &&
            Nodes[Dst].Distance <= Nodes[Target].Distance &&
            Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost &&
            Edge.Capacity > Edge.Flow &&
            uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity;
      }
    }
  }

  /// A node in a flow network.
  struct Node {
    /// The cost of the cheapest path from the source to the current node.
    int64_t Distance;
    /// The node preceding the current one in the path.
    uint64_t ParentNode;
    /// The index of the edge between ParentNode and the current node.
    uint64_t ParentEdgeIndex;
    /// An indicator of whether the current node is in a queue.
    bool Taken;

    /// Data fields utilized in DAG-augmentation:
    /// Fractional flow.
    double FracFlow;
    /// Integral flow.
    uint64_t IntFlow;
    /// Discovery time.
    uint64_t Discovery;
    /// Finish time.
    uint64_t Finish;
    /// NumCalls.
    uint64_t NumCalls;
  };

  /// An edge in a flow network.
  struct Edge {
    /// The cost of the edge.
    int64_t Cost;
    /// The capacity of the edge.
    int64_t Capacity;
    /// The current flow on the edge.
    int64_t Flow;
    /// The destination node of the edge.
    uint64_t Dst;
    /// The index of the reverse edge between Dst and the current node.
    uint64_t RevEdgeIndex;

    /// Data fields utilized in DAG-augmentation:
    /// Whether the edge is currently on a shortest path from Source to Target.
    bool OnShortestPath;
    /// Extra flow along the edge.
    uint64_t AugmentedFlow;
  };

  /// The set of network nodes.
  std::vector<Node> Nodes;
  /// The set of network edges.
  std::vector<std::vector<Edge>> Edges;
  /// Source node of the flow.
  uint64_t Source;
  /// Target (sink) node of the flow.
  uint64_t Target;
  /// Augmenting edges.
  std::vector<std::vector<Edge *>> AugmentingEdges;
};

/// A post-processing adjustment of control flow. It applies two steps by
/// rerouting some flow and making it more realistic:
///
/// - First, it removes all isolated components ("islands") with a positive flow
///   that are unreachable from the entry block. For every such component, we
///   find the shortest path from the entry to an exit passing through the
///   component, and increase the flow by one unit along the path.
///
/// - Second, it identifies all "unknown subgraphs" consisting of basic blocks
///   with no sampled counts. Then it rebalances the flow that goes through such
///   a subgraph so that each branch is taken with probability 50%.
///   An unknown subgraph is such that for every two nodes u and v:
///     - u dominates v and u is not unknown;
///     - v post-dominates u; and
///     - all inner-nodes of all (u,v)-paths are unknown.
///
class FlowAdjuster {
public:
  FlowAdjuster(FlowFunction &Func) : Func(Func) {
    assert(Func.Blocks[Func.Entry].isEntry() &&
           "incorrect index of the entry block");
  }

  // Run the post-processing
  void run() {
    // Adjust the flow to get rid of isolated components.
    joinIsolatedComponents();

    // Rebalance the flow inside unknown subgraphs.
    rebalanceUnknownSubgraphs();
  }

private:
  void joinIsolatedComponents() {
    // Find blocks that are reachable from the source
    auto Visited = BitVector(NumBlocks(), false);
    findReachable(Func.Entry, Visited);

    // Iterate over all non-reachable blocks and adjust their weights
    for (uint64_t I = 0; I < NumBlocks(); I++) {
      auto &Block = Func.Blocks[I];
      if (Block.Flow > 0 && !Visited[I]) {
        // Find a path from the entry to an exit passing through the block I
        auto Path = findShortestPath(I);
        // Increase the flow along the path
        assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
               "incorrectly computed path adjusting control flow");
        Func.Blocks[Func.Entry].Flow += 1;
        for (auto &Jump : Path) {
          Jump->Flow += 1;
          Func.Blocks[Jump->Target].Flow += 1;
          // Update reachability so subsequent islands connected through this
          // path are not processed again
          findReachable(Jump->Target, Visited);
        }
      }
    }
  }

  /// Run BFS from a given block along the jumps with a positive flow and mark
  /// all reachable blocks.
  void findReachable(uint64_t Src, BitVector &Visited) {
    // Nothing to do if the start block was already marked.
    if (Visited[Src])
      return;
    std::queue<uint64_t> Queue;
    Queue.push(Src);
    Visited[Src] = true;
    while (!Queue.empty()) {
      Src = Queue.front();
      Queue.pop();
      // Only jumps carrying positive flow contribute to reachability here.
      for (auto Jump : Func.Blocks[Src].SuccJumps) {
        uint64_t Dst = Jump->Target;
        if (Jump->Flow > 0 && !Visited[Dst]) {
          Queue.push(Dst);
          Visited[Dst] = true;
        }
      }
    }
  }

  /// Find the shortest path from the entry block to an exit block passing
  /// through a given block.
641 std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) { 642 // A path from the entry block to BlockIdx 643 auto ForwardPath = findShortestPath(Func.Entry, BlockIdx); 644 // A path from BlockIdx to an exit block 645 auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock); 646 647 // Concatenate the two paths 648 std::vector<FlowJump *> Result; 649 Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end()); 650 Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end()); 651 return Result; 652 } 653 654 /// Apply the Dijkstra algorithm to find the shortest path from a given 655 /// Source to a given Target block. 656 /// If Target == -1, then the path ends at an exit block. 657 std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) { 658 // Quit early, if possible 659 if (Source == Target) 660 return std::vector<FlowJump *>(); 661 if (Func.Blocks[Source].isExit() && Target == AnyExitBlock) 662 return std::vector<FlowJump *>(); 663 664 // Initialize data structures 665 auto Distance = std::vector<int64_t>(NumBlocks(), INF); 666 auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr); 667 Distance[Source] = 0; 668 std::set<std::pair<uint64_t, uint64_t>> Queue; 669 Queue.insert(std::make_pair(Distance[Source], Source)); 670 671 // Run the Dijkstra algorithm 672 while (!Queue.empty()) { 673 uint64_t Src = Queue.begin()->second; 674 Queue.erase(Queue.begin()); 675 // If we found a solution, quit early 676 if (Src == Target || 677 (Func.Blocks[Src].isExit() && Target == AnyExitBlock)) 678 break; 679 680 for (auto Jump : Func.Blocks[Src].SuccJumps) { 681 uint64_t Dst = Jump->Target; 682 int64_t JumpDist = jumpDistance(Jump); 683 if (Distance[Dst] > Distance[Src] + JumpDist) { 684 Queue.erase(std::make_pair(Distance[Dst], Dst)); 685 686 Distance[Dst] = Distance[Src] + JumpDist; 687 Parent[Dst] = Jump; 688 689 Queue.insert(std::make_pair(Distance[Dst], Dst)); 690 } 691 } 692 } 693 // If Target is not provided, find the 
closest exit block 694 if (Target == AnyExitBlock) { 695 for (uint64_t I = 0; I < NumBlocks(); I++) { 696 if (Func.Blocks[I].isExit() && Parent[I] != nullptr) { 697 if (Target == AnyExitBlock || Distance[Target] > Distance[I]) { 698 Target = I; 699 } 700 } 701 } 702 } 703 assert(Parent[Target] != nullptr && "a path does not exist"); 704 705 // Extract the constructed path 706 std::vector<FlowJump *> Result; 707 uint64_t Now = Target; 708 while (Now != Source) { 709 assert(Now == Parent[Now]->Target && "incorrect parent jump"); 710 Result.push_back(Parent[Now]); 711 Now = Parent[Now]->Source; 712 } 713 // Reverse the path, since it is extracted from Target to Source 714 std::reverse(Result.begin(), Result.end()); 715 return Result; 716 } 717 718 /// A distance of a path for a given jump. 719 /// In order to incite the path to use blocks/jumps with large positive flow, 720 /// and avoid changing branch probability of outgoing edges drastically, 721 /// set the distance as follows: 722 /// if Jump.Flow > 0, then distance = max(100 - Jump->Flow, 0) 723 /// if Block.Weight > 0, then distance = 1 724 /// otherwise distance >> 1 725 int64_t jumpDistance(FlowJump *Jump) const { 726 int64_t BaseDistance = 100; 727 if (Jump->IsUnlikely) 728 return MinCostMaxFlow::AuxCostUnlikely; 729 if (Jump->Flow > 0) 730 return std::max(BaseDistance - (int64_t)Jump->Flow, (int64_t)0); 731 if (Func.Blocks[Jump->Target].Weight > 0) 732 return BaseDistance; 733 return BaseDistance * (NumBlocks() + 1); 734 }; 735 736 uint64_t NumBlocks() const { return Func.Blocks.size(); } 737 738 /// Rebalance unknown subgraphs so that the flow is split evenly across the 739 /// outgoing branches of every block of the subgraph. The method iterates over 740 /// blocks with known weight and identifies unknown subgraphs rooted at the 741 /// blocks. Then it verifies if flow rebalancing is feasible and applies it. 
742 void rebalanceUnknownSubgraphs() { 743 // Try to find unknown subgraphs from each block 744 for (uint64_t I = 0; I < Func.Blocks.size(); I++) { 745 auto SrcBlock = &Func.Blocks[I]; 746 // Verify if rebalancing rooted at SrcBlock is feasible 747 if (!canRebalanceAtRoot(SrcBlock)) 748 continue; 749 750 // Find an unknown subgraphs starting at SrcBlock. Along the way, 751 // fill in known destinations and intermediate unknown blocks. 752 std::vector<FlowBlock *> UnknownBlocks; 753 std::vector<FlowBlock *> KnownDstBlocks; 754 findUnknownSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks); 755 756 // Verify if rebalancing of the subgraph is feasible. If the search is 757 // successful, find the unique destination block (which can be null) 758 FlowBlock *DstBlock = nullptr; 759 if (!canRebalanceSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks, 760 DstBlock)) 761 continue; 762 763 // We cannot rebalance subgraphs containing cycles among unknown blocks 764 if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownBlocks)) 765 continue; 766 767 // Rebalance the flow 768 rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownBlocks); 769 } 770 } 771 772 /// Verify if rebalancing rooted at a given block is possible. 773 bool canRebalanceAtRoot(const FlowBlock *SrcBlock) { 774 // Do not attempt to find unknown subgraphs from an unknown or a 775 // zero-flow block 776 if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0) 777 return false; 778 779 // Do not attempt to process subgraphs from a block w/o unknown sucessors 780 bool HasUnknownSuccs = false; 781 for (auto Jump : SrcBlock->SuccJumps) { 782 if (Func.Blocks[Jump->Target].UnknownWeight) { 783 HasUnknownSuccs = true; 784 break; 785 } 786 } 787 if (!HasUnknownSuccs) 788 return false; 789 790 return true; 791 } 792 793 /// Find an unknown subgraph starting at block SrcBlock. The method sets 794 /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks. 
795 void findUnknownSubgraph(const FlowBlock *SrcBlock, 796 std::vector<FlowBlock *> &KnownDstBlocks, 797 std::vector<FlowBlock *> &UnknownBlocks) { 798 // Run BFS from SrcBlock and make sure all paths are going through unknown 799 // blocks and end at a known DstBlock 800 auto Visited = BitVector(NumBlocks(), false); 801 std::queue<uint64_t> Queue; 802 803 Queue.push(SrcBlock->Index); 804 Visited[SrcBlock->Index] = true; 805 while (!Queue.empty()) { 806 auto &Block = Func.Blocks[Queue.front()]; 807 Queue.pop(); 808 // Process blocks reachable from Block 809 for (auto Jump : Block.SuccJumps) { 810 // If Jump can be ignored, skip it 811 if (ignoreJump(SrcBlock, nullptr, Jump)) 812 continue; 813 814 uint64_t Dst = Jump->Target; 815 // If Dst has been visited, skip Jump 816 if (Visited[Dst]) 817 continue; 818 // Process block Dst 819 Visited[Dst] = true; 820 if (!Func.Blocks[Dst].UnknownWeight) { 821 KnownDstBlocks.push_back(&Func.Blocks[Dst]); 822 } else { 823 Queue.push(Dst); 824 UnknownBlocks.push_back(&Func.Blocks[Dst]); 825 } 826 } 827 } 828 } 829 830 /// Verify if rebalancing of the subgraph is feasible. If the checks are 831 /// successful, set the unique destination block, DstBlock (can be null). 832 bool canRebalanceSubgraph(const FlowBlock *SrcBlock, 833 const std::vector<FlowBlock *> &KnownDstBlocks, 834 const std::vector<FlowBlock *> &UnknownBlocks, 835 FlowBlock *&DstBlock) { 836 // If the list of unknown blocks is empty, we don't need rebalancing 837 if (UnknownBlocks.empty()) 838 return false; 839 840 // If there are multiple known sinks, we can't rebalance 841 if (KnownDstBlocks.size() > 1) 842 return false; 843 DstBlock = KnownDstBlocks.empty() ? 
nullptr : KnownDstBlocks.front(); 844 845 // Verify sinks of the subgraph 846 for (auto Block : UnknownBlocks) { 847 if (Block->SuccJumps.empty()) { 848 // If there are multiple (known and unknown) sinks, we can't rebalance 849 if (DstBlock != nullptr) 850 return false; 851 continue; 852 } 853 size_t NumIgnoredJumps = 0; 854 for (auto Jump : Block->SuccJumps) { 855 if (ignoreJump(SrcBlock, DstBlock, Jump)) 856 NumIgnoredJumps++; 857 } 858 // If there is a non-sink block in UnknownBlocks with all jumps ignored, 859 // then we can't rebalance 860 if (NumIgnoredJumps == Block->SuccJumps.size()) 861 return false; 862 } 863 864 return true; 865 } 866 867 /// Decide whether the Jump is ignored while processing an unknown subgraphs 868 /// rooted at basic block SrcBlock with the destination block, DstBlock. 869 bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock, 870 const FlowJump *Jump) { 871 // Ignore unlikely jumps with zero flow 872 if (Jump->IsUnlikely && Jump->Flow == 0) 873 return true; 874 875 auto JumpSource = &Func.Blocks[Jump->Source]; 876 auto JumpTarget = &Func.Blocks[Jump->Target]; 877 878 // Do not ignore jumps coming into DstBlock 879 if (DstBlock != nullptr && JumpTarget == DstBlock) 880 return false; 881 882 // Ignore jumps out of SrcBlock to known blocks 883 if (!JumpTarget->UnknownWeight && JumpSource == SrcBlock) 884 return true; 885 886 // Ignore jumps to known blocks with zero flow 887 if (!JumpTarget->UnknownWeight && JumpTarget->Flow == 0) 888 return true; 889 890 return false; 891 } 892 893 /// Verify if the given unknown subgraph is acyclic, and if yes, reorder 894 /// UnknownBlocks in the topological order (so that all jumps are "forward"). 
  bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
                         std::vector<FlowBlock *> &UnknownBlocks) {
    // Extract local in-degrees in the considered subgraph
    auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
    auto fillInDegree = [&](const FlowBlock *Block) {
      for (auto Jump : Block->SuccJumps) {
        // Ignored jumps do not contribute to in-degrees
        if (ignoreJump(SrcBlock, DstBlock, Jump))
          continue;
        LocalInDegree[Jump->Target]++;
      }
    };
    // Only SrcBlock and the unknown blocks are sources of subgraph edges
    fillInDegree(SrcBlock);
    for (auto Block : UnknownBlocks) {
      fillInDegree(Block);
    }
    // A loop containing SrcBlock
    if (LocalInDegree[SrcBlock->Index] > 0)
      return false;

    // Kahn-style topological traversal: repeatedly take a block whose local
    // in-degree dropped to zero; a cycle keeps some in-degrees positive, so
    // the corresponding blocks are never enqueued
    std::vector<FlowBlock *> AcyclicOrder;
    std::queue<uint64_t> Queue;
    Queue.push(SrcBlock->Index);
    while (!Queue.empty()) {
      FlowBlock *Block = &Func.Blocks[Queue.front()];
      Queue.pop();
      // Stop propagation once we reach DstBlock, if any
      if (DstBlock != nullptr && Block == DstBlock)
        break;

      // Keep an acyclic order of unknown blocks
      if (Block->UnknownWeight && Block != SrcBlock)
        AcyclicOrder.push_back(Block);

      // Add to the queue all successors with zero local in-degree
      for (auto Jump : Block->SuccJumps) {
        if (ignoreJump(SrcBlock, DstBlock, Jump))
          continue;
        uint64_t Dst = Jump->Target;
        LocalInDegree[Dst]--;
        if (LocalInDegree[Dst] == 0) {
          Queue.push(Dst);
        }
      }
    }

    // If there is a cycle in the subgraph, AcyclicOrder contains only a subset
    // of all blocks
    if (UnknownBlocks.size() != AcyclicOrder.size())
      return false;
    // Replace UnknownBlocks with its topological reordering
    UnknownBlocks = AcyclicOrder;
    return true;
  }

  /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and
  /// having UnknownBlocks intermediate blocks.
950 void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock, 951 const FlowBlock *DstBlock, 952 const std::vector<FlowBlock *> &UnknownBlocks) { 953 assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph"); 954 955 // Ditribute flow from the source block 956 uint64_t BlockFlow = 0; 957 // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps 958 for (auto Jump : SrcBlock->SuccJumps) { 959 if (ignoreJump(SrcBlock, DstBlock, Jump)) 960 continue; 961 BlockFlow += Jump->Flow; 962 } 963 rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow); 964 965 // Ditribute flow from the remaining blocks 966 for (auto Block : UnknownBlocks) { 967 assert(Block->UnknownWeight && "incorrect unknown subgraph"); 968 uint64_t BlockFlow = 0; 969 // Block's flow is the sum of incoming flows 970 for (auto Jump : Block->PredJumps) { 971 BlockFlow += Jump->Flow; 972 } 973 Block->Flow = BlockFlow; 974 rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow); 975 } 976 } 977 978 /// Redistribute flow for a block in a subgraph rooted at SrcBlock, 979 /// and ending at DstBlock. 980 void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock, 981 const FlowBlock *Block, uint64_t BlockFlow) { 982 // Process all successor jumps and update corresponding flow values 983 size_t BlockDegree = 0; 984 for (auto Jump : Block->SuccJumps) { 985 if (ignoreJump(SrcBlock, DstBlock, Jump)) 986 continue; 987 BlockDegree++; 988 } 989 // If all successor jumps of the block are ignored, skip it 990 if (DstBlock == nullptr && BlockDegree == 0) 991 return; 992 assert(BlockDegree > 0 && "all outgoing jumps are ignored"); 993 994 // Each of the Block's successors gets the following amount of flow. 
995 // Rounding the value up so that all flow is propagated 996 uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree; 997 for (auto Jump : Block->SuccJumps) { 998 if (ignoreJump(SrcBlock, DstBlock, Jump)) 999 continue; 1000 uint64_t Flow = std::min(SuccFlow, BlockFlow); 1001 Jump->Flow = Flow; 1002 BlockFlow -= Flow; 1003 } 1004 assert(BlockFlow == 0 && "not all flow is propagated"); 1005 } 1006 1007 /// A constant indicating an arbitrary exit block of a function. 1008 static constexpr uint64_t AnyExitBlock = uint64_t(-1); 1009 1010 /// The function. 1011 FlowFunction &Func; 1012 }; 1013 1014 /// Initializing flow network for a given function. 1015 /// 1016 /// Every block is split into three nodes that are responsible for (i) an 1017 /// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or 1018 /// reduction of the block weight. 1019 void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) { 1020 uint64_t NumBlocks = Func.Blocks.size(); 1021 assert(NumBlocks > 1 && "Too few blocks in a function"); 1022 LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n"); 1023 1024 // Pre-process data: make sure the entry weight is at least 1 1025 if (Func.Blocks[Func.Entry].Weight == 0) { 1026 Func.Blocks[Func.Entry].Weight = 1; 1027 } 1028 // Introducing dummy source/sink pairs to allow flow circulation. 1029 // The nodes corresponding to blocks of Func have indicies in the range 1030 // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values. 
1031 uint64_t S = 3 * NumBlocks; 1032 uint64_t T = S + 1; 1033 uint64_t S1 = S + 2; 1034 uint64_t T1 = S + 3; 1035 1036 Network.initialize(3 * NumBlocks + 4, S1, T1); 1037 1038 // Create three nodes for every block of the function 1039 for (uint64_t B = 0; B < NumBlocks; B++) { 1040 auto &Block = Func.Blocks[B]; 1041 assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) && 1042 "non-zero weight of a block w/o weight except for an entry"); 1043 1044 // Split every block into two nodes 1045 uint64_t Bin = 3 * B; 1046 uint64_t Bout = 3 * B + 1; 1047 uint64_t Baux = 3 * B + 2; 1048 if (Block.Weight > 0) { 1049 Network.addEdge(S1, Bout, Block.Weight, 0); 1050 Network.addEdge(Bin, T1, Block.Weight, 0); 1051 } 1052 1053 // Edges from S and to T 1054 assert((!Block.isEntry() || !Block.isExit()) && 1055 "a block cannot be an entry and an exit"); 1056 if (Block.isEntry()) { 1057 Network.addEdge(S, Bin, 0); 1058 } else if (Block.isExit()) { 1059 Network.addEdge(Bout, T, 0); 1060 } 1061 1062 // An auxiliary node to allow increase/reduction of block counts: 1063 // We assume that decreasing block counts is more expensive than increasing, 1064 // and thus, setting separate costs here. In the future we may want to tune 1065 // the relative costs so as to maximize the quality of generated profiles. 
1066 int64_t AuxCostInc = MinCostMaxFlow::AuxCostInc; 1067 int64_t AuxCostDec = MinCostMaxFlow::AuxCostDec; 1068 if (Block.UnknownWeight) { 1069 // Do not penalize changing weights of blocks w/o known profile count 1070 AuxCostInc = 0; 1071 AuxCostDec = 0; 1072 } else { 1073 // Increasing the count for "cold" blocks with zero initial count is more 1074 // expensive than for "hot" ones 1075 if (Block.Weight == 0) { 1076 AuxCostInc = MinCostMaxFlow::AuxCostIncZero; 1077 } 1078 // Modifying the count of the entry block is expensive 1079 if (Block.isEntry()) { 1080 AuxCostInc = MinCostMaxFlow::AuxCostIncEntry; 1081 AuxCostDec = MinCostMaxFlow::AuxCostDecEntry; 1082 } 1083 } 1084 // For blocks with self-edges, do not penalize a reduction of the count, 1085 // as all of the increase can be attributed to the self-edge 1086 if (Block.HasSelfEdge) { 1087 AuxCostDec = 0; 1088 } 1089 1090 Network.addEdge(Bin, Baux, AuxCostInc); 1091 Network.addEdge(Baux, Bout, AuxCostInc); 1092 if (Block.Weight > 0) { 1093 Network.addEdge(Bout, Baux, AuxCostDec); 1094 Network.addEdge(Baux, Bin, AuxCostDec); 1095 } 1096 } 1097 1098 // Creating edges for every jump 1099 for (auto &Jump : Func.Jumps) { 1100 uint64_t Src = Jump.Source; 1101 uint64_t Dst = Jump.Target; 1102 if (Src != Dst) { 1103 uint64_t SrcOut = 3 * Src + 1; 1104 uint64_t DstIn = 3 * Dst; 1105 uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0; 1106 Network.addEdge(SrcOut, DstIn, Cost); 1107 } 1108 } 1109 1110 // Make sure we have a valid flow circulation 1111 Network.addEdge(T, S, 0); 1112 } 1113 1114 /// Extract resulting block and edge counts from the flow network. 
void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) {
  uint64_t NumBlocks = Func.Blocks.size();

  // Extract resulting block counts
  for (uint64_t Src = 0; Src < NumBlocks; Src++) {
    auto &Block = Func.Blocks[Src];
    // The block count is the total flow leaving the block's "out" node
    uint64_t SrcOut = 3 * Src + 1;
    int64_t Flow = 0;
    for (auto &Adj : Network.getFlow(SrcOut)) {
      uint64_t DstIn = Adj.first;
      int64_t DstFlow = Adj.second;
      // Nodes with index 2 mod 3 (below the dummy range) are auxiliary nodes
      bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2);
      // Flow into the block's own auxiliary node is counted only when the
      // block has a self-edge (the self-edge is modeled via the aux node)
      if (!IsAuxNode || Block.HasSelfEdge) {
        Flow += DstFlow;
      }
    }
    Block.Flow = Flow;
    assert(Flow >= 0 && "negative block flow");
  }

  // Extract resulting jump counts
  for (auto &Jump : Func.Jumps) {
    uint64_t Src = Jump.Source;
    uint64_t Dst = Jump.Target;
    int64_t Flow = 0;
    if (Src != Dst) {
      // A regular jump: read the flow along the corresponding network edge
      uint64_t SrcOut = 3 * Src + 1;
      uint64_t DstIn = 3 * Dst;
      Flow = Network.getFlow(SrcOut, DstIn);
    } else {
      // A self-edge: its flow goes through the block's auxiliary node
      uint64_t SrcOut = 3 * Src + 1;
      uint64_t SrcAux = 3 * Src + 2;
      int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux);
      if (AuxFlow > 0)
        Flow = AuxFlow;
    }
    Jump.Flow = Flow;
    assert(Flow >= 0 && "negative jump flow");
  }
}

#ifndef NDEBUG
/// Verify that the computed flow values satisfy flow conservation rules
void verifyWeights(const FlowFunction &Func) {
  const uint64_t NumBlocks = Func.Blocks.size();
  // Accumulate, per block, the total incoming and outgoing jump flow
  auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
  auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
  for (auto &Jump : Func.Jumps) {
    InFlow[Jump.Target] += Jump.Flow;
    OutFlow[Jump.Source] += Jump.Flow;
  }

  // Check conservation: entries produce flow, exits consume it, and every
  // other block must have equal incoming and outgoing flow
  uint64_t TotalInFlow = 0;
  uint64_t TotalOutFlow = 0;
  for (uint64_t I = 0; I < NumBlocks; I++) {
    auto &Block = Func.Blocks[I];
    if (Block.isEntry()) {
      TotalInFlow += Block.Flow;
      assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
    } else if (Block.isExit()) {
      TotalOutFlow += Block.Flow;
      assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
    } else {
      assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
      assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
    }
  }
  assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");

  // Verify that there are no isolated flow components
  // One could modify FlowFunction to hold edges indexed by the sources, which
  // will avoid a creation of the object
  auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
  for (auto &Jump : Func.Jumps) {
    if (Jump.Flow > 0) {
      PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
    }
  }

  // Run BFS from the source along edges with positive flow
  std::queue<uint64_t> Queue;
  auto Visited = BitVector(NumBlocks, false);
  Queue.push(Func.Entry);
  Visited[Func.Entry] = true;
  while (!Queue.empty()) {
    uint64_t Src = Queue.front();
    Queue.pop();
    for (uint64_t Dst : PositiveFlowEdges[Src]) {
      if (!Visited[Dst]) {
        Queue.push(Dst);
        Visited[Dst] = true;
      }
    }
  }

  // Verify that every block that has a positive flow is reached from the source
  // along edges with a positive flow
  for (uint64_t I = 0; I < NumBlocks; I++) {
    auto &Block = Func.Blocks[I];
    assert((Visited[I] || Block.Flow == 0) && "an isolated flow component");
  }
}
#endif

} // end of anonymous namespace

/// Apply the profile inference algorithm for a given flow function
void llvm::applyFlowInference(FlowFunction &Func) {
  // Create and apply an inference network model
  auto InferenceNetwork = MinCostMaxFlow();
  initializeNetwork(InferenceNetwork, Func);
  InferenceNetwork.run();

  // Extract flow values for every block and every edge
  extractWeights(InferenceNetwork, Func);

  // Post-processing adjustments to the flow
  auto Adjuster = FlowAdjuster(Func);
  Adjuster.run();

#ifndef NDEBUG
  // Verify the result
  verifyWeights(Func);
#endif
}