1 //===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a profile inference algorithm. Given an incomplete and 10 // possibly imprecise block counts, the algorithm reconstructs realistic block 11 // and edge counts that satisfy flow conservation rules, while minimally modify 12 // input block counts. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/Transforms/Utils/SampleProfileInference.h" 17 #include "llvm/Support/Debug.h" 18 #include <queue> 19 #include <set> 20 21 using namespace llvm; 22 #define DEBUG_TYPE "sample-profile-inference" 23 24 namespace { 25 26 /// A value indicating an infinite flow/capacity/weight of a block/edge. 27 /// Not using numeric_limits<int64_t>::max(), as the values can be summed up 28 /// during the execution. 29 static constexpr int64_t INF = ((int64_t)1) << 50; 30 31 /// The minimum-cost maximum flow algorithm. 32 /// 33 /// The algorithm finds the maximum flow of minimum cost on a given (directed) 34 /// network using a modified version of the classical Moore-Bellman-Ford 35 /// approach. The algorithm applies a number of augmentation iterations in which 36 /// flow is sent along paths of positive capacity from the source to the sink. 37 /// The worst-case time complexity of the implementation is O(v(f)*m*n), where 38 /// where m is the number of edges, n is the number of vertices, and v(f) is the 39 /// value of the maximum flow. However, the observed running time on typical 40 /// instances is sub-quadratic, that is, o(n^2). 41 /// 42 /// The input is a set of edges with specified costs and capacities, and a pair 43 /// of nodes (source and sink). The output is the flow along each edge of the 44 /// minimum total cost respecting the given edge capacities. 45 class MinCostMaxFlow { 46 public: 47 // Initialize algorithm's data structures for a network of a given size. 48 void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) { 49 Source = SourceNode; 50 Target = SinkNode; 51 52 Nodes = std::vector<Node>(NodeCount); 53 Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>()); 54 } 55 56 // Run the algorithm. 57 int64_t run() { 58 // Find an augmenting path and update the flow along the path 59 size_t AugmentationIters = 0; 60 while (findAugmentingPath()) { 61 augmentFlowAlongPath(); 62 AugmentationIters++; 63 } 64 65 // Compute the total flow and its cost 66 int64_t TotalCost = 0; 67 int64_t TotalFlow = 0; 68 for (uint64_t Src = 0; Src < Nodes.size(); Src++) { 69 for (auto &Edge : Edges[Src]) { 70 if (Edge.Flow > 0) { 71 TotalCost += Edge.Cost * Edge.Flow; 72 if (Src == Source) 73 TotalFlow += Edge.Flow; 74 } 75 } 76 } 77 LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters 78 << " iterations with " << TotalFlow << " total flow" 79 << " of " << TotalCost << " cost\n"); 80 (void)TotalFlow; 81 return TotalCost; 82 } 83 84 /// Adding an edge to the network with a specified capacity and a cost. 85 /// Multiple edges between a pair of nodes are allowed but self-edges 86 /// are not supported. 87 void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) { 88 assert(Capacity > 0 && "adding an edge of zero capacity"); 89 assert(Src != Dst && "loop edge are not supported"); 90 91 Edge SrcEdge; 92 SrcEdge.Dst = Dst; 93 SrcEdge.Cost = Cost; 94 SrcEdge.Capacity = Capacity; 95 SrcEdge.Flow = 0; 96 SrcEdge.RevEdgeIndex = Edges[Dst].size(); 97 98 Edge DstEdge; 99 DstEdge.Dst = Src; 100 DstEdge.Cost = -Cost; 101 DstEdge.Capacity = 0; 102 DstEdge.Flow = 0; 103 DstEdge.RevEdgeIndex = Edges[Src].size(); 104 105 Edges[Src].push_back(SrcEdge); 106 Edges[Dst].push_back(DstEdge); 107 } 108 109 /// Adding an edge to the network of infinite capacity and a given cost. 110 void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) { 111 addEdge(Src, Dst, INF, Cost); 112 } 113 114 /// Get the total flow from a given source node. 115 /// Returns a list of pairs (target node, amount of flow to the target). 116 const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const { 117 std::vector<std::pair<uint64_t, int64_t>> Flow; 118 for (auto &Edge : Edges[Src]) { 119 if (Edge.Flow > 0) 120 Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow)); 121 } 122 return Flow; 123 } 124 125 /// Get the total flow between a pair of nodes. 126 int64_t getFlow(uint64_t Src, uint64_t Dst) const { 127 int64_t Flow = 0; 128 for (auto &Edge : Edges[Src]) { 129 if (Edge.Dst == Dst) { 130 Flow += Edge.Flow; 131 } 132 } 133 return Flow; 134 } 135 136 /// A cost of increasing a block's count by one. 137 static constexpr int64_t AuxCostInc = 10; 138 /// A cost of decreasing a block's count by one. 139 static constexpr int64_t AuxCostDec = 20; 140 /// A cost of increasing a count of zero-weight block by one. 141 static constexpr int64_t AuxCostIncZero = 11; 142 /// A cost of increasing the entry block's count by one. 143 static constexpr int64_t AuxCostIncEntry = 40; 144 /// A cost of decreasing the entry block's count by one. 145 static constexpr int64_t AuxCostDecEntry = 10; 146 /// A cost of taking an unlikely jump. 147 static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 20; 148 149 private: 150 /// Check for existence of an augmenting path with a positive capacity. 151 bool findAugmentingPath() { 152 // Initialize data structures 153 for (auto &Node : Nodes) { 154 Node.Distance = INF; 155 Node.ParentNode = uint64_t(-1); 156 Node.ParentEdgeIndex = uint64_t(-1); 157 Node.Taken = false; 158 } 159 160 std::queue<uint64_t> Queue; 161 Queue.push(Source); 162 Nodes[Source].Distance = 0; 163 Nodes[Source].Taken = true; 164 while (!Queue.empty()) { 165 uint64_t Src = Queue.front(); 166 Queue.pop(); 167 Nodes[Src].Taken = false; 168 // Although the residual network contains edges with negative costs 169 // (in particular, backward edges), it can be shown that there are no 170 // negative-weight cycles and the following two invariants are maintained: 171 // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V, 172 // where Dist is the length of the shortest path between two nodes. This 173 // allows to prune the search-space of the path-finding algorithm using 174 // the following early-stop criteria: 175 // -- If we find a path with zero-distance from Source to Target, stop the 176 // search, as the path is the shortest since Dist[Source, Target] >= 0; 177 // -- If we have Dist[Source, V] > Dist[Source, Target], then do not 178 // process node V, as it is guaranteed _not_ to be on a shortest path 179 // from Source to Target; it follows from inequalities 180 // Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target] 181 // >= Dist[Source, V] 182 if (Nodes[Target].Distance == 0) 183 break; 184 if (Nodes[Src].Distance > Nodes[Target].Distance) 185 continue; 186 187 // Process adjacent edges 188 for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) { 189 auto &Edge = Edges[Src][EdgeIdx]; 190 if (Edge.Flow < Edge.Capacity) { 191 uint64_t Dst = Edge.Dst; 192 int64_t NewDistance = Nodes[Src].Distance + Edge.Cost; 193 if (Nodes[Dst].Distance > NewDistance) { 194 // Update the distance and the parent node/edge 195 Nodes[Dst].Distance = NewDistance; 196 Nodes[Dst].ParentNode = Src; 197 Nodes[Dst].ParentEdgeIndex = EdgeIdx; 198 // Add the node to the queue, if it is not there yet 199 if (!Nodes[Dst].Taken) { 200 Queue.push(Dst); 201 Nodes[Dst].Taken = true; 202 } 203 } 204 } 205 } 206 } 207 208 return Nodes[Target].Distance != INF; 209 } 210 211 /// Update the current flow along the augmenting path. 212 void augmentFlowAlongPath() { 213 // Find path capacity 214 int64_t PathCapacity = INF; 215 uint64_t Now = Target; 216 while (Now != Source) { 217 uint64_t Pred = Nodes[Now].ParentNode; 218 auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; 219 PathCapacity = std::min(PathCapacity, Edge.Capacity - Edge.Flow); 220 Now = Pred; 221 } 222 223 assert(PathCapacity > 0 && "found an incorrect augmenting path"); 224 225 // Update the flow along the path 226 Now = Target; 227 while (Now != Source) { 228 uint64_t Pred = Nodes[Now].ParentNode; 229 auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; 230 auto &RevEdge = Edges[Now][Edge.RevEdgeIndex]; 231 232 Edge.Flow += PathCapacity; 233 RevEdge.Flow -= PathCapacity; 234 235 Now = Pred; 236 } 237 } 238 239 /// An node in a flow network. 240 struct Node { 241 /// The cost of the cheapest path from the source to the current node. 242 int64_t Distance; 243 /// The node preceding the current one in the path. 244 uint64_t ParentNode; 245 /// The index of the edge between ParentNode and the current node. 246 uint64_t ParentEdgeIndex; 247 /// An indicator of whether the current node is in a queue. 248 bool Taken; 249 }; 250 /// An edge in a flow network. 251 struct Edge { 252 /// The cost of the edge. 253 int64_t Cost; 254 /// The capacity of the edge. 255 int64_t Capacity; 256 /// The current flow on the edge. 257 int64_t Flow; 258 /// The destination node of the edge. 259 uint64_t Dst; 260 /// The index of the reverse edge between Dst and the current node. 261 uint64_t RevEdgeIndex; 262 }; 263 264 /// The set of network nodes. 265 std::vector<Node> Nodes; 266 /// The set of network edges. 267 std::vector<std::vector<Edge>> Edges; 268 /// Source node of the flow. 269 uint64_t Source; 270 /// Target (sink) node of the flow. 271 uint64_t Target; 272 }; 273 274 /// A post-processing adjustment of control flow. It applies two steps by 275 /// rerouting some flow and making it more realistic: 276 /// 277 /// - First, it removes all isolated components ("islands") with a positive flow 278 /// that are unreachable from the entry block. For every such component, we 279 /// find the shortest from the entry to an exit passing through the component, 280 /// and increase the flow by one unit along the path. 281 /// 282 /// - Second, it identifies all "unknown subgraphs" consisting of basic blocks 283 /// with no sampled counts. Then it rebalnces the flow that goes through such 284 /// a subgraph so that each branch is taken with probability 50%. 285 /// An unknown subgraph is such that for every two nodes u and v: 286 /// - u dominates v and u is not unknown; 287 /// - v post-dominates u; and 288 /// - all inner-nodes of all (u,v)-paths are unknown. 289 /// 290 class FlowAdjuster { 291 public: 292 FlowAdjuster(FlowFunction &Func) : Func(Func) { 293 assert(Func.Blocks[Func.Entry].isEntry() && 294 "incorrect index of the entry block"); 295 } 296 297 // Run the post-processing 298 void run() { 299 /// Adjust the flow to get rid of isolated components. 300 joinIsolatedComponents(); 301 302 /// Rebalance the flow inside unknown subgraphs. 303 rebalanceUnknownSubgraphs(); 304 } 305 306 /// The probability for the first successor of a unknown subgraph 307 static constexpr double UnknownFirstSuccProbability = 0.5; 308 309 private: 310 void joinIsolatedComponents() { 311 // Find blocks that are reachable from the source 312 auto Visited = std::vector<bool>(NumBlocks(), false); 313 findReachable(Func.Entry, Visited); 314 315 // Iterate over all non-reachable blocks and adjust their weights 316 for (uint64_t I = 0; I < NumBlocks(); I++) { 317 auto &Block = Func.Blocks[I]; 318 if (Block.Flow > 0 && !Visited[I]) { 319 // Find a path from the entry to an exit passing through the block I 320 auto Path = findShortestPath(I); 321 // Increase the flow along the path 322 assert(Path.size() > 0 && Path[0]->Source == Func.Entry && 323 "incorrectly computed path adjusting control flow"); 324 Func.Blocks[Func.Entry].Flow += 1; 325 for (auto &Jump : Path) { 326 Jump->Flow += 1; 327 Func.Blocks[Jump->Target].Flow += 1; 328 // Update reachability 329 findReachable(Jump->Target, Visited); 330 } 331 } 332 } 333 } 334 335 /// Run BFS from a given block along the jumps with a positive flow and mark 336 /// all reachable blocks. 337 void findReachable(uint64_t Src, std::vector<bool> &Visited) { 338 if (Visited[Src]) 339 return; 340 std::queue<uint64_t> Queue; 341 Queue.push(Src); 342 Visited[Src] = true; 343 while (!Queue.empty()) { 344 Src = Queue.front(); 345 Queue.pop(); 346 for (auto Jump : Func.Blocks[Src].SuccJumps) { 347 uint64_t Dst = Jump->Target; 348 if (Jump->Flow > 0 && !Visited[Dst]) { 349 Queue.push(Dst); 350 Visited[Dst] = true; 351 } 352 } 353 } 354 } 355 356 /// Find the shortest path from the entry block to an exit block passing 357 /// through a given block. 358 std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) { 359 // A path from the entry block to BlockIdx 360 auto ForwardPath = findShortestPath(Func.Entry, BlockIdx); 361 // A path from BlockIdx to an exit block 362 auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock); 363 364 // Concatenate the two paths 365 std::vector<FlowJump *> Result; 366 Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end()); 367 Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end()); 368 return Result; 369 } 370 371 /// Apply the Dijkstra algorithm to find the shortest path from a given 372 /// Source to a given Target block. 373 /// If Target == -1, then the path ends at an exit block. 374 std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) { 375 // Quit early, if possible 376 if (Source == Target) 377 return std::vector<FlowJump *>(); 378 if (Func.Blocks[Source].isExit() && Target == AnyExitBlock) 379 return std::vector<FlowJump *>(); 380 381 // Initialize data structures 382 auto Distance = std::vector<int64_t>(NumBlocks(), INF); 383 auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr); 384 Distance[Source] = 0; 385 std::set<std::pair<uint64_t, uint64_t>> Queue; 386 Queue.insert(std::make_pair(Distance[Source], Source)); 387 388 // Run the Dijkstra algorithm 389 while (!Queue.empty()) { 390 uint64_t Src = Queue.begin()->second; 391 Queue.erase(Queue.begin()); 392 // If we found a solution, quit early 393 if (Src == Target || 394 (Func.Blocks[Src].isExit() && Target == AnyExitBlock)) 395 break; 396 397 for (auto Jump : Func.Blocks[Src].SuccJumps) { 398 uint64_t Dst = Jump->Target; 399 int64_t JumpDist = jumpDistance(Jump); 400 if (Distance[Dst] > Distance[Src] + JumpDist) { 401 Queue.erase(std::make_pair(Distance[Dst], Dst)); 402 403 Distance[Dst] = Distance[Src] + JumpDist; 404 Parent[Dst] = Jump; 405 406 Queue.insert(std::make_pair(Distance[Dst], Dst)); 407 } 408 } 409 } 410 // If Target is not provided, find the closest exit block 411 if (Target == AnyExitBlock) { 412 for (uint64_t I = 0; I < NumBlocks(); I++) { 413 if (Func.Blocks[I].isExit() && Parent[I] != nullptr) { 414 if (Target == AnyExitBlock || Distance[Target] > Distance[I]) { 415 Target = I; 416 } 417 } 418 } 419 } 420 assert(Parent[Target] != nullptr && "a path does not exist"); 421 422 // Extract the constructed path 423 std::vector<FlowJump *> Result; 424 uint64_t Now = Target; 425 while (Now != Source) { 426 assert(Now == Parent[Now]->Target && "incorrect parent jump"); 427 Result.push_back(Parent[Now]); 428 Now = Parent[Now]->Source; 429 } 430 // Reverse the path, since it is extracted from Target to Source 431 std::reverse(Result.begin(), Result.end()); 432 return Result; 433 } 434 435 /// A distance of a path for a given jump. 436 /// In order to incite the path to use blocks/jumps with large positive flow, 437 /// and avoid changing branch probability of outgoing edges drastically, 438 /// set the distance as follows: 439 /// if Jump.Flow > 0, then distance = max(100 - Jump->Flow, 0) 440 /// if Block.Weight > 0, then distance = 1 441 /// otherwise distance >> 1 442 int64_t jumpDistance(FlowJump *Jump) const { 443 int64_t BaseDistance = 100; 444 if (Jump->IsUnlikely) 445 return MinCostMaxFlow::AuxCostUnlikely; 446 if (Jump->Flow > 0) 447 return std::max(BaseDistance - (int64_t)Jump->Flow, (int64_t)0); 448 if (Func.Blocks[Jump->Target].Weight > 0) 449 return BaseDistance; 450 return BaseDistance * (NumBlocks() + 1); 451 }; 452 453 uint64_t NumBlocks() const { return Func.Blocks.size(); } 454 455 /// Rebalance unknown subgraphs so as each branch splits with probabilities 456 /// UnknownFirstSuccProbability and 1 - UnknownFirstSuccProbability 457 void rebalanceUnknownSubgraphs() { 458 static_assert( 459 UnknownFirstSuccProbability >= 0.0 && 460 UnknownFirstSuccProbability <= 1.0, 461 "the share of the unknown successor should be between 0 and 1"); 462 // Try to find unknown subgraphs from each non-unknown block 463 for (uint64_t I = 0; I < Func.Blocks.size(); I++) { 464 auto SrcBlock = &Func.Blocks[I]; 465 // Do not attempt to find unknown successors from a unknown or a 466 // zero-flow block 467 if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0) 468 continue; 469 470 std::vector<FlowBlock *> UnknownSuccs; 471 FlowBlock *DstBlock = nullptr; 472 // Find a unknown subgraphs starting at block SrcBlock 473 if (!findUnknownSubgraph(SrcBlock, DstBlock, UnknownSuccs)) 474 continue; 475 // At the moment, we do not rebalance subgraphs containing cycles among 476 // unknown blocks 477 if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownSuccs)) 478 continue; 479 480 // Rebalance the flow 481 rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownSuccs); 482 } 483 } 484 485 /// Find a unknown subgraph starting at block SrcBlock. 486 /// If the search is successful, the method sets DstBlock and UnknownSuccs. 487 bool findUnknownSubgraph(FlowBlock *SrcBlock, FlowBlock *&DstBlock, 488 std::vector<FlowBlock *> &UnknownSuccs) { 489 // Run BFS from SrcBlock and make sure all paths are going through unknown 490 // blocks and end at a non-unknown DstBlock 491 auto Visited = std::vector<bool>(NumBlocks(), false); 492 std::queue<uint64_t> Queue; 493 DstBlock = nullptr; 494 495 Queue.push(SrcBlock->Index); 496 Visited[SrcBlock->Index] = true; 497 while (!Queue.empty()) { 498 auto &Block = Func.Blocks[Queue.front()]; 499 Queue.pop(); 500 // Process blocks reachable from Block 501 for (auto Jump : Block.SuccJumps) { 502 uint64_t Dst = Jump->Target; 503 if (Visited[Dst]) 504 continue; 505 Visited[Dst] = true; 506 if (!Func.Blocks[Dst].UnknownWeight) { 507 // If we see non-unique non-unknown block reachable from SrcBlock, 508 // stop processing and skip rebalancing 509 FlowBlock *CandidateDstBlock = &Func.Blocks[Dst]; 510 if (DstBlock != nullptr && DstBlock != CandidateDstBlock) 511 return false; 512 DstBlock = CandidateDstBlock; 513 } else { 514 Queue.push(Dst); 515 UnknownSuccs.push_back(&Func.Blocks[Dst]); 516 } 517 } 518 } 519 520 // If the list of unknown blocks is empty, we don't need rebalancing 521 if (UnknownSuccs.empty()) 522 return false; 523 // If all reachable nodes from SrcBlock are unknown, skip rebalancing 524 if (DstBlock == nullptr) 525 return false; 526 // If any of the unknown blocks is an exit block, skip rebalancing 527 for (auto Block : UnknownSuccs) { 528 if (Block->isExit()) 529 return false; 530 } 531 532 return true; 533 } 534 535 /// Verify if the given unknown subgraph is acyclic, and if yes, reorder 536 /// UnknownSuccs in the topological order (so that all jumps are "forward"). 537 bool isAcyclicSubgraph(FlowBlock *SrcBlock, FlowBlock *DstBlock, 538 std::vector<FlowBlock *> &UnknownSuccs) { 539 // Extract local in-degrees in the considered subgraph 540 auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0); 541 for (auto Jump : SrcBlock->SuccJumps) { 542 LocalInDegree[Jump->Target]++; 543 } 544 for (uint64_t I = 0; I < UnknownSuccs.size(); I++) { 545 for (auto Jump : UnknownSuccs[I]->SuccJumps) { 546 LocalInDegree[Jump->Target]++; 547 } 548 } 549 // A loop containing SrcBlock 550 if (LocalInDegree[SrcBlock->Index] > 0) 551 return false; 552 553 std::vector<FlowBlock *> AcyclicOrder; 554 std::queue<uint64_t> Queue; 555 Queue.push(SrcBlock->Index); 556 while (!Queue.empty()) { 557 auto &Block = Func.Blocks[Queue.front()]; 558 Queue.pop(); 559 // Stop propagation once we reach DstBlock 560 if (Block.Index == DstBlock->Index) 561 break; 562 563 AcyclicOrder.push_back(&Block); 564 // Add to the queue all successors with zero local in-degree 565 for (auto Jump : Block.SuccJumps) { 566 uint64_t Dst = Jump->Target; 567 LocalInDegree[Dst]--; 568 if (LocalInDegree[Dst] == 0) { 569 Queue.push(Dst); 570 } 571 } 572 } 573 574 // If there is a cycle in the subgraph, AcyclicOrder contains only a subset 575 // of all blocks 576 if (UnknownSuccs.size() + 1 != AcyclicOrder.size()) 577 return false; 578 UnknownSuccs = AcyclicOrder; 579 return true; 580 } 581 582 /// Rebalance a given subgraph. 583 void rebalanceUnknownSubgraph(FlowBlock *SrcBlock, FlowBlock *DstBlock, 584 std::vector<FlowBlock *> &UnknownSuccs) { 585 assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph"); 586 assert(UnknownSuccs.front() == SrcBlock && "incorrect order of unknowns"); 587 588 for (auto Block : UnknownSuccs) { 589 // Block's flow is the sum of incoming flows 590 uint64_t TotalFlow = 0; 591 if (Block == SrcBlock) { 592 TotalFlow = Block->Flow; 593 } else { 594 for (auto Jump : Block->PredJumps) { 595 TotalFlow += Jump->Flow; 596 } 597 Block->Flow = TotalFlow; 598 } 599 600 // Process all successor jumps and update corresponding flow values 601 for (uint64_t I = 0; I < Block->SuccJumps.size(); I++) { 602 auto Jump = Block->SuccJumps[I]; 603 if (I + 1 == Block->SuccJumps.size()) { 604 Jump->Flow = TotalFlow; 605 continue; 606 } 607 uint64_t Flow = uint64_t(TotalFlow * UnknownFirstSuccProbability); 608 Jump->Flow = Flow; 609 TotalFlow -= Flow; 610 } 611 } 612 } 613 614 /// A constant indicating an arbitrary exit block of a function. 615 static constexpr uint64_t AnyExitBlock = uint64_t(-1); 616 617 /// The function. 618 FlowFunction &Func; 619 }; 620 621 /// Initializing flow network for a given function. 622 /// 623 /// Every block is split into three nodes that are responsible for (i) an 624 /// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or 625 /// reduction of the block weight. 626 void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) { 627 uint64_t NumBlocks = Func.Blocks.size(); 628 assert(NumBlocks > 1 && "Too few blocks in a function"); 629 LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n"); 630 631 // Pre-process data: make sure the entry weight is at least 1 632 if (Func.Blocks[Func.Entry].Weight == 0) { 633 Func.Blocks[Func.Entry].Weight = 1; 634 } 635 // Introducing dummy source/sink pairs to allow flow circulation. 636 // The nodes corresponding to blocks of Func have indicies in the range 637 // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values. 638 uint64_t S = 3 * NumBlocks; 639 uint64_t T = S + 1; 640 uint64_t S1 = S + 2; 641 uint64_t T1 = S + 3; 642 643 Network.initialize(3 * NumBlocks + 4, S1, T1); 644 645 // Create three nodes for every block of the function 646 for (uint64_t B = 0; B < NumBlocks; B++) { 647 auto &Block = Func.Blocks[B]; 648 assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) && 649 "non-zero weight of a block w/o weight except for an entry"); 650 651 // Split every block into two nodes 652 uint64_t Bin = 3 * B; 653 uint64_t Bout = 3 * B + 1; 654 uint64_t Baux = 3 * B + 2; 655 if (Block.Weight > 0) { 656 Network.addEdge(S1, Bout, Block.Weight, 0); 657 Network.addEdge(Bin, T1, Block.Weight, 0); 658 } 659 660 // Edges from S and to T 661 assert((!Block.isEntry() || !Block.isExit()) && 662 "a block cannot be an entry and an exit"); 663 if (Block.isEntry()) { 664 Network.addEdge(S, Bin, 0); 665 } else if (Block.isExit()) { 666 Network.addEdge(Bout, T, 0); 667 } 668 669 // An auxiliary node to allow increase/reduction of block counts: 670 // We assume that decreasing block counts is more expensive than increasing, 671 // and thus, setting separate costs here. In the future we may want to tune 672 // the relative costs so as to maximize the quality of generated profiles. 673 int64_t AuxCostInc = MinCostMaxFlow::AuxCostInc; 674 int64_t AuxCostDec = MinCostMaxFlow::AuxCostDec; 675 if (Block.UnknownWeight) { 676 // Do not penalize changing weights of blocks w/o known profile count 677 AuxCostInc = 0; 678 AuxCostDec = 0; 679 } else { 680 // Increasing the count for "cold" blocks with zero initial count is more 681 // expensive than for "hot" ones 682 if (Block.Weight == 0) { 683 AuxCostInc = MinCostMaxFlow::AuxCostIncZero; 684 } 685 // Modifying the count of the entry block is expensive 686 if (Block.isEntry()) { 687 AuxCostInc = MinCostMaxFlow::AuxCostIncEntry; 688 AuxCostDec = MinCostMaxFlow::AuxCostDecEntry; 689 } 690 } 691 // For blocks with self-edges, do not penalize a reduction of the count, 692 // as all of the increase can be attributed to the self-edge 693 if (Block.HasSelfEdge) { 694 AuxCostDec = 0; 695 } 696 697 Network.addEdge(Bin, Baux, AuxCostInc); 698 Network.addEdge(Baux, Bout, AuxCostInc); 699 if (Block.Weight > 0) { 700 Network.addEdge(Bout, Baux, AuxCostDec); 701 Network.addEdge(Baux, Bin, AuxCostDec); 702 } 703 } 704 705 // Creating edges for every jump 706 for (auto &Jump : Func.Jumps) { 707 uint64_t Src = Jump.Source; 708 uint64_t Dst = Jump.Target; 709 if (Src != Dst) { 710 uint64_t SrcOut = 3 * Src + 1; 711 uint64_t DstIn = 3 * Dst; 712 uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0; 713 Network.addEdge(SrcOut, DstIn, Cost); 714 } 715 } 716 717 // Make sure we have a valid flow circulation 718 Network.addEdge(T, S, 0); 719 } 720 721 /// Extract resulting block and edge counts from the flow network. 722 void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) { 723 uint64_t NumBlocks = Func.Blocks.size(); 724 725 // Extract resulting block counts 726 for (uint64_t Src = 0; Src < NumBlocks; Src++) { 727 auto &Block = Func.Blocks[Src]; 728 uint64_t SrcOut = 3 * Src + 1; 729 int64_t Flow = 0; 730 for (auto &Adj : Network.getFlow(SrcOut)) { 731 uint64_t DstIn = Adj.first; 732 int64_t DstFlow = Adj.second; 733 bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2); 734 if (!IsAuxNode || Block.HasSelfEdge) { 735 Flow += DstFlow; 736 } 737 } 738 Block.Flow = Flow; 739 assert(Flow >= 0 && "negative block flow"); 740 } 741 742 // Extract resulting jump counts 743 for (auto &Jump : Func.Jumps) { 744 uint64_t Src = Jump.Source; 745 uint64_t Dst = Jump.Target; 746 int64_t Flow = 0; 747 if (Src != Dst) { 748 uint64_t SrcOut = 3 * Src + 1; 749 uint64_t DstIn = 3 * Dst; 750 Flow = Network.getFlow(SrcOut, DstIn); 751 } else { 752 uint64_t SrcOut = 3 * Src + 1; 753 uint64_t SrcAux = 3 * Src + 2; 754 int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux); 755 if (AuxFlow > 0) 756 Flow = AuxFlow; 757 } 758 Jump.Flow = Flow; 759 assert(Flow >= 0 && "negative jump flow"); 760 } 761 } 762 763 #ifndef NDEBUG 764 /// Verify that the computed flow values satisfy flow conservation rules 765 void verifyWeights(const FlowFunction &Func) { 766 const uint64_t NumBlocks = Func.Blocks.size(); 767 auto InFlow = std::vector<uint64_t>(NumBlocks, 0); 768 auto OutFlow = std::vector<uint64_t>(NumBlocks, 0); 769 for (auto &Jump : Func.Jumps) { 770 InFlow[Jump.Target] += Jump.Flow; 771 OutFlow[Jump.Source] += Jump.Flow; 772 } 773 774 uint64_t TotalInFlow = 0; 775 uint64_t TotalOutFlow = 0; 776 for (uint64_t I = 0; I < NumBlocks; I++) { 777 auto &Block = Func.Blocks[I]; 778 if (Block.isEntry()) { 779 TotalInFlow += Block.Flow; 780 assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow"); 781 } else if (Block.isExit()) { 782 TotalOutFlow += Block.Flow; 783 assert(Block.Flow == InFlow[I] && "incorrectly computed control flow"); 784 } else { 785 assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow"); 786 assert(Block.Flow == InFlow[I] && "incorrectly computed control flow"); 787 } 788 } 789 assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow"); 790 791 // Verify that there are no isolated flow components 792 // One could modify FlowFunction to hold edges indexed by the sources, which 793 // will avoid a creation of the object 794 auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks); 795 for (auto &Jump : Func.Jumps) { 796 if (Jump.Flow > 0) { 797 PositiveFlowEdges[Jump.Source].push_back(Jump.Target); 798 } 799 } 800 801 // Run BFS from the source along edges with positive flow 802 std::queue<uint64_t> Queue; 803 auto Visited = std::vector<bool>(NumBlocks, false); 804 Queue.push(Func.Entry); 805 Visited[Func.Entry] = true; 806 while (!Queue.empty()) { 807 uint64_t Src = Queue.front(); 808 Queue.pop(); 809 for (uint64_t Dst : PositiveFlowEdges[Src]) { 810 if (!Visited[Dst]) { 811 Queue.push(Dst); 812 Visited[Dst] = true; 813 } 814 } 815 } 816 817 // Verify that every block that has a positive flow is reached from the source 818 // along edges with a positive flow 819 for (uint64_t I = 0; I < NumBlocks; I++) { 820 auto &Block = Func.Blocks[I]; 821 assert((Visited[I] || Block.Flow == 0) && "an isolated flow component"); 822 } 823 } 824 #endif 825 826 } // end of anonymous namespace 827 828 /// Apply the profile inference algorithm for a given flow function 829 void llvm::applyFlowInference(FlowFunction &Func) { 830 // Create and apply an inference network model 831 auto InferenceNetwork = MinCostMaxFlow(); 832 initializeNetwork(InferenceNetwork, Func); 833 InferenceNetwork.run(); 834 835 // Extract flow values for every block and every edge 836 extractWeights(InferenceNetwork, Func); 837 838 // Post-processing adjustments to the flow 839 auto Adjuster = FlowAdjuster(Func); 840 Adjuster.run(); 841 842 #ifndef NDEBUG 843 // Verify the result 844 verifyWeights(Func); 845 #endif 846 } 847