Lines Matching +full:positive +full:- +full:phase

1 //===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
14 //===----------------------------------------------------------------------===//
26 #define DEBUG_TYPE "sample-profile-inference"
31 "sample-profile-even-flow-distribution", cl::init(true), cl::Hidden,
36 "sample-profile-rebalance-unknown", cl::init(true), cl::Hidden,
37 cl::desc("Evenly re-distribute flow among unknown subgraphs."));
40 "sample-profile-join-islands", cl::init(true), cl::Hidden,
41 cl::desc("Join isolated components having positive flow."));
44 "sample-profile-profi-cost-block-inc", cl::init(10), cl::Hidden,
48 "sample-profile-profi-cost-block-dec", cl::init(20), cl::Hidden,
52 "sample-profile-profi-cost-block-entry-inc", cl::init(40), cl::Hidden,
56 "sample-profile-profi-cost-block-entry-dec", cl::init(10), cl::Hidden,
60 "sample-profile-profi-cost-block-zero-inc", cl::init(11), cl::Hidden,
61 cl::desc("The cost of increasing a count of zero-weight block by one."));
64 "sample-profile-profi-cost-block-unknown-inc", cl::init(0), cl::Hidden,
72 /// The minimum-cost maximum flow algorithm.
75 /// network using a modified version of the classical Moore-Bellman-Ford
77 /// flow is sent along paths of positive capacity from the source to the sink.
78 /// The worst-case time complexity of the implementation is O(v(f)*m*n), where
81 /// instances is sub-quadratic, that is, o(n^2).
131 /// Multiple edges between a pair of nodes are allowed but self-edges
146 DstEdge.Cost = -Cost;
224 uint64_t EdgeCapacity = uint64_t(Edge.Capacity - Edge.Flow);
232 /// Check for existence of an augmenting path with a positive capacity.
237 Node.ParentNode = uint64_t(-1);
238 Node.ParentEdgeIndex = uint64_t(-1);
252 // negative-weight cycles and the following two invariants are maintained:
255 // allows to prune the search-space of the path-finding algorithm using
256 // the following early-stop criteria:
257 // -- If we find a path with zero-distance from Source to Target, stop the
259 // -- If we have Dist[Source, V] > Dist[Source, Target], then do not
303 RevEdge.Flow -= PathCapacity;
320 // - we are currently visiting Nodes[NodeIdx] and
321 // - the next edge to scan is Edges[NodeIdx][EdgeIdx]
326 // Phase 0: Initialize Node attributes and Time for DFS run
338 // Phase 1: Start DFS traversal from Source
384 // Phase 2: Extract all forward (DAG) edges and fill in AugmentingEdges
406 // Phase 0: Initialization
411 Edge->AugmentedFlow = 0;
415 // Phase 1: Send a unit of fractional flow along the DAG
425 Nodes[Edge->Dst].FracFlow += EdgeFlow;
426 if (Edge->Capacity == INF)
428 uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow;
436 // Phase 2: Send an integral flow of MaxFlowAmount
445 uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree;
447 uint64_t Dst = Edge->Dst;
449 EdgeFlow = std::min(EdgeFlow, uint64_t(Edge->Capacity - Edge->Flow));
451 Nodes[Src].IntFlow -= EdgeFlow;
452 Edge->AugmentedFlow += EdgeFlow;
458 // Phase 3: Send excess flow back traversing the nodes backwards.
461 for (size_t Idx = AugmentingOrder.size() - 1; Idx > 0; Idx--) {
462 uint64_t Src = AugmentingOrder[Idx - 1];
466 uint64_t Dst = Edge->Dst;
469 uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow);
470 Nodes[Dst].IntFlow -= EdgeFlow;
472 Edge->AugmentedFlow -= EdgeFlow;
476 // Phase 4: Update flow values along all edges
482 assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow);
484 auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex];
485 Edge->Flow += Edge->AugmentedFlow;
486 RevEdge.Flow -= Edge->AugmentedFlow;
487 if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0)
518 uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity;
537 /// Data fields utilized in DAG-augmentation:
563 /// Data fields utilized in DAG-augmentation:
584 /// A post-processing adjustment of the control flow. It applies two steps by
587 /// - First, it removes all isolated components ("islands") with a positive flow
592 /// - Second, it identifies all "unknown subgraphs" consisting of basic blocks
596 /// - u dominates v and u is not unknown;
597 /// - v post-dominates u; and
598 /// - all inner-nodes of all (u,v)-paths are unknown.
605 /// Apply the post-processing.
624 // Iterate over all non-reachable blocks and adjust their weights
631 assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
635 Jump->Flow += 1;
636 Func.Blocks[Jump->Target].Flow += 1;
638 findReachable(Jump->Target, Visited);
644 /// Run BFS from a given block along the jumps with a positive flow and mark
656 uint64_t Dst = Jump->Target;
657 if (Jump->Flow > 0 && !Visited[Dst]) {
682 /// If Target == -1, then the path ends at an exit block.
699 uint64_t Src = Queue.begin()->second;
707 uint64_t Dst = Jump->Target;
735 assert(Now == Parent[Now]->Target && "incorrect parent jump");
737 Now = Parent[Now]->Source;
745 /// In order to incite the path to use blocks/jumps with large positive flow,
748 /// - to minimize the number of unlikely jumps used and subject to that,
749 /// - to minimize the number of Flow == 0 jumps used and subject to that,
750 /// - minimizes total multiplicative Flow increase for the remaining edges.
754 if (Jump->IsUnlikely)
760 if (Jump->Flow > 0)
761 return BaseDistance + BaseDistance / Jump->Flow;
803 // zero-flow block
804 if (SrcBlock->HasUnknownWeight || SrcBlock->Flow == 0)
809 for (auto *Jump : SrcBlock->SuccJumps) {
810 if (Func.Blocks[Jump->Target].HasUnknownWeight) {
831 Queue.push(SrcBlock->Index);
832 Visited[SrcBlock->Index] = true;
842 uint64_t Dst = Jump->Target;
875 if (Block->SuccJumps.empty()) {
882 for (auto *Jump : Block->SuccJumps) {
886 // If there is a non-sink block in UnknownBlocks with all jumps ignored,
888 if (NumIgnoredJumps == Block->SuccJumps.size())
900 if (Jump->IsUnlikely && Jump->Flow == 0)
903 auto JumpSource = &Func.Blocks[Jump->Source];
904 auto JumpTarget = &Func.Blocks[Jump->Target];
911 if (!JumpTarget->HasUnknownWeight && JumpSource == SrcBlock)
915 if (!JumpTarget->HasUnknownWeight && JumpTarget->Flow == 0)
925 // Extract local in-degrees in the considered subgraph
928 for (auto *Jump : Block->SuccJumps) {
931 LocalInDegree[Jump->Target]++;
939 if (LocalInDegree[SrcBlock->Index] > 0)
944 Queue.push(SrcBlock->Index);
953 if (Block->HasUnknownWeight && Block != SrcBlock)
956 // Add to the queue all successors with zero local in-degree
957 for (auto *Jump : Block->SuccJumps) {
960 uint64_t Dst = Jump->Target;
961 LocalInDegree[Dst]--;
981 assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");
985 // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps
986 for (auto *Jump : SrcBlock->SuccJumps) {
989 BlockFlow += Jump->Flow;
995 assert(Block->HasUnknownWeight && "incorrect unknown subgraph");
998 for (auto *Jump : Block->PredJumps) {
999 BlockFlow += Jump->Flow;
1001 Block->Flow = BlockFlow;
1012 for (auto *Jump : Block->SuccJumps) {
1024 uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
1025 for (auto *Jump : Block->SuccJumps) {
1029 Jump->Flow = Flow;
1030 BlockFlow -= Flow;
1036 static constexpr uint64_t AnyExitBlock = uint64_t(-1);
1166 // Adjusting the fall-through branch
1171 // The cost is different for fall-through and non-fall-through branches
1178 assert(Jump.Weight > 0 && "found zero-weight jump with a positive weight");
1236 auto It = UniqueSuccs.insert(Jump->Target);
1248 "non-zero weight of a block w/o weight except for an entry");
1253 "non-zero weight of a jump w/o weight");
1294 // Run BFS from the source along edges with positive flow
1310 // Verify that every block that has a positive flow is reached from the source
1311 // along edges with a positive flow
1354 // Post-processing adjustments to the flow
1367 // Set the params from the command-line flags.