173471bf0Spatrick //===--- SyncDependenceAnalysis.cpp - Compute Control Divergence Effects --===//
209467b48Spatrick //
309467b48Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
409467b48Spatrick // See https://llvm.org/LICENSE.txt for license information.
509467b48Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
609467b48Spatrick //
709467b48Spatrick //===----------------------------------------------------------------------===//
809467b48Spatrick //
909467b48Spatrick // This file implements an algorithm that returns for a divergent branch
1009467b48Spatrick // the set of basic blocks whose phi nodes become divergent due to divergent
1109467b48Spatrick // control. These are the blocks that are reachable by two disjoint paths from
1209467b48Spatrick // the branch or loop exits that have a reaching path that is disjoint from a
1309467b48Spatrick // path to the loop latch.
1409467b48Spatrick //
1509467b48Spatrick // The SyncDependenceAnalysis is used in the DivergenceAnalysis to model
1609467b48Spatrick // control-induced divergence in phi nodes.
1709467b48Spatrick //
1809467b48Spatrick //
19*d415bd75Srobert // -- Reference --
20*d415bd75Srobert // The algorithm is presented in Section 5 of
21*d415bd75Srobert //
22*d415bd75Srobert // An abstract interpretation for SPMD divergence
23*d415bd75Srobert // on reducible control flow graphs.
24*d415bd75Srobert // Julian Rosemann, Simon Moll and Sebastian Hack
25*d415bd75Srobert // POPL '21
26*d415bd75Srobert //
2709467b48Spatrick //
2809467b48Spatrick // -- Sync dependence --
29*d415bd75Srobert // Sync dependence characterizes the control flow aspect of the
3009467b48Spatrick // propagation of branch divergence. For example,
3109467b48Spatrick //
3209467b48Spatrick // %cond = icmp slt i32 %tid, 10
3309467b48Spatrick // br i1 %cond, label %then, label %else
3409467b48Spatrick // then:
3509467b48Spatrick // br label %merge
3609467b48Spatrick // else:
3709467b48Spatrick // br label %merge
3809467b48Spatrick // merge:
3909467b48Spatrick // %a = phi i32 [ 0, %then ], [ 1, %else ]
4009467b48Spatrick //
4109467b48Spatrick // Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
4209467b48Spatrick // because %tid is not on its use-def chains, %a is sync dependent on %tid
4309467b48Spatrick // because the branch "br i1 %cond" depends on %tid and affects which value %a
4409467b48Spatrick // is assigned to.
4509467b48Spatrick //
46*d415bd75Srobert //
4709467b48Spatrick // -- Reduction to SSA construction --
4809467b48Spatrick // There are two disjoint paths from A to X, if a certain variant of SSA
49*d415bd75Srobert // construction places a phi node in X under the following set-up scheme.
5009467b48Spatrick //
5109467b48Spatrick // This variant of SSA construction ignores incoming undef values.
5209467b48Spatrick // That is paths from the entry without a definition do not result in
5309467b48Spatrick // phi nodes.
5409467b48Spatrick //
5509467b48Spatrick // entry
5609467b48Spatrick // / \
5709467b48Spatrick // A \
5809467b48Spatrick // / \ Y
5909467b48Spatrick // B C /
6009467b48Spatrick // \ / \ /
6109467b48Spatrick // D E
6209467b48Spatrick // \ /
6309467b48Spatrick // F
64*d415bd75Srobert //
6509467b48Spatrick // Assume that A contains a divergent branch. We are interested
6609467b48Spatrick // in the set of all blocks where each block is reachable from A
6709467b48Spatrick // via two disjoint paths. This would be the set {D, F} in this
6809467b48Spatrick // case.
6909467b48Spatrick // To generally reduce this query to SSA construction we introduce
7009467b48Spatrick // a virtual variable x and assign to x different values in each
7109467b48Spatrick // successor block of A.
72*d415bd75Srobert //
7309467b48Spatrick // entry
7409467b48Spatrick // / \
7509467b48Spatrick // A \
7609467b48Spatrick // / \ Y
7709467b48Spatrick // x = 0 x = 1 /
7809467b48Spatrick // \ / \ /
7909467b48Spatrick // D E
8009467b48Spatrick // \ /
8109467b48Spatrick // F
82*d415bd75Srobert //
8309467b48Spatrick // Our flavor of SSA construction for x will construct the following
84*d415bd75Srobert //
8509467b48Spatrick // entry
8609467b48Spatrick // / \
8709467b48Spatrick // A \
8809467b48Spatrick // / \ Y
8909467b48Spatrick // x0 = 0 x1 = 1 /
9009467b48Spatrick // \ / \ /
9109467b48Spatrick // x2 = phi E
9209467b48Spatrick // \ /
9309467b48Spatrick // x3 = phi
94*d415bd75Srobert //
9509467b48Spatrick // The blocks D and F contain phi nodes and are thus each reachable
9609467b48Spatrick // by two disjoins paths from A.
9709467b48Spatrick //
9809467b48Spatrick // -- Remarks --
99*d415bd75Srobert // * In case of loop exits we need to check the disjoint path criterion for loops.
100*d415bd75Srobert // To this end, we check whether the definition of x differs between the
10109467b48Spatrick // loop exit and the loop header (_after_ SSA construction).
10209467b48Spatrick //
103*d415bd75Srobert // -- Known Limitations & Future Work --
104*d415bd75Srobert // * The algorithm requires reducible loops because the implementation
105*d415bd75Srobert // implicitly performs a single iteration of the underlying data flow analysis.
106*d415bd75Srobert // This was done for pragmatism, simplicity and speed.
107*d415bd75Srobert //
108*d415bd75Srobert // Relevant related work for extending the algorithm to irreducible control:
109*d415bd75Srobert // A simple algorithm for global data flow analysis problems.
110*d415bd75Srobert // Matthew S. Hecht and Jeffrey D. Ullman.
111*d415bd75Srobert // SIAM Journal on Computing, 4(4):519–532, December 1975.
112*d415bd75Srobert //
113*d415bd75Srobert // * Another reason for requiring reducible loops is that points of
114*d415bd75Srobert // synchronization in irreducible loops aren't 'obvious' - there is no unique
115*d415bd75Srobert // header where threads 'should' synchronize when entering or coming back
116*d415bd75Srobert // around from the latch.
117*d415bd75Srobert //
11809467b48Spatrick //===----------------------------------------------------------------------===//
119*d415bd75Srobert
12073471bf0Spatrick #include "llvm/Analysis/SyncDependenceAnalysis.h"
12109467b48Spatrick #include "llvm/ADT/SmallPtrSet.h"
122*d415bd75Srobert #include "llvm/Analysis/LoopInfo.h"
12309467b48Spatrick #include "llvm/IR/BasicBlock.h"
12409467b48Spatrick #include "llvm/IR/CFG.h"
12509467b48Spatrick #include "llvm/IR/Dominators.h"
12609467b48Spatrick #include "llvm/IR/Function.h"
12709467b48Spatrick
12873471bf0Spatrick #include <functional>
12909467b48Spatrick
13009467b48Spatrick #define DEBUG_TYPE "sync-dependence"
13109467b48Spatrick
13273471bf0Spatrick // The SDA algorithm operates on a modified CFG - we modify the edges leaving
13373471bf0Spatrick // loop headers as follows:
13473471bf0Spatrick //
13573471bf0Spatrick // * We remove all edges leaving all loop headers.
13673471bf0Spatrick // * We add additional edges from the loop headers to their exit blocks.
13773471bf0Spatrick //
13873471bf0Spatrick // The modification is virtual, that is whenever we visit a loop header we
13973471bf0Spatrick // pretend it had different successors.
14073471bf0Spatrick namespace {
14173471bf0Spatrick using namespace llvm;
14273471bf0Spatrick
14373471bf0Spatrick // Custom Post-Order Traveral
14473471bf0Spatrick //
14573471bf0Spatrick // We cannot use the vanilla (R)PO computation of LLVM because:
14673471bf0Spatrick // * We (virtually) modify the CFG.
147*d415bd75Srobert // * We want a loop-compact block enumeration, that is the numbers assigned to
148*d415bd75Srobert // blocks of a loop form an interval
149*d415bd75Srobert //
15073471bf0Spatrick using POCB = std::function<void(const BasicBlock &)>;
15173471bf0Spatrick using VisitedSet = std::set<const BasicBlock *>;
15273471bf0Spatrick using BlockStack = std::vector<const BasicBlock *>;
15373471bf0Spatrick
15473471bf0Spatrick // forward
15573471bf0Spatrick static void computeLoopPO(const LoopInfo &LI, Loop &Loop, POCB CallBack,
15673471bf0Spatrick VisitedSet &Finalized);
15773471bf0Spatrick
15873471bf0Spatrick // for a nested region (top-level loop or nested loop)
computeStackPO(BlockStack & Stack,const LoopInfo & LI,Loop * Loop,POCB CallBack,VisitedSet & Finalized)15973471bf0Spatrick static void computeStackPO(BlockStack &Stack, const LoopInfo &LI, Loop *Loop,
16073471bf0Spatrick POCB CallBack, VisitedSet &Finalized) {
16173471bf0Spatrick const auto *LoopHeader = Loop ? Loop->getHeader() : nullptr;
16273471bf0Spatrick while (!Stack.empty()) {
16373471bf0Spatrick const auto *NextBB = Stack.back();
16473471bf0Spatrick
16573471bf0Spatrick auto *NestedLoop = LI.getLoopFor(NextBB);
16673471bf0Spatrick bool IsNestedLoop = NestedLoop != Loop;
16773471bf0Spatrick
16873471bf0Spatrick // Treat the loop as a node
16973471bf0Spatrick if (IsNestedLoop) {
17073471bf0Spatrick SmallVector<BasicBlock *, 3> NestedExits;
17173471bf0Spatrick NestedLoop->getUniqueExitBlocks(NestedExits);
17273471bf0Spatrick bool PushedNodes = false;
17373471bf0Spatrick for (const auto *NestedExitBB : NestedExits) {
17473471bf0Spatrick if (NestedExitBB == LoopHeader)
17573471bf0Spatrick continue;
17673471bf0Spatrick if (Loop && !Loop->contains(NestedExitBB))
17773471bf0Spatrick continue;
17873471bf0Spatrick if (Finalized.count(NestedExitBB))
17973471bf0Spatrick continue;
18073471bf0Spatrick PushedNodes = true;
18173471bf0Spatrick Stack.push_back(NestedExitBB);
18273471bf0Spatrick }
18373471bf0Spatrick if (!PushedNodes) {
18473471bf0Spatrick // All loop exits finalized -> finish this node
18573471bf0Spatrick Stack.pop_back();
18673471bf0Spatrick computeLoopPO(LI, *NestedLoop, CallBack, Finalized);
18773471bf0Spatrick }
18873471bf0Spatrick continue;
18973471bf0Spatrick }
19073471bf0Spatrick
19173471bf0Spatrick // DAG-style
19273471bf0Spatrick bool PushedNodes = false;
19373471bf0Spatrick for (const auto *SuccBB : successors(NextBB)) {
19473471bf0Spatrick if (SuccBB == LoopHeader)
19573471bf0Spatrick continue;
19673471bf0Spatrick if (Loop && !Loop->contains(SuccBB))
19773471bf0Spatrick continue;
19873471bf0Spatrick if (Finalized.count(SuccBB))
19973471bf0Spatrick continue;
20073471bf0Spatrick PushedNodes = true;
20173471bf0Spatrick Stack.push_back(SuccBB);
20273471bf0Spatrick }
20373471bf0Spatrick if (!PushedNodes) {
20473471bf0Spatrick // Never push nodes twice
20573471bf0Spatrick Stack.pop_back();
20673471bf0Spatrick if (!Finalized.insert(NextBB).second)
20773471bf0Spatrick continue;
20873471bf0Spatrick CallBack(*NextBB);
20973471bf0Spatrick }
21073471bf0Spatrick }
21173471bf0Spatrick }
21273471bf0Spatrick
computeTopLevelPO(Function & F,const LoopInfo & LI,POCB CallBack)21373471bf0Spatrick static void computeTopLevelPO(Function &F, const LoopInfo &LI, POCB CallBack) {
21473471bf0Spatrick VisitedSet Finalized;
21573471bf0Spatrick BlockStack Stack;
21673471bf0Spatrick Stack.reserve(24); // FIXME made-up number
21773471bf0Spatrick Stack.push_back(&F.getEntryBlock());
21873471bf0Spatrick computeStackPO(Stack, LI, nullptr, CallBack, Finalized);
21973471bf0Spatrick }
22073471bf0Spatrick
computeLoopPO(const LoopInfo & LI,Loop & Loop,POCB CallBack,VisitedSet & Finalized)22173471bf0Spatrick static void computeLoopPO(const LoopInfo &LI, Loop &Loop, POCB CallBack,
22273471bf0Spatrick VisitedSet &Finalized) {
22373471bf0Spatrick /// Call CallBack on all loop blocks.
22473471bf0Spatrick std::vector<const BasicBlock *> Stack;
22573471bf0Spatrick const auto *LoopHeader = Loop.getHeader();
22673471bf0Spatrick
22773471bf0Spatrick // Visit the header last
22873471bf0Spatrick Finalized.insert(LoopHeader);
22973471bf0Spatrick CallBack(*LoopHeader);
23073471bf0Spatrick
23173471bf0Spatrick // Initialize with immediate successors
23273471bf0Spatrick for (const auto *BB : successors(LoopHeader)) {
23373471bf0Spatrick if (!Loop.contains(BB))
23473471bf0Spatrick continue;
23573471bf0Spatrick if (BB == LoopHeader)
23673471bf0Spatrick continue;
23773471bf0Spatrick Stack.push_back(BB);
23873471bf0Spatrick }
23973471bf0Spatrick
24073471bf0Spatrick // Compute PO inside region
24173471bf0Spatrick computeStackPO(Stack, LI, &Loop, CallBack, Finalized);
24273471bf0Spatrick }
24373471bf0Spatrick
24473471bf0Spatrick } // namespace
24573471bf0Spatrick
24609467b48Spatrick namespace llvm {
24709467b48Spatrick
24873471bf0Spatrick ControlDivergenceDesc SyncDependenceAnalysis::EmptyDivergenceDesc;
24909467b48Spatrick
SyncDependenceAnalysis(const DominatorTree & DT,const PostDominatorTree & PDT,const LoopInfo & LI)25009467b48Spatrick SyncDependenceAnalysis::SyncDependenceAnalysis(const DominatorTree &DT,
25109467b48Spatrick const PostDominatorTree &PDT,
25209467b48Spatrick const LoopInfo &LI)
25373471bf0Spatrick : DT(DT), PDT(PDT), LI(LI) {
25473471bf0Spatrick computeTopLevelPO(*DT.getRoot()->getParent(), LI,
25573471bf0Spatrick [&](const BasicBlock &BB) { LoopPO.appendBlock(BB); });
25673471bf0Spatrick }
25709467b48Spatrick
258*d415bd75Srobert SyncDependenceAnalysis::~SyncDependenceAnalysis() = default;
25909467b48Spatrick
260*d415bd75Srobert namespace {
26109467b48Spatrick // divergence propagator for reducible CFGs
26209467b48Spatrick struct DivergencePropagator {
26373471bf0Spatrick const ModifiedPO &LoopPOT;
26409467b48Spatrick const DominatorTree &DT;
26509467b48Spatrick const PostDominatorTree &PDT;
26609467b48Spatrick const LoopInfo &LI;
26773471bf0Spatrick const BasicBlock &DivTermBlock;
26809467b48Spatrick
26973471bf0Spatrick // * if BlockLabels[IndexOf(B)] == C then C is the dominating definition at
27073471bf0Spatrick // block B
27173471bf0Spatrick // * if BlockLabels[IndexOf(B)] ~ undef then we haven't seen B yet
27273471bf0Spatrick // * if BlockLabels[IndexOf(B)] == B then B is a join point of disjoint paths
27373471bf0Spatrick // from X or B is an immediate successor of X (initial value).
27473471bf0Spatrick using BlockLabelVec = std::vector<const BasicBlock *>;
27573471bf0Spatrick BlockLabelVec BlockLabels;
27673471bf0Spatrick // divergent join and loop exit descriptor.
27773471bf0Spatrick std::unique_ptr<ControlDivergenceDesc> DivDesc;
27809467b48Spatrick
DivergencePropagatorllvm::__anon3f3a50660311::DivergencePropagator27973471bf0Spatrick DivergencePropagator(const ModifiedPO &LoopPOT, const DominatorTree &DT,
28073471bf0Spatrick const PostDominatorTree &PDT, const LoopInfo &LI,
28173471bf0Spatrick const BasicBlock &DivTermBlock)
28273471bf0Spatrick : LoopPOT(LoopPOT), DT(DT), PDT(PDT), LI(LI), DivTermBlock(DivTermBlock),
28373471bf0Spatrick BlockLabels(LoopPOT.size(), nullptr),
28473471bf0Spatrick DivDesc(new ControlDivergenceDesc) {}
28509467b48Spatrick
printDefsllvm::__anon3f3a50660311::DivergencePropagator28609467b48Spatrick void printDefs(raw_ostream &Out) {
28773471bf0Spatrick Out << "Propagator::BlockLabels {\n";
28873471bf0Spatrick for (int BlockIdx = (int)BlockLabels.size() - 1; BlockIdx > 0; --BlockIdx) {
28973471bf0Spatrick const auto *Label = BlockLabels[BlockIdx];
29073471bf0Spatrick Out << LoopPOT.getBlockAt(BlockIdx)->getName().str() << "(" << BlockIdx
29173471bf0Spatrick << ") : ";
29273471bf0Spatrick if (!Label) {
29373471bf0Spatrick Out << "<null>\n";
29409467b48Spatrick } else {
29573471bf0Spatrick Out << Label->getName() << "\n";
29609467b48Spatrick }
29709467b48Spatrick }
29809467b48Spatrick Out << "}\n";
29909467b48Spatrick }
30009467b48Spatrick
30173471bf0Spatrick // Push a definition (\p PushedLabel) to \p SuccBlock and return whether this
30273471bf0Spatrick // causes a divergent join.
computeJoinllvm::__anon3f3a50660311::DivergencePropagator30373471bf0Spatrick bool computeJoin(const BasicBlock &SuccBlock, const BasicBlock &PushedLabel) {
30473471bf0Spatrick auto SuccIdx = LoopPOT.getIndexOf(SuccBlock);
30509467b48Spatrick
30673471bf0Spatrick // unset or same reaching label
30773471bf0Spatrick const auto *OldLabel = BlockLabels[SuccIdx];
30873471bf0Spatrick if (!OldLabel || (OldLabel == &PushedLabel)) {
30973471bf0Spatrick BlockLabels[SuccIdx] = &PushedLabel;
31073471bf0Spatrick return false;
31109467b48Spatrick }
31209467b48Spatrick
31373471bf0Spatrick // Update the definition
31473471bf0Spatrick BlockLabels[SuccIdx] = &SuccBlock;
31573471bf0Spatrick return true;
31609467b48Spatrick }
31709467b48Spatrick
31873471bf0Spatrick // visiting a virtual loop exit edge from the loop header --> temporal
31973471bf0Spatrick // divergence on join
visitLoopExitEdgellvm::__anon3f3a50660311::DivergencePropagator32073471bf0Spatrick bool visitLoopExitEdge(const BasicBlock &ExitBlock,
32173471bf0Spatrick const BasicBlock &DefBlock, bool FromParentLoop) {
32273471bf0Spatrick // Pushing from a non-parent loop cannot cause temporal divergence.
32373471bf0Spatrick if (!FromParentLoop)
32473471bf0Spatrick return visitEdge(ExitBlock, DefBlock);
32509467b48Spatrick
32673471bf0Spatrick if (!computeJoin(ExitBlock, DefBlock))
32773471bf0Spatrick return false;
32873471bf0Spatrick
32973471bf0Spatrick // Identified a divergent loop exit
33073471bf0Spatrick DivDesc->LoopDivBlocks.insert(&ExitBlock);
33173471bf0Spatrick LLVM_DEBUG(dbgs() << "\tDivergent loop exit: " << ExitBlock.getName()
33273471bf0Spatrick << "\n");
33373471bf0Spatrick return true;
33409467b48Spatrick }
33509467b48Spatrick
33673471bf0Spatrick // process \p SuccBlock with reaching definition \p DefBlock
visitEdgellvm::__anon3f3a50660311::DivergencePropagator33773471bf0Spatrick bool visitEdge(const BasicBlock &SuccBlock, const BasicBlock &DefBlock) {
33873471bf0Spatrick if (!computeJoin(SuccBlock, DefBlock))
33973471bf0Spatrick return false;
34009467b48Spatrick
34173471bf0Spatrick // Divergent, disjoint paths join.
34273471bf0Spatrick DivDesc->JoinDivBlocks.insert(&SuccBlock);
34373471bf0Spatrick LLVM_DEBUG(dbgs() << "\tDivergent join: " << SuccBlock.getName());
34473471bf0Spatrick return true;
34573471bf0Spatrick }
34673471bf0Spatrick
computeJoinPointsllvm::__anon3f3a50660311::DivergencePropagator34773471bf0Spatrick std::unique_ptr<ControlDivergenceDesc> computeJoinPoints() {
34873471bf0Spatrick assert(DivDesc);
34973471bf0Spatrick
35073471bf0Spatrick LLVM_DEBUG(dbgs() << "SDA:computeJoinPoints: " << DivTermBlock.getName()
35173471bf0Spatrick << "\n");
35273471bf0Spatrick
35373471bf0Spatrick const auto *DivBlockLoop = LI.getLoopFor(&DivTermBlock);
35473471bf0Spatrick
35573471bf0Spatrick // Early stopping criterion
35673471bf0Spatrick int FloorIdx = LoopPOT.size() - 1;
35773471bf0Spatrick const BasicBlock *FloorLabel = nullptr;
35809467b48Spatrick
35909467b48Spatrick // bootstrap with branch targets
36073471bf0Spatrick int BlockIdx = 0;
36109467b48Spatrick
36273471bf0Spatrick for (const auto *SuccBlock : successors(&DivTermBlock)) {
36373471bf0Spatrick auto SuccIdx = LoopPOT.getIndexOf(*SuccBlock);
36473471bf0Spatrick BlockLabels[SuccIdx] = SuccBlock;
36509467b48Spatrick
36673471bf0Spatrick // Find the successor with the highest index to start with
36773471bf0Spatrick BlockIdx = std::max<int>(BlockIdx, SuccIdx);
36873471bf0Spatrick FloorIdx = std::min<int>(FloorIdx, SuccIdx);
36909467b48Spatrick
37073471bf0Spatrick // Identify immediate divergent loop exits
37173471bf0Spatrick if (!DivBlockLoop)
37273471bf0Spatrick continue;
37309467b48Spatrick
37473471bf0Spatrick const auto *BlockLoop = LI.getLoopFor(SuccBlock);
37573471bf0Spatrick if (BlockLoop && DivBlockLoop->contains(BlockLoop))
37673471bf0Spatrick continue;
37773471bf0Spatrick DivDesc->LoopDivBlocks.insert(SuccBlock);
37873471bf0Spatrick LLVM_DEBUG(dbgs() << "\tImmediate divergent loop exit: "
37973471bf0Spatrick << SuccBlock->getName() << "\n");
380097a140dSpatrick }
38109467b48Spatrick
38209467b48Spatrick // propagate definitions at the immediate successors of the node in RPO
38373471bf0Spatrick for (; BlockIdx >= FloorIdx; --BlockIdx) {
38473471bf0Spatrick LLVM_DEBUG(dbgs() << "Before next visit:\n"; printDefs(dbgs()));
38573471bf0Spatrick
38673471bf0Spatrick // Any label available here
38773471bf0Spatrick const auto *Label = BlockLabels[BlockIdx];
38873471bf0Spatrick if (!Label)
38973471bf0Spatrick continue;
39073471bf0Spatrick
39173471bf0Spatrick // Ok. Get the block
39273471bf0Spatrick const auto *Block = LoopPOT.getBlockAt(BlockIdx);
39309467b48Spatrick LLVM_DEBUG(dbgs() << "SDA::joins. visiting " << Block->getName() << "\n");
39409467b48Spatrick
39509467b48Spatrick auto *BlockLoop = LI.getLoopFor(Block);
39673471bf0Spatrick bool IsLoopHeader = BlockLoop && BlockLoop->getHeader() == Block;
39773471bf0Spatrick bool CausedJoin = false;
39873471bf0Spatrick int LoweredFloorIdx = FloorIdx;
39973471bf0Spatrick if (IsLoopHeader) {
40073471bf0Spatrick // Disconnect from immediate successors and propagate directly to loop
40173471bf0Spatrick // exits.
40209467b48Spatrick SmallVector<BasicBlock *, 4> BlockLoopExits;
40309467b48Spatrick BlockLoop->getExitBlocks(BlockLoopExits);
40473471bf0Spatrick
40573471bf0Spatrick bool IsParentLoop = BlockLoop->contains(&DivTermBlock);
40609467b48Spatrick for (const auto *BlockLoopExit : BlockLoopExits) {
40773471bf0Spatrick CausedJoin |= visitLoopExitEdge(*BlockLoopExit, *Label, IsParentLoop);
40873471bf0Spatrick LoweredFloorIdx = std::min<int>(LoweredFloorIdx,
40973471bf0Spatrick LoopPOT.getIndexOf(*BlockLoopExit));
41073471bf0Spatrick }
41173471bf0Spatrick } else {
41273471bf0Spatrick // Acyclic successor case
41373471bf0Spatrick for (const auto *SuccBlock : successors(Block)) {
41473471bf0Spatrick CausedJoin |= visitEdge(*SuccBlock, *Label);
41573471bf0Spatrick LoweredFloorIdx =
41673471bf0Spatrick std::min<int>(LoweredFloorIdx, LoopPOT.getIndexOf(*SuccBlock));
41773471bf0Spatrick }
41809467b48Spatrick }
41909467b48Spatrick
42073471bf0Spatrick // Floor update
42173471bf0Spatrick if (CausedJoin) {
42273471bf0Spatrick // 1. Different labels pushed to successors
42373471bf0Spatrick FloorIdx = LoweredFloorIdx;
42473471bf0Spatrick } else if (FloorLabel != Label) {
42573471bf0Spatrick // 2. No join caused BUT we pushed a label that is different than the
42673471bf0Spatrick // last pushed label
42773471bf0Spatrick FloorIdx = LoweredFloorIdx;
42873471bf0Spatrick FloorLabel = Label;
42909467b48Spatrick }
43009467b48Spatrick }
43109467b48Spatrick
43209467b48Spatrick LLVM_DEBUG(dbgs() << "SDA::joins. After propagation:\n"; printDefs(dbgs()));
43309467b48Spatrick
43473471bf0Spatrick return std::move(DivDesc);
43509467b48Spatrick }
43609467b48Spatrick };
437*d415bd75Srobert } // end anonymous namespace
43809467b48Spatrick
43973471bf0Spatrick #ifndef NDEBUG
printBlockSet(ConstBlockSet & Blocks,raw_ostream & Out)44073471bf0Spatrick static void printBlockSet(ConstBlockSet &Blocks, raw_ostream &Out) {
44173471bf0Spatrick Out << "[";
44273471bf0Spatrick ListSeparator LS;
44373471bf0Spatrick for (const auto *BB : Blocks)
44473471bf0Spatrick Out << LS << BB->getName();
44573471bf0Spatrick Out << "]";
44609467b48Spatrick }
44773471bf0Spatrick #endif
44809467b48Spatrick
44973471bf0Spatrick const ControlDivergenceDesc &
getJoinBlocks(const Instruction & Term)45073471bf0Spatrick SyncDependenceAnalysis::getJoinBlocks(const Instruction &Term) {
45109467b48Spatrick // trivial case
45273471bf0Spatrick if (Term.getNumSuccessors() <= 1) {
45373471bf0Spatrick return EmptyDivergenceDesc;
45409467b48Spatrick }
45509467b48Spatrick
45609467b48Spatrick // already available in cache?
45773471bf0Spatrick auto ItCached = CachedControlDivDescs.find(&Term);
45873471bf0Spatrick if (ItCached != CachedControlDivDescs.end())
45909467b48Spatrick return *ItCached->second;
46009467b48Spatrick
46109467b48Spatrick // compute all join points
46273471bf0Spatrick // Special handling of divergent loop exits is not needed for LCSSA
46309467b48Spatrick const auto &TermBlock = *Term.getParent();
46473471bf0Spatrick DivergencePropagator Propagator(LoopPO, DT, PDT, LI, TermBlock);
46573471bf0Spatrick auto DivDesc = Propagator.computeJoinPoints();
46609467b48Spatrick
46773471bf0Spatrick LLVM_DEBUG(dbgs() << "Result (" << Term.getParent()->getName() << "):\n";
46873471bf0Spatrick dbgs() << "JoinDivBlocks: ";
46973471bf0Spatrick printBlockSet(DivDesc->JoinDivBlocks, dbgs());
47073471bf0Spatrick dbgs() << "\nLoopDivBlocks: ";
47173471bf0Spatrick printBlockSet(DivDesc->LoopDivBlocks, dbgs()); dbgs() << "\n";);
47273471bf0Spatrick
47373471bf0Spatrick auto ItInserted = CachedControlDivDescs.emplace(&Term, std::move(DivDesc));
47409467b48Spatrick assert(ItInserted.second);
47509467b48Spatrick return *ItInserted.first->second;
47609467b48Spatrick }
47709467b48Spatrick
47809467b48Spatrick } // namespace llvm
479