xref: /llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp (revision 986529634368d99a3b469033d84e45a62d53eec5)
1 //===- StructurizeCFG.cpp -------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/Transforms/Scalar/StructurizeCFG.h"
10 #include "llvm/ADT/DenseMap.h"
11 #include "llvm/ADT/EquivalenceClasses.h"
12 #include "llvm/ADT/MapVector.h"
13 #include "llvm/ADT/SCCIterator.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/SmallPtrSet.h"
16 #include "llvm/ADT/SmallSet.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/Analysis/InstructionSimplify.h"
19 #include "llvm/Analysis/RegionInfo.h"
20 #include "llvm/Analysis/RegionIterator.h"
21 #include "llvm/Analysis/RegionPass.h"
22 #include "llvm/Analysis/UniformityAnalysis.h"
23 #include "llvm/IR/BasicBlock.h"
24 #include "llvm/IR/CFG.h"
25 #include "llvm/IR/Constants.h"
26 #include "llvm/IR/Dominators.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/IR/InstrTypes.h"
29 #include "llvm/IR/Instruction.h"
30 #include "llvm/IR/Instructions.h"
31 #include "llvm/IR/Metadata.h"
32 #include "llvm/IR/PassManager.h"
33 #include "llvm/IR/PatternMatch.h"
34 #include "llvm/IR/ProfDataUtils.h"
35 #include "llvm/IR/Type.h"
36 #include "llvm/IR/Use.h"
37 #include "llvm/IR/Value.h"
38 #include "llvm/IR/ValueHandle.h"
39 #include "llvm/InitializePasses.h"
40 #include "llvm/Pass.h"
41 #include "llvm/Support/Casting.h"
42 #include "llvm/Support/CommandLine.h"
43 #include "llvm/Support/Debug.h"
44 #include "llvm/Support/raw_ostream.h"
45 #include "llvm/Transforms/Scalar.h"
46 #include "llvm/Transforms/Utils.h"
47 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
48 #include "llvm/Transforms/Utils/Local.h"
49 #include "llvm/Transforms/Utils/SSAUpdater.h"
50 #include <cassert>
51 #include <utility>
52 
53 using namespace llvm;
54 using namespace llvm::PatternMatch;
55 
56 #define DEBUG_TYPE "structurizecfg"
57 
58 // The name for newly created blocks.
59 const char FlowBlockName[] = "Flow";
60 
61 namespace {
62 
63 static cl::opt<bool> ForceSkipUniformRegions(
64   "structurizecfg-skip-uniform-regions",
65   cl::Hidden,
66   cl::desc("Force whether the StructurizeCFG pass skips uniform regions"),
67   cl::init(false));
68 
69 static cl::opt<bool>
70     RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden,
71                           cl::desc("Allow relaxed uniform region checks"),
72                           cl::init(true));
73 
74 // Definition of the complex types used in this pass.
75 
76 using BBValuePair = std::pair<BasicBlock *, Value *>;
77 
78 using RNVector = SmallVector<RegionNode *, 8>;
79 using BBVector = SmallVector<BasicBlock *, 8>;
80 using BranchVector = SmallVector<BranchInst *, 8>;
81 using BBValueVector = SmallVector<BBValuePair, 2>;
82 
83 using BBSet = SmallPtrSet<BasicBlock *, 8>;
84 
85 using PhiMap = MapVector<PHINode *, BBValueVector>;
86 using BB2BBVecMap = MapVector<BasicBlock *, BBVector>;
87 
88 using BBPhiMap = DenseMap<BasicBlock *, PhiMap>;
89 
90 using MaybeCondBranchWeights = std::optional<class CondBranchWeights>;
91 
92 class CondBranchWeights {
93   uint32_t TrueWeight;
94   uint32_t FalseWeight;
95 
96   CondBranchWeights(uint32_t T, uint32_t F) : TrueWeight(T), FalseWeight(F) {}
97 
98 public:
99   static MaybeCondBranchWeights tryParse(const BranchInst &Br) {
100     assert(Br.isConditional());
101 
102     uint64_t T, F;
103     if (!extractBranchWeights(Br, T, F))
104       return std::nullopt;
105 
106     return CondBranchWeights(T, F);
107   }
108 
109   static void setMetadata(BranchInst &Br,
110                           const MaybeCondBranchWeights &Weights) {
111     assert(Br.isConditional());
112     if (!Weights)
113       return;
114     uint32_t Arr[] = {Weights->TrueWeight, Weights->FalseWeight};
115     setBranchWeights(Br, Arr, false);
116   }
117 
118   CondBranchWeights invert() const {
119     return CondBranchWeights{FalseWeight, TrueWeight};
120   }
121 };
122 
123 struct PredInfo {
124   Value *Pred;
125   MaybeCondBranchWeights Weights;
126 };
127 
128 using BBPredicates = DenseMap<BasicBlock *, PredInfo>;
129 using PredMap = DenseMap<BasicBlock *, BBPredicates>;
130 using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>;
131 
132 using BranchDebugLocMap = DenseMap<BasicBlock *, DebugLoc>;
133 
134 // A traits type that is intended to be used in graph algorithms. The graph
135 // traits starts at an entry node, and traverses the RegionNodes that are in
136 // the Nodes set.
137 struct SubGraphTraits {
138   using NodeRef = std::pair<RegionNode *, SmallDenseSet<RegionNode *> *>;
139   using BaseSuccIterator = GraphTraits<RegionNode *>::ChildIteratorType;
140 
141   // This wraps a set of Nodes into the iterator, so we know which edges to
142   // filter out.
143   class WrappedSuccIterator
144       : public iterator_adaptor_base<
145             WrappedSuccIterator, BaseSuccIterator,
146             typename std::iterator_traits<BaseSuccIterator>::iterator_category,
147             NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> {
148     SmallDenseSet<RegionNode *> *Nodes;
149 
150   public:
151     WrappedSuccIterator(BaseSuccIterator It, SmallDenseSet<RegionNode *> *Nodes)
152         : iterator_adaptor_base(It), Nodes(Nodes) {}
153 
154     NodeRef operator*() const { return {*I, Nodes}; }
155   };
156 
157   static bool filterAll(const NodeRef &N) { return true; }
158   static bool filterSet(const NodeRef &N) { return N.second->count(N.first); }
159 
160   using ChildIteratorType =
161       filter_iterator<WrappedSuccIterator, bool (*)(const NodeRef &)>;
162 
163   static NodeRef getEntryNode(Region *R) {
164     return {GraphTraits<Region *>::getEntryNode(R), nullptr};
165   }
166 
167   static NodeRef getEntryNode(NodeRef N) { return N; }
168 
169   static iterator_range<ChildIteratorType> children(const NodeRef &N) {
170     auto *filter = N.second ? &filterSet : &filterAll;
171     return make_filter_range(
172         make_range<WrappedSuccIterator>(
173             {GraphTraits<RegionNode *>::child_begin(N.first), N.second},
174             {GraphTraits<RegionNode *>::child_end(N.first), N.second}),
175         filter);
176   }
177 
178   static ChildIteratorType child_begin(const NodeRef &N) {
179     return children(N).begin();
180   }
181 
182   static ChildIteratorType child_end(const NodeRef &N) {
183     return children(N).end();
184   }
185 };
186 
187 /// Finds the nearest common dominator of a set of BasicBlocks.
188 ///
189 /// For every BB you add to the set, you can specify whether we "remember" the
190 /// block.  When you get the common dominator, you can also ask whether it's one
191 /// of the blocks we remembered.
192 class NearestCommonDominator {
193   DominatorTree *DT;
194   BasicBlock *Result = nullptr;
195   bool ResultIsRemembered = false;
196 
197   /// Add BB to the resulting dominator.
198   void addBlock(BasicBlock *BB, bool Remember) {
199     if (!Result) {
200       Result = BB;
201       ResultIsRemembered = Remember;
202       return;
203     }
204 
205     BasicBlock *NewResult = DT->findNearestCommonDominator(Result, BB);
206     if (NewResult != Result)
207       ResultIsRemembered = false;
208     if (NewResult == BB)
209       ResultIsRemembered |= Remember;
210     Result = NewResult;
211   }
212 
213 public:
214   explicit NearestCommonDominator(DominatorTree *DomTree) : DT(DomTree) {}
215 
216   void addBlock(BasicBlock *BB) {
217     addBlock(BB, /* Remember = */ false);
218   }
219 
220   void addAndRememberBlock(BasicBlock *BB) {
221     addBlock(BB, /* Remember = */ true);
222   }
223 
224   /// Get the nearest common dominator of all the BBs added via addBlock() and
225   /// addAndRememberBlock().
226   BasicBlock *result() { return Result; }
227 
228   /// Is the BB returned by getResult() one of the blocks we added to the set
229   /// with addAndRememberBlock()?
230   bool resultIsRememberedBlock() { return ResultIsRemembered; }
231 };
232 
233 /// Transforms the control flow graph on one single entry/exit region
234 /// at a time.
235 ///
236 /// After the transform all "If"/"Then"/"Else" style control flow looks like
237 /// this:
238 ///
239 /// \verbatim
240 /// 1
241 /// ||
242 /// | |
243 /// 2 |
244 /// | /
245 /// |/
246 /// 3
247 /// ||   Where:
248 /// | |  1 = "If" block, calculates the condition
249 /// 4 |  2 = "Then" subregion, runs if the condition is true
250 /// | /  3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
251 /// |/   4 = "Else" optional subregion, runs if the condition is false
252 /// 5    5 = "End" block, also rejoins the control flow
253 /// \endverbatim
254 ///
255 /// Control flow is expressed as a branch where the true exit goes into the
256 /// "Then"/"Else" region, while the false exit skips the region
257 /// The condition for the optional "Else" region is expressed as a PHI node.
258 /// The incoming values of the PHI node are true for the "If" edge and false
259 /// for the "Then" edge.
260 ///
261 /// Additionally to that even complicated loops look like this:
262 ///
263 /// \verbatim
264 /// 1
265 /// ||
266 /// | |
267 /// 2 ^  Where:
268 /// | /  1 = "Entry" block
269 /// |/   2 = "Loop" optional subregion, with all exits at "Flow" block
270 /// 3    3 = "Flow" block, with back edge to entry block
271 /// |
272 /// \endverbatim
273 ///
274 /// The back edge of the "Flow" block is always on the false side of the branch
275 /// while the true side continues the general flow. So the loop condition
276 /// consist of a network of PHI nodes where the true incoming values expresses
277 /// breaks and the false values expresses continue states.
278 
279 class StructurizeCFG {
280   Type *Boolean;
281   ConstantInt *BoolTrue;
282   ConstantInt *BoolFalse;
283   Value *BoolPoison;
284 
285   Function *Func;
286   Region *ParentRegion;
287 
288   UniformityInfo *UA = nullptr;
289   DominatorTree *DT;
290 
291   SmallVector<RegionNode *, 8> Order;
292   BBSet Visited;
293   BBSet FlowSet;
294 
295   SmallVector<WeakVH, 8> AffectedPhis;
296   BBPhiMap DeletedPhis;
297   BB2BBVecMap AddedPhis;
298 
299   PredMap Predicates;
300   BranchVector Conditions;
301 
302   BB2BBMap Loops;
303   PredMap LoopPreds;
304   BranchVector LoopConds;
305 
306   BranchDebugLocMap TermDL;
307 
308   RegionNode *PrevNode;
309 
310   void orderNodes();
311 
312   void analyzeLoops(RegionNode *N);
313 
314   PredInfo buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
315 
316   void gatherPredicates(RegionNode *N);
317 
318   void collectInfos();
319 
320   void insertConditions(bool Loops);
321 
322   void simplifyConditions();
323 
324   void delPhiValues(BasicBlock *From, BasicBlock *To);
325 
326   void addPhiValues(BasicBlock *From, BasicBlock *To);
327 
328   void findUndefBlocks(BasicBlock *PHIBlock,
329                        const SmallSet<BasicBlock *, 8> &Incomings,
330                        SmallVector<BasicBlock *> &UndefBlks) const;
331 
332   void mergeIfCompatible(EquivalenceClasses<PHINode *> &PhiClasses, PHINode *A,
333                          PHINode *B);
334 
335   void setPhiValues();
336 
337   void simplifyAffectedPhis();
338 
339   void killTerminator(BasicBlock *BB);
340 
341   void changeExit(RegionNode *Node, BasicBlock *NewExit,
342                   bool IncludeDominator);
343 
344   BasicBlock *getNextFlow(BasicBlock *Dominator);
345 
346   BasicBlock *needPrefix(bool NeedEmpty);
347 
348   BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed);
349 
350   void setPrevNode(BasicBlock *BB);
351 
352   bool dominatesPredicates(BasicBlock *BB, RegionNode *Node);
353 
354   bool isPredictableTrue(RegionNode *Node);
355 
356   void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd);
357 
358   void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd);
359 
360   void createFlow();
361 
362   void rebuildSSA();
363 
364 public:
365   void init(Region *R);
366   bool run(Region *R, DominatorTree *DT);
367   bool makeUniformRegion(Region *R, UniformityInfo &UA);
368 };
369 
370 class StructurizeCFGLegacyPass : public RegionPass {
371   bool SkipUniformRegions;
372 
373 public:
374   static char ID;
375 
376   explicit StructurizeCFGLegacyPass(bool SkipUniformRegions_ = false)
377       : RegionPass(ID), SkipUniformRegions(SkipUniformRegions_) {
378     if (ForceSkipUniformRegions.getNumOccurrences())
379       SkipUniformRegions = ForceSkipUniformRegions.getValue();
380     initializeStructurizeCFGLegacyPassPass(*PassRegistry::getPassRegistry());
381   }
382 
383   bool runOnRegion(Region *R, RGPassManager &RGM) override {
384     StructurizeCFG SCFG;
385     SCFG.init(R);
386     if (SkipUniformRegions) {
387       UniformityInfo &UA =
388           getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
389       if (SCFG.makeUniformRegion(R, UA))
390         return false;
391     }
392     DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
393     return SCFG.run(R, DT);
394   }
395 
396   StringRef getPassName() const override { return "Structurize control flow"; }
397 
398   void getAnalysisUsage(AnalysisUsage &AU) const override {
399     if (SkipUniformRegions)
400       AU.addRequired<UniformityInfoWrapperPass>();
401     AU.addRequired<DominatorTreeWrapperPass>();
402 
403     AU.addPreserved<DominatorTreeWrapperPass>();
404     RegionPass::getAnalysisUsage(AU);
405   }
406 };
407 
408 } // end anonymous namespace
409 
410 char StructurizeCFGLegacyPass::ID = 0;
411 
412 INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
413                       "Structurize the CFG", false, false)
414 INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
415 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
416 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
417 INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
418                     "Structurize the CFG", false, false)
419 
420 /// Build up the general order of nodes, by performing a topological sort of the
421 /// parent region's nodes, while ensuring that there is no outer cycle node
422 /// between any two inner cycle nodes.
423 void StructurizeCFG::orderNodes() {
424   Order.resize(std::distance(GraphTraits<Region *>::nodes_begin(ParentRegion),
425                              GraphTraits<Region *>::nodes_end(ParentRegion)));
426   if (Order.empty())
427     return;
428 
429   SmallDenseSet<RegionNode *> Nodes;
430   auto EntryNode = SubGraphTraits::getEntryNode(ParentRegion);
431 
432   // A list of range indices of SCCs in Order, to be processed.
433   SmallVector<std::pair<unsigned, unsigned>, 8> WorkList;
434   unsigned I = 0, E = Order.size();
435   while (true) {
436     // Run through all the SCCs in the subgraph starting with Entry.
437     for (auto SCCI =
438              scc_iterator<SubGraphTraits::NodeRef, SubGraphTraits>::begin(
439                  EntryNode);
440          !SCCI.isAtEnd(); ++SCCI) {
441       auto &SCC = *SCCI;
442 
443       // An SCC up to the size of 2, can be reduced to an entry (the last node),
444       // and a possible additional node. Therefore, it is already in order, and
445       // there is no need to add it to the work-list.
446       unsigned Size = SCC.size();
447       if (Size > 2)
448         WorkList.emplace_back(I, I + Size);
449 
450       // Add the SCC nodes to the Order array.
451       for (const auto &N : SCC) {
452         assert(I < E && "SCC size mismatch!");
453         Order[I++] = N.first;
454       }
455     }
456     assert(I == E && "SCC size mismatch!");
457 
458     // If there are no more SCCs to order, then we are done.
459     if (WorkList.empty())
460       break;
461 
462     std::tie(I, E) = WorkList.pop_back_val();
463 
464     // Collect the set of nodes in the SCC's subgraph. These are only the
465     // possible child nodes; we do not add the entry (last node) otherwise we
466     // will have the same exact SCC all over again.
467     Nodes.clear();
468     Nodes.insert(Order.begin() + I, Order.begin() + E - 1);
469 
470     // Update the entry node.
471     EntryNode.first = Order[E - 1];
472     EntryNode.second = &Nodes;
473   }
474 }
475 
476 /// Determine the end of the loops
477 void StructurizeCFG::analyzeLoops(RegionNode *N) {
478   if (N->isSubRegion()) {
479     // Test for exit as back edge
480     BasicBlock *Exit = N->getNodeAs<Region>()->getExit();
481     if (Visited.count(Exit))
482       Loops[Exit] = N->getEntry();
483 
484   } else {
485     // Test for successors as back edge
486     BasicBlock *BB = N->getNodeAs<BasicBlock>();
487     BranchInst *Term = cast<BranchInst>(BB->getTerminator());
488 
489     for (BasicBlock *Succ : Term->successors())
490       if (Visited.count(Succ))
491         Loops[Succ] = BB;
492   }
493 }
494 
495 /// Build the condition for one edge
496 PredInfo StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
497                                         bool Invert) {
498   Value *Cond = Invert ? BoolFalse : BoolTrue;
499   MaybeCondBranchWeights Weights;
500 
501   if (Term->isConditional()) {
502     Cond = Term->getCondition();
503     Weights = CondBranchWeights::tryParse(*Term);
504 
505     if (Idx != (unsigned)Invert) {
506       Cond = invertCondition(Cond);
507       if (Weights)
508         Weights = Weights->invert();
509     }
510   }
511   return {Cond, Weights};
512 }
513 
514 /// Analyze the predecessors of each block and build up predicates
515 void StructurizeCFG::gatherPredicates(RegionNode *N) {
516   RegionInfo *RI = ParentRegion->getRegionInfo();
517   BasicBlock *BB = N->getEntry();
518   BBPredicates &Pred = Predicates[BB];
519   BBPredicates &LPred = LoopPreds[BB];
520 
521   for (BasicBlock *P : predecessors(BB)) {
522     // Ignore it if it's a branch from outside into our region entry
523     if (!ParentRegion->contains(P))
524       continue;
525 
526     Region *R = RI->getRegionFor(P);
527     if (R == ParentRegion) {
528       // It's a top level block in our region
529       BranchInst *Term = cast<BranchInst>(P->getTerminator());
530       for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
531         BasicBlock *Succ = Term->getSuccessor(i);
532         if (Succ != BB)
533           continue;
534 
535         if (Visited.count(P)) {
536           // Normal forward edge
537           if (Term->isConditional()) {
538             // Try to treat it like an ELSE block
539             BasicBlock *Other = Term->getSuccessor(!i);
540             if (Visited.count(Other) && !Loops.count(Other) &&
541                 !Pred.count(Other) && !Pred.count(P)) {
542 
543               Pred[Other] = {BoolFalse, std::nullopt};
544               Pred[P] = {BoolTrue, std::nullopt};
545               continue;
546             }
547           }
548           Pred[P] = buildCondition(Term, i, false);
549         } else {
550           // Back edge
551           LPred[P] = buildCondition(Term, i, true);
552         }
553       }
554     } else {
555       // It's an exit from a sub region
556       while (R->getParent() != ParentRegion)
557         R = R->getParent();
558 
559       // Edge from inside a subregion to its entry, ignore it
560       if (*R == *N)
561         continue;
562 
563       BasicBlock *Entry = R->getEntry();
564       if (Visited.count(Entry))
565         Pred[Entry] = {BoolTrue, std::nullopt};
566       else
567         LPred[Entry] = {BoolFalse, std::nullopt};
568     }
569   }
570 }
571 
572 /// Collect various loop and predicate infos
573 void StructurizeCFG::collectInfos() {
574   // Reset predicate
575   Predicates.clear();
576 
577   // and loop infos
578   Loops.clear();
579   LoopPreds.clear();
580 
581   // Reset the visited nodes
582   Visited.clear();
583 
584   for (RegionNode *RN : reverse(Order)) {
585     LLVM_DEBUG(dbgs() << "Visiting: "
586                       << (RN->isSubRegion() ? "SubRegion with entry: " : "")
587                       << RN->getEntry()->getName() << "\n");
588 
589     // Analyze all the conditions leading to a node
590     gatherPredicates(RN);
591 
592     // Remember that we've seen this node
593     Visited.insert(RN->getEntry());
594 
595     // Find the last back edges
596     analyzeLoops(RN);
597   }
598 
599   // Reset the collected term debug locations
600   TermDL.clear();
601 
602   for (BasicBlock &BB : *Func) {
603     if (const DebugLoc &DL = BB.getTerminator()->getDebugLoc())
604       TermDL[&BB] = DL;
605   }
606 }
607 
608 /// Insert the missing branch conditions
609 void StructurizeCFG::insertConditions(bool Loops) {
610   BranchVector &Conds = Loops ? LoopConds : Conditions;
611   Value *Default = Loops ? BoolTrue : BoolFalse;
612   SSAUpdater PhiInserter;
613 
614   for (BranchInst *Term : Conds) {
615     assert(Term->isConditional());
616 
617     BasicBlock *Parent = Term->getParent();
618     BasicBlock *SuccTrue = Term->getSuccessor(0);
619     BasicBlock *SuccFalse = Term->getSuccessor(1);
620 
621     BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
622 
623     if (Preds.size() == 1 && Preds.begin()->first == Parent) {
624       auto &PI = Preds.begin()->second;
625       Term->setCondition(PI.Pred);
626       CondBranchWeights::setMetadata(*Term, PI.Weights);
627     } else {
628       PhiInserter.Initialize(Boolean, "");
629       PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default);
630 
631       NearestCommonDominator Dominator(DT);
632       Dominator.addBlock(Parent);
633 
634       for (auto [BB, PI] : Preds) {
635         assert(BB != Parent);
636         PhiInserter.AddAvailableValue(BB, PI.Pred);
637         Dominator.addAndRememberBlock(BB);
638       }
639 
640       if (!Dominator.resultIsRememberedBlock())
641         PhiInserter.AddAvailableValue(Dominator.result(), Default);
642 
643       Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent));
644     }
645   }
646 }
647 
648 /// Simplify any inverted conditions that were built by buildConditions.
649 void StructurizeCFG::simplifyConditions() {
650   SmallVector<Instruction *> InstToErase;
651   for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) {
652     auto &Preds = I.second;
653     for (auto [BB, PI] : Preds) {
654       Instruction *Inverted;
655       if (match(PI.Pred, m_Not(m_OneUse(m_Instruction(Inverted)))) &&
656           !PI.Pred->use_empty()) {
657         if (auto *InvertedCmp = dyn_cast<CmpInst>(Inverted)) {
658           InvertedCmp->setPredicate(InvertedCmp->getInversePredicate());
659           PI.Pred->replaceAllUsesWith(InvertedCmp);
660           InstToErase.push_back(cast<Instruction>(PI.Pred));
661         }
662       }
663     }
664   }
665   for (auto *I : InstToErase)
666     I->eraseFromParent();
667 }
668 
669 /// Remove all PHI values coming from "From" into "To" and remember
670 /// them in DeletedPhis
671 void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
672   PhiMap &Map = DeletedPhis[To];
673   for (PHINode &Phi : To->phis()) {
674     bool Recorded = false;
675     while (Phi.getBasicBlockIndex(From) != -1) {
676       Value *Deleted = Phi.removeIncomingValue(From, false);
677       Map[&Phi].push_back(std::make_pair(From, Deleted));
678       if (!Recorded) {
679         AffectedPhis.push_back(&Phi);
680         Recorded = true;
681       }
682     }
683   }
684 }
685 
686 /// Add a dummy PHI value as soon as we knew the new predecessor
687 void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
688   for (PHINode &Phi : To->phis()) {
689     Value *Poison = PoisonValue::get(Phi.getType());
690     Phi.addIncoming(Poison, From);
691   }
692   AddedPhis[To].push_back(From);
693 }
694 
695 /// When we are reconstructing a PHI inside \p PHIBlock with incoming values
696 /// from predecessors \p Incomings, we have a chance to mark the available value
697 /// from some blocks as undefined. The function will find out all such blocks
698 /// and return in \p UndefBlks.
699 void StructurizeCFG::findUndefBlocks(
700     BasicBlock *PHIBlock, const SmallSet<BasicBlock *, 8> &Incomings,
701     SmallVector<BasicBlock *> &UndefBlks) const {
702   //  We may get a post-structured CFG like below:
703   //
704   //  | P1
705   //  |/
706   //  F1
707   //  |\
708   //  | N
709   //  |/
710   //  F2
711   //  |\
712   //  | P2
713   //  |/
714   //  F3
715   //  |\
716   //  B
717   //
718   // B is the block that has a PHI being reconstructed. P1/P2 are predecessors
719   // of B before structurization. F1/F2/F3 are flow blocks inserted during
720   // structurization process. Block N is not a predecessor of B before
721   // structurization, but are placed between the predecessors(P1/P2) of B after
722   // structurization. This usually means that threads went to N never take the
723   // path N->F2->F3->B. For example, the threads take the branch F1->N may
724   // always take the branch F2->P2. So, when we are reconstructing a PHI
725   // originally in B, we can safely say the incoming value from N is undefined.
726   SmallSet<BasicBlock *, 8> VisitedBlock;
727   SmallVector<BasicBlock *, 8> Stack;
728   if (PHIBlock == ParentRegion->getExit()) {
729     for (auto P : predecessors(PHIBlock)) {
730       if (ParentRegion->contains(P))
731         Stack.push_back(P);
732     }
733   } else {
734     append_range(Stack, predecessors(PHIBlock));
735   }
736 
737   // Do a backward traversal over the CFG, and stop further searching if
738   // the block is not a Flow. If a block is neither flow block nor the
739   // incoming predecessor, then the incoming value from the block is
740   // undefined value for the PHI being reconstructed.
741   while (!Stack.empty()) {
742     BasicBlock *Current = Stack.pop_back_val();
743     if (!VisitedBlock.insert(Current).second)
744       continue;
745 
746     if (FlowSet.contains(Current)) {
747       for (auto P : predecessors(Current))
748         Stack.push_back(P);
749     } else if (!Incomings.contains(Current)) {
750       UndefBlks.push_back(Current);
751     }
752   }
753 }
754 
755 // If two phi nodes have compatible incoming values (for each
756 // incoming block, either they have the same incoming value or only one phi
757 // node has an incoming value), let them share the merged incoming values. The
758 // merge process is guided by the equivalence information from \p PhiClasses.
759 // The function will possibly update the incoming values of leader phi in
760 // DeletedPhis.
761 void StructurizeCFG::mergeIfCompatible(
762     EquivalenceClasses<PHINode *> &PhiClasses, PHINode *A, PHINode *B) {
763   auto ItA = PhiClasses.findLeader(PhiClasses.insert(A));
764   auto ItB = PhiClasses.findLeader(PhiClasses.insert(B));
765   // They are already in the same class, no work needed.
766   if (ItA == ItB)
767     return;
768 
769   PHINode *LeaderA = *ItA;
770   PHINode *LeaderB = *ItB;
771   BBValueVector &IncomingA = DeletedPhis[LeaderA->getParent()][LeaderA];
772   BBValueVector &IncomingB = DeletedPhis[LeaderB->getParent()][LeaderB];
773 
774   DenseMap<BasicBlock *, Value *> Mergeable(IncomingA.begin(), IncomingA.end());
775   for (auto [BB, V] : IncomingB) {
776     auto BBIt = Mergeable.find(BB);
777     if (BBIt != Mergeable.end() && BBIt->second != V)
778       return;
779     // Either IncomingA does not have this value or IncomingA has the same
780     // value.
781     Mergeable.insert({BB, V});
782   }
783 
784   // Update the incoming value of leaderA.
785   IncomingA.assign(Mergeable.begin(), Mergeable.end());
786   PhiClasses.unionSets(ItA, ItB);
787 }
788 
789 /// Add the real PHI value as soon as everything is set up
790 void StructurizeCFG::setPhiValues() {
791   SmallVector<PHINode *, 8> InsertedPhis;
792   SSAUpdater Updater(&InsertedPhis);
793   DenseMap<BasicBlock *, SmallVector<BasicBlock *>> UndefBlksMap;
794 
795   // Find phi nodes that have compatible incoming values (either they have
796   // the same value for the same block or only one phi node has an incoming
797   // value, see example below). We only search again the phi's that are
798   // referenced by another phi, which is the case we care about.
799   //
800   // For example (-- means no incoming value):
801   // phi1 : BB1:phi2   BB2:v  BB3:--
802   // phi2:  BB1:--     BB2:v  BB3:w
803   //
804   // Then we can merge these incoming values and let phi1, phi2 use the
805   // same set of incoming values:
806   //
807   // phi1&phi2: BB1:phi2  BB2:v  BB3:w
808   //
809   // By doing this, phi1 and phi2 would share more intermediate phi nodes.
810   // This would help reduce the number of phi nodes during SSA reconstruction
811   // and ultimately result in fewer COPY instructions.
812   //
813   // This should be correct, because if a phi node does not have incoming
814   // value from certain block, this means the block is not the predecessor
815   // of the parent block, so we actually don't care about its incoming value.
816   EquivalenceClasses<PHINode *> PhiClasses;
817   for (const auto &[To, From] : AddedPhis) {
818     auto OldPhiIt = DeletedPhis.find(To);
819     if (OldPhiIt == DeletedPhis.end())
820       continue;
821 
822     PhiMap &BlkPhis = OldPhiIt->second;
823     SmallVector<BasicBlock *> &UndefBlks = UndefBlksMap[To];
824     SmallSet<BasicBlock *, 8> Incomings;
825 
826     // Get the undefined blocks shared by all the phi nodes.
827     if (!BlkPhis.empty()) {
828       for (const auto &VI : BlkPhis.front().second)
829         Incomings.insert(VI.first);
830       findUndefBlocks(To, Incomings, UndefBlks);
831     }
832 
833     for (const auto &[Phi, Incomings] : OldPhiIt->second) {
834       SmallVector<PHINode *> IncomingPHIs;
835       for (const auto &[BB, V] : Incomings) {
836         // First, for each phi, check whether it has incoming value which is
837         // another phi.
838         if (PHINode *P = dyn_cast<PHINode>(V))
839           IncomingPHIs.push_back(P);
840       }
841 
842       for (auto *OtherPhi : IncomingPHIs) {
843         // Skip phis that are unrelated to the phi reconstruction for now.
844         if (!DeletedPhis.contains(OtherPhi->getParent()))
845           continue;
846         mergeIfCompatible(PhiClasses, Phi, OtherPhi);
847       }
848     }
849   }
850 
851   for (const auto &AddedPhi : AddedPhis) {
852     BasicBlock *To = AddedPhi.first;
853     const BBVector &From = AddedPhi.second;
854 
855     if (!DeletedPhis.count(To))
856       continue;
857 
858     PhiMap &Map = DeletedPhis[To];
859     SmallVector<BasicBlock *> &UndefBlks = UndefBlksMap[To];
860     for (const auto &[Phi, Incoming] : Map) {
861       Value *Undef = UndefValue::get(Phi->getType());
862       Updater.Initialize(Phi->getType(), "");
863       Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
864       Updater.AddAvailableValue(To, Undef);
865 
866       // Use leader phi's incoming if there is.
867       auto LeaderIt = PhiClasses.findLeader(Phi);
868       bool UseIncomingOfLeader =
869           LeaderIt != PhiClasses.member_end() && *LeaderIt != Phi;
870       const auto &IncomingMap =
871           UseIncomingOfLeader ? DeletedPhis[(*LeaderIt)->getParent()][*LeaderIt]
872                               : Incoming;
873 
874       SmallVector<BasicBlock *> ConstantPreds;
875       for (const auto &[BB, V] : IncomingMap) {
876         Updater.AddAvailableValue(BB, V);
877         if (isa<Constant>(V))
878           ConstantPreds.push_back(BB);
879       }
880 
881       for (auto UB : UndefBlks) {
882         // If this undef block is dominated by any predecessor(before
883         // structurization) of reconstructed PHI with constant incoming value,
884         // don't mark the available value as undefined. Setting undef to such
885         // block will stop us from getting optimal phi insertion.
886         if (any_of(ConstantPreds,
887                    [&](BasicBlock *CP) { return DT->dominates(CP, UB); }))
888           continue;
889         // Maybe already get a value through sharing with other phi nodes.
890         if (Updater.HasValueForBlock(UB))
891           continue;
892 
893         Updater.AddAvailableValue(UB, Undef);
894       }
895 
896       for (BasicBlock *FI : From)
897         Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI));
898       AffectedPhis.push_back(Phi);
899     }
900   }
901 
902   AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end());
903 }
904 
905 void StructurizeCFG::simplifyAffectedPhis() {
906   bool Changed;
907   do {
908     Changed = false;
909     SimplifyQuery Q(Func->getDataLayout());
910     Q.DT = DT;
911     // Setting CanUseUndef to true might extend value liveness, set it to false
912     // to achieve better register pressure.
913     Q.CanUseUndef = false;
914     for (WeakVH VH : AffectedPhis) {
915       if (auto Phi = dyn_cast_or_null<PHINode>(VH)) {
916         if (auto NewValue = simplifyInstruction(Phi, Q)) {
917           Phi->replaceAllUsesWith(NewValue);
918           Phi->eraseFromParent();
919           Changed = true;
920         }
921       }
922     }
923   } while (Changed);
924 }
925 
926 /// Remove phi values from all successors and then remove the terminator.
927 void StructurizeCFG::killTerminator(BasicBlock *BB) {
928   Instruction *Term = BB->getTerminator();
929   if (!Term)
930     return;
931 
932   for (BasicBlock *Succ : successors(BB))
933     delPhiValues(BB, Succ);
934 
935   Term->eraseFromParent();
936 }
937 
938 /// Let node exit(s) point to NewExit
939 void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
940                                 bool IncludeDominator) {
941   if (Node->isSubRegion()) {
942     Region *SubRegion = Node->getNodeAs<Region>();
943     BasicBlock *OldExit = SubRegion->getExit();
944     BasicBlock *Dominator = nullptr;
945 
946     // Find all the edges from the sub region to the exit.
947     // We use make_early_inc_range here because we modify BB's terminator.
948     for (BasicBlock *BB : llvm::make_early_inc_range(predecessors(OldExit))) {
949       if (!SubRegion->contains(BB))
950         continue;
951 
952       // Modify the edges to point to the new exit
953       delPhiValues(BB, OldExit);
954       BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit);
955       addPhiValues(BB, NewExit);
956 
957       // Find the new dominator (if requested)
958       if (IncludeDominator) {
959         if (!Dominator)
960           Dominator = BB;
961         else
962           Dominator = DT->findNearestCommonDominator(Dominator, BB);
963       }
964     }
965 
966     // Change the dominator (if requested)
967     if (Dominator)
968       DT->changeImmediateDominator(NewExit, Dominator);
969 
970     // Update the region info
971     SubRegion->replaceExit(NewExit);
972   } else {
973     BasicBlock *BB = Node->getNodeAs<BasicBlock>();
974     killTerminator(BB);
975     BranchInst *Br = BranchInst::Create(NewExit, BB);
976     Br->setDebugLoc(TermDL[BB]);
977     addPhiValues(BB, NewExit);
978     if (IncludeDominator)
979       DT->changeImmediateDominator(NewExit, BB);
980   }
981 }
982 
983 /// Create a new flow node and update dominator tree and region info
984 BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) {
985   LLVMContext &Context = Func->getContext();
986   BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
987                        Order.back()->getEntry();
988   BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
989                                         Func, Insert);
990   FlowSet.insert(Flow);
991 
992   // use a temporary variable to avoid a use-after-free if the map's storage is
993   // reallocated
994   DebugLoc DL = TermDL[Dominator];
995   TermDL[Flow] = std::move(DL);
996 
997   DT->addNewBlock(Flow, Dominator);
998   ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
999   return Flow;
1000 }
1001 
1002 /// Create a new or reuse the previous node as flow node
1003 BasicBlock *StructurizeCFG::needPrefix(bool NeedEmpty) {
1004   BasicBlock *Entry = PrevNode->getEntry();
1005 
1006   if (!PrevNode->isSubRegion()) {
1007     killTerminator(Entry);
1008     if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end())
1009       return Entry;
1010   }
1011 
1012   // create a new flow node
1013   BasicBlock *Flow = getNextFlow(Entry);
1014 
1015   // and wire it up
1016   changeExit(PrevNode, Flow, true);
1017   PrevNode = ParentRegion->getBBNode(Flow);
1018   return Flow;
1019 }
1020 
1021 /// Returns the region exit if possible, otherwise just a new flow node
1022 BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow,
1023                                         bool ExitUseAllowed) {
1024   if (!Order.empty() || !ExitUseAllowed)
1025     return getNextFlow(Flow);
1026 
1027   BasicBlock *Exit = ParentRegion->getExit();
1028   DT->changeImmediateDominator(Exit, Flow);
1029   addPhiValues(Flow, Exit);
1030   return Exit;
1031 }
1032 
1033 /// Set the previous node
1034 void StructurizeCFG::setPrevNode(BasicBlock *BB) {
1035   PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB)
1036                                         : nullptr;
1037 }
1038 
1039 /// Does BB dominate all the predicates of Node?
1040 bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) {
1041   BBPredicates &Preds = Predicates[Node->getEntry()];
1042   return llvm::all_of(Preds, [&](std::pair<BasicBlock *, PredInfo> Pred) {
1043     return DT->dominates(BB, Pred.first);
1044   });
1045 }
1046 
1047 /// Can we predict that this node will always be called?
1048 bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
1049   BBPredicates &Preds = Predicates[Node->getEntry()];
1050   bool Dominated = false;
1051 
1052   // Regionentry is always true
1053   if (!PrevNode)
1054     return true;
1055 
1056   for (auto [BB, PI] : Preds) {
1057     if (PI.Pred != BoolTrue)
1058       return false;
1059 
1060     if (!Dominated && DT->dominates(BB, PrevNode->getEntry()))
1061       Dominated = true;
1062   }
1063 
1064   // TODO: The dominator check is too strict
1065   return Dominated;
1066 }
1067 
1068 /// Take one node from the order vector and wire it up
1069 void StructurizeCFG::wireFlow(bool ExitUseAllowed,
1070                               BasicBlock *LoopEnd) {
1071   RegionNode *Node = Order.pop_back_val();
1072   Visited.insert(Node->getEntry());
1073 
1074   if (isPredictableTrue(Node)) {
1075     // Just a linear flow
1076     if (PrevNode) {
1077       changeExit(PrevNode, Node->getEntry(), true);
1078     }
1079     PrevNode = Node;
1080   } else {
1081     // Insert extra prefix node (or reuse last one)
1082     BasicBlock *Flow = needPrefix(false);
1083 
1084     // Insert extra postfix node (or use exit instead)
1085     BasicBlock *Entry = Node->getEntry();
1086     BasicBlock *Next = needPostfix(Flow, ExitUseAllowed);
1087 
1088     // let it point to entry and next block
1089     BranchInst *Br = BranchInst::Create(Entry, Next, BoolPoison, Flow);
1090     Br->setDebugLoc(TermDL[Flow]);
1091     Conditions.push_back(Br);
1092     addPhiValues(Flow, Entry);
1093     DT->changeImmediateDominator(Entry, Flow);
1094 
1095     PrevNode = Node;
1096     while (!Order.empty() && !Visited.count(LoopEnd) &&
1097            dominatesPredicates(Entry, Order.back())) {
1098       handleLoops(false, LoopEnd);
1099     }
1100 
1101     changeExit(PrevNode, Next, false);
1102     setPrevNode(Next);
1103   }
1104 }
1105 
1106 void StructurizeCFG::handleLoops(bool ExitUseAllowed,
1107                                  BasicBlock *LoopEnd) {
1108   RegionNode *Node = Order.back();
1109   BasicBlock *LoopStart = Node->getEntry();
1110 
1111   if (!Loops.count(LoopStart)) {
1112     wireFlow(ExitUseAllowed, LoopEnd);
1113     return;
1114   }
1115 
1116   if (!isPredictableTrue(Node))
1117     LoopStart = needPrefix(true);
1118 
1119   LoopEnd = Loops[Node->getEntry()];
1120   wireFlow(false, LoopEnd);
1121   while (!Visited.count(LoopEnd)) {
1122     handleLoops(false, LoopEnd);
1123   }
1124 
1125   assert(LoopStart != &LoopStart->getParent()->getEntryBlock());
1126 
1127   // Create an extra loop end node
1128   LoopEnd = needPrefix(false);
1129   BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed);
1130   BranchInst *Br = BranchInst::Create(Next, LoopStart, BoolPoison, LoopEnd);
1131   Br->setDebugLoc(TermDL[LoopEnd]);
1132   LoopConds.push_back(Br);
1133   addPhiValues(LoopEnd, LoopStart);
1134   setPrevNode(Next);
1135 }
1136 
1137 /// After this function control flow looks like it should be, but
1138 /// branches and PHI nodes only have undefined conditions.
1139 void StructurizeCFG::createFlow() {
1140   BasicBlock *Exit = ParentRegion->getExit();
1141   bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit);
1142 
1143   AffectedPhis.clear();
1144   DeletedPhis.clear();
1145   AddedPhis.clear();
1146   Conditions.clear();
1147   LoopConds.clear();
1148 
1149   PrevNode = nullptr;
1150   Visited.clear();
1151 
1152   while (!Order.empty()) {
1153     handleLoops(EntryDominatesExit, nullptr);
1154   }
1155 
1156   if (PrevNode)
1157     changeExit(PrevNode, Exit, EntryDominatesExit);
1158   else
1159     assert(EntryDominatesExit);
1160 }
1161 
1162 /// Handle a rare case where the disintegrated nodes instructions
1163 /// no longer dominate all their uses. Not sure if this is really necessary
1164 void StructurizeCFG::rebuildSSA() {
1165   SSAUpdater Updater;
1166   for (BasicBlock *BB : ParentRegion->blocks())
1167     for (Instruction &I : *BB) {
1168       bool Initialized = false;
1169       // We may modify the use list as we iterate over it, so we use
1170       // make_early_inc_range.
1171       for (Use &U : llvm::make_early_inc_range(I.uses())) {
1172         Instruction *User = cast<Instruction>(U.getUser());
1173         if (User->getParent() == BB) {
1174           continue;
1175         } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
1176           if (UserPN->getIncomingBlock(U) == BB)
1177             continue;
1178         }
1179 
1180         if (DT->dominates(&I, User))
1181           continue;
1182 
1183         if (!Initialized) {
1184           Value *Undef = UndefValue::get(I.getType());
1185           Updater.Initialize(I.getType(), "");
1186           Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
1187           Updater.AddAvailableValue(BB, &I);
1188           Initialized = true;
1189         }
1190         Updater.RewriteUseAfterInsertions(U);
1191       }
1192     }
1193 }
1194 
1195 static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
1196                                    const UniformityInfo &UA) {
1197   // Bool for if all sub-regions are uniform.
1198   bool SubRegionsAreUniform = true;
1199   // Count of how many direct children are conditional.
1200   unsigned ConditionalDirectChildren = 0;
1201 
1202   for (auto *E : R->elements()) {
1203     if (!E->isSubRegion()) {
1204       auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator());
1205       if (!Br || !Br->isConditional())
1206         continue;
1207 
1208       if (!UA.isUniform(Br))
1209         return false;
1210 
1211       // One of our direct children is conditional.
1212       ConditionalDirectChildren++;
1213 
1214       LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName()
1215                         << " has uniform terminator\n");
1216     } else {
1217       // Explicitly refuse to treat regions as uniform if they have non-uniform
1218       // subregions. We cannot rely on UniformityAnalysis for branches in
1219       // subregions because those branches may have been removed and re-created,
1220       // so we look for our metadata instead.
1221       //
1222       // Warning: It would be nice to treat regions as uniform based only on
1223       // their direct child basic blocks' terminators, regardless of whether
1224       // subregions are uniform or not. However, this requires a very careful
1225       // look at SIAnnotateControlFlow to make sure nothing breaks there.
1226       for (auto *BB : E->getNodeAs<Region>()->blocks()) {
1227         auto Br = dyn_cast<BranchInst>(BB->getTerminator());
1228         if (!Br || !Br->isConditional())
1229           continue;
1230 
1231         if (!Br->getMetadata(UniformMDKindID)) {
1232           // Early exit if we cannot have relaxed uniform regions.
1233           if (!RelaxedUniformRegions)
1234             return false;
1235 
1236           SubRegionsAreUniform = false;
1237           break;
1238         }
1239       }
1240     }
1241   }
1242 
1243   // Our region is uniform if:
1244   // 1. All conditional branches that are direct children are uniform (checked
1245   // above).
1246   // 2. And either:
1247   //   a. All sub-regions are uniform.
1248   //   b. There is one or less conditional branches among the direct children.
1249   return SubRegionsAreUniform || (ConditionalDirectChildren <= 1);
1250 }
1251 
1252 void StructurizeCFG::init(Region *R) {
1253   LLVMContext &Context = R->getEntry()->getContext();
1254 
1255   Boolean = Type::getInt1Ty(Context);
1256   BoolTrue = ConstantInt::getTrue(Context);
1257   BoolFalse = ConstantInt::getFalse(Context);
1258   BoolPoison = PoisonValue::get(Boolean);
1259 
1260   this->UA = nullptr;
1261 }
1262 
1263 bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {
1264   if (R->isTopLevelRegion())
1265     return false;
1266 
1267   this->UA = &UA;
1268 
1269   // TODO: We could probably be smarter here with how we handle sub-regions.
1270   // We currently rely on the fact that metadata is set by earlier invocations
1271   // of the pass on sub-regions, and that this metadata doesn't get lost --
1272   // but we shouldn't rely on metadata for correctness!
1273   unsigned UniformMDKindID =
1274       R->getEntry()->getContext().getMDKindID("structurizecfg.uniform");
1275 
1276   if (hasOnlyUniformBranches(R, UniformMDKindID, UA)) {
1277     LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R
1278                       << '\n');
1279 
1280     // Mark all direct child block terminators as having been treated as
1281     // uniform. To account for a possible future in which non-uniform
1282     // sub-regions are treated more cleverly, indirect children are not
1283     // marked as uniform.
1284     MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {});
1285     for (RegionNode *E : R->elements()) {
1286       if (E->isSubRegion())
1287         continue;
1288 
1289       if (Instruction *Term = E->getEntry()->getTerminator())
1290         Term->setMetadata(UniformMDKindID, MD);
1291     }
1292 
1293     return true;
1294   }
1295   return false;
1296 }
1297 
1298 /// Run the transformation for each region found
1299 bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
1300   if (R->isTopLevelRegion())
1301     return false;
1302 
1303   this->DT = DT;
1304 
1305   Func = R->getEntry()->getParent();
1306   assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator.");
1307 
1308   ParentRegion = R;
1309 
1310   orderNodes();
1311   collectInfos();
1312   createFlow();
1313   insertConditions(false);
1314   insertConditions(true);
1315   setPhiValues();
1316   simplifyConditions();
1317   simplifyAffectedPhis();
1318   rebuildSSA();
1319 
1320   // Cleanup
1321   Order.clear();
1322   Visited.clear();
1323   DeletedPhis.clear();
1324   AddedPhis.clear();
1325   Predicates.clear();
1326   Conditions.clear();
1327   Loops.clear();
1328   LoopPreds.clear();
1329   LoopConds.clear();
1330   FlowSet.clear();
1331   TermDL.clear();
1332 
1333   return true;
1334 }
1335 
1336 Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) {
1337   return new StructurizeCFGLegacyPass(SkipUniformRegions);
1338 }
1339 
1340 static void addRegionIntoQueue(Region &R, std::vector<Region *> &Regions) {
1341   Regions.push_back(&R);
1342   for (const auto &E : R)
1343     addRegionIntoQueue(*E, Regions);
1344 }
1345 
1346 StructurizeCFGPass::StructurizeCFGPass(bool SkipUniformRegions_)
1347     : SkipUniformRegions(SkipUniformRegions_) {
1348   if (ForceSkipUniformRegions.getNumOccurrences())
1349     SkipUniformRegions = ForceSkipUniformRegions.getValue();
1350 }
1351 
1352 void StructurizeCFGPass::printPipeline(
1353     raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1354   static_cast<PassInfoMixin<StructurizeCFGPass> *>(this)->printPipeline(
1355       OS, MapClassName2PassName);
1356   if (SkipUniformRegions)
1357     OS << "<skip-uniform-regions>";
1358 }
1359 
1360 PreservedAnalyses StructurizeCFGPass::run(Function &F,
1361                                           FunctionAnalysisManager &AM) {
1362 
1363   bool Changed = false;
1364   DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
1365   auto &RI = AM.getResult<RegionInfoAnalysis>(F);
1366 
1367   UniformityInfo *UI = nullptr;
1368   if (SkipUniformRegions)
1369     UI = &AM.getResult<UniformityInfoAnalysis>(F);
1370 
1371   std::vector<Region *> Regions;
1372   addRegionIntoQueue(*RI.getTopLevelRegion(), Regions);
1373   while (!Regions.empty()) {
1374     Region *R = Regions.back();
1375     Regions.pop_back();
1376 
1377     StructurizeCFG SCFG;
1378     SCFG.init(R);
1379 
1380     if (SkipUniformRegions && SCFG.makeUniformRegion(R, *UI)) {
1381       Changed = true; // May have added metadata.
1382       continue;
1383     }
1384 
1385     Changed |= SCFG.run(R, DT);
1386   }
1387   if (!Changed)
1388     return PreservedAnalyses::all();
1389   PreservedAnalyses PA;
1390   PA.preserve<DominatorTreeAnalysis>();
1391   return PA;
1392 }
1393