xref: /llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp (revision 107af4a62ee9afb4be2cba1bc7c12afb677445ef)
1 //===- StructurizeCFG.cpp -------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/Transforms/Scalar/StructurizeCFG.h"
10 #include "llvm/ADT/DenseMap.h"
11 #include "llvm/ADT/EquivalenceClasses.h"
12 #include "llvm/ADT/MapVector.h"
13 #include "llvm/ADT/SCCIterator.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/SmallPtrSet.h"
16 #include "llvm/ADT/SmallSet.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/Analysis/InstructionSimplify.h"
19 #include "llvm/Analysis/RegionInfo.h"
20 #include "llvm/Analysis/RegionIterator.h"
21 #include "llvm/Analysis/RegionPass.h"
22 #include "llvm/Analysis/UniformityAnalysis.h"
23 #include "llvm/IR/BasicBlock.h"
24 #include "llvm/IR/CFG.h"
25 #include "llvm/IR/Constants.h"
26 #include "llvm/IR/Dominators.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/IR/InstrTypes.h"
29 #include "llvm/IR/Instruction.h"
30 #include "llvm/IR/Instructions.h"
31 #include "llvm/IR/Metadata.h"
32 #include "llvm/IR/PassManager.h"
33 #include "llvm/IR/PatternMatch.h"
34 #include "llvm/IR/ProfDataUtils.h"
35 #include "llvm/IR/Type.h"
36 #include "llvm/IR/Use.h"
37 #include "llvm/IR/Value.h"
38 #include "llvm/IR/ValueHandle.h"
39 #include "llvm/InitializePasses.h"
40 #include "llvm/Pass.h"
41 #include "llvm/Support/Casting.h"
42 #include "llvm/Support/CommandLine.h"
43 #include "llvm/Support/Debug.h"
44 #include "llvm/Support/raw_ostream.h"
45 #include "llvm/Transforms/Scalar.h"
46 #include "llvm/Transforms/Utils.h"
47 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
48 #include "llvm/Transforms/Utils/Local.h"
49 #include "llvm/Transforms/Utils/SSAUpdater.h"
50 #include <cassert>
51 #include <utility>
52 
53 using namespace llvm;
54 using namespace llvm::PatternMatch;
55 
56 #define DEBUG_TYPE "structurizecfg"
57 
58 // The name for newly created blocks.
59 const char FlowBlockName[] = "Flow";
60 
61 namespace {
62 
63 static cl::opt<bool> ForceSkipUniformRegions(
64   "structurizecfg-skip-uniform-regions",
65   cl::Hidden,
66   cl::desc("Force whether the StructurizeCFG pass skips uniform regions"),
67   cl::init(false));
68 
69 static cl::opt<bool>
70     RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden,
71                           cl::desc("Allow relaxed uniform region checks"),
72                           cl::init(true));
73 
74 // Definition of the complex types used in this pass.
75 
76 using BBValuePair = std::pair<BasicBlock *, Value *>;
77 
78 using RNVector = SmallVector<RegionNode *, 8>;
79 using BBVector = SmallVector<BasicBlock *, 8>;
80 using BranchVector = SmallVector<BranchInst *, 8>;
81 using BBValueVector = SmallVector<BBValuePair, 2>;
82 
83 using BBSet = SmallPtrSet<BasicBlock *, 8>;
84 
85 using PhiMap = MapVector<PHINode *, BBValueVector>;
86 using BB2BBVecMap = MapVector<BasicBlock *, BBVector>;
87 
88 using BBPhiMap = DenseMap<BasicBlock *, PhiMap>;
89 
90 using MaybeCondBranchWeights = std::optional<class CondBranchWeights>;
91 
92 class CondBranchWeights {
93   uint32_t TrueWeight;
94   uint32_t FalseWeight;
95 
96   CondBranchWeights(uint32_t T, uint32_t F) : TrueWeight(T), FalseWeight(F) {}
97 
98 public:
99   static MaybeCondBranchWeights tryParse(const BranchInst &Br) {
100     assert(Br.isConditional());
101 
102     uint64_t T, F;
103     if (!extractBranchWeights(Br, T, F))
104       return std::nullopt;
105 
106     return CondBranchWeights(T, F);
107   }
108 
109   static void setMetadata(BranchInst &Br,
110                           const MaybeCondBranchWeights &Weights) {
111     assert(Br.isConditional());
112     if (!Weights)
113       return;
114     uint32_t Arr[] = {Weights->TrueWeight, Weights->FalseWeight};
115     setBranchWeights(Br, Arr, false);
116   }
117 
118   CondBranchWeights invert() const {
119     return CondBranchWeights{FalseWeight, TrueWeight};
120   }
121 };
122 
123 struct PredInfo {
124   Value *Pred;
125   MaybeCondBranchWeights Weights;
126 };
127 
128 using BBPredicates = DenseMap<BasicBlock *, PredInfo>;
129 using PredMap = DenseMap<BasicBlock *, BBPredicates>;
130 using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>;
131 
132 using BranchDebugLocMap = DenseMap<BasicBlock *, DebugLoc>;
133 
134 // A traits type that is intended to be used in graph algorithms. The graph
135 // traits starts at an entry node, and traverses the RegionNodes that are in
136 // the Nodes set.
137 struct SubGraphTraits {
138   using NodeRef = std::pair<RegionNode *, SmallDenseSet<RegionNode *> *>;
139   using BaseSuccIterator = GraphTraits<RegionNode *>::ChildIteratorType;
140 
141   // This wraps a set of Nodes into the iterator, so we know which edges to
142   // filter out.
143   class WrappedSuccIterator
144       : public iterator_adaptor_base<
145             WrappedSuccIterator, BaseSuccIterator,
146             typename std::iterator_traits<BaseSuccIterator>::iterator_category,
147             NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> {
148     SmallDenseSet<RegionNode *> *Nodes;
149 
150   public:
151     WrappedSuccIterator(BaseSuccIterator It, SmallDenseSet<RegionNode *> *Nodes)
152         : iterator_adaptor_base(It), Nodes(Nodes) {}
153 
154     NodeRef operator*() const { return {*I, Nodes}; }
155   };
156 
157   static bool filterAll(const NodeRef &N) { return true; }
158   static bool filterSet(const NodeRef &N) { return N.second->count(N.first); }
159 
160   using ChildIteratorType =
161       filter_iterator<WrappedSuccIterator, bool (*)(const NodeRef &)>;
162 
163   static NodeRef getEntryNode(Region *R) {
164     return {GraphTraits<Region *>::getEntryNode(R), nullptr};
165   }
166 
167   static NodeRef getEntryNode(NodeRef N) { return N; }
168 
169   static iterator_range<ChildIteratorType> children(const NodeRef &N) {
170     auto *filter = N.second ? &filterSet : &filterAll;
171     return make_filter_range(
172         make_range<WrappedSuccIterator>(
173             {GraphTraits<RegionNode *>::child_begin(N.first), N.second},
174             {GraphTraits<RegionNode *>::child_end(N.first), N.second}),
175         filter);
176   }
177 
178   static ChildIteratorType child_begin(const NodeRef &N) {
179     return children(N).begin();
180   }
181 
182   static ChildIteratorType child_end(const NodeRef &N) {
183     return children(N).end();
184   }
185 };
186 
187 /// Finds the nearest common dominator of a set of BasicBlocks.
188 ///
189 /// For every BB you add to the set, you can specify whether we "remember" the
190 /// block.  When you get the common dominator, you can also ask whether it's one
191 /// of the blocks we remembered.
192 class NearestCommonDominator {
193   DominatorTree *DT;
194   BasicBlock *Result = nullptr;
195   bool ResultIsRemembered = false;
196 
197   /// Add BB to the resulting dominator.
198   void addBlock(BasicBlock *BB, bool Remember) {
199     if (!Result) {
200       Result = BB;
201       ResultIsRemembered = Remember;
202       return;
203     }
204 
205     BasicBlock *NewResult = DT->findNearestCommonDominator(Result, BB);
206     if (NewResult != Result)
207       ResultIsRemembered = false;
208     if (NewResult == BB)
209       ResultIsRemembered |= Remember;
210     Result = NewResult;
211   }
212 
213 public:
214   explicit NearestCommonDominator(DominatorTree *DomTree) : DT(DomTree) {}
215 
216   void addBlock(BasicBlock *BB) {
217     addBlock(BB, /* Remember = */ false);
218   }
219 
220   void addAndRememberBlock(BasicBlock *BB) {
221     addBlock(BB, /* Remember = */ true);
222   }
223 
224   /// Get the nearest common dominator of all the BBs added via addBlock() and
225   /// addAndRememberBlock().
226   BasicBlock *result() { return Result; }
227 
228   /// Is the BB returned by getResult() one of the blocks we added to the set
229   /// with addAndRememberBlock()?
230   bool resultIsRememberedBlock() { return ResultIsRemembered; }
231 };
232 
233 /// Transforms the control flow graph on one single entry/exit region
234 /// at a time.
235 ///
236 /// After the transform all "If"/"Then"/"Else" style control flow looks like
237 /// this:
238 ///
239 /// \verbatim
240 /// 1
241 /// ||
242 /// | |
243 /// 2 |
244 /// | /
245 /// |/
246 /// 3
247 /// ||   Where:
248 /// | |  1 = "If" block, calculates the condition
249 /// 4 |  2 = "Then" subregion, runs if the condition is true
250 /// | /  3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
251 /// |/   4 = "Else" optional subregion, runs if the condition is false
252 /// 5    5 = "End" block, also rejoins the control flow
253 /// \endverbatim
254 ///
255 /// Control flow is expressed as a branch where the true exit goes into the
256 /// "Then"/"Else" region, while the false exit skips the region
257 /// The condition for the optional "Else" region is expressed as a PHI node.
258 /// The incoming values of the PHI node are true for the "If" edge and false
259 /// for the "Then" edge.
260 ///
261 /// Additionally to that even complicated loops look like this:
262 ///
263 /// \verbatim
264 /// 1
265 /// ||
266 /// | |
267 /// 2 ^  Where:
268 /// | /  1 = "Entry" block
269 /// |/   2 = "Loop" optional subregion, with all exits at "Flow" block
270 /// 3    3 = "Flow" block, with back edge to entry block
271 /// |
272 /// \endverbatim
273 ///
274 /// The back edge of the "Flow" block is always on the false side of the branch
275 /// while the true side continues the general flow. So the loop condition
276 /// consist of a network of PHI nodes where the true incoming values expresses
277 /// breaks and the false values expresses continue states.
278 
279 class StructurizeCFG {
280   Type *Boolean;
281   ConstantInt *BoolTrue;
282   ConstantInt *BoolFalse;
283   Value *BoolPoison;
284 
285   Function *Func;
286   Region *ParentRegion;
287 
288   UniformityInfo *UA = nullptr;
289   DominatorTree *DT;
290 
291   SmallVector<RegionNode *, 8> Order;
292   BBSet Visited;
293   BBSet FlowSet;
294 
295   SmallVector<WeakVH, 8> AffectedPhis;
296   BBPhiMap DeletedPhis;
297   BB2BBVecMap AddedPhis;
298 
299   PredMap Predicates;
300   BranchVector Conditions;
301 
302   BB2BBMap Loops;
303   PredMap LoopPreds;
304   BranchVector LoopConds;
305 
306   BranchDebugLocMap TermDL;
307 
308   RegionNode *PrevNode;
309 
310   void orderNodes();
311 
312   void analyzeLoops(RegionNode *N);
313 
314   PredInfo buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
315 
316   void gatherPredicates(RegionNode *N);
317 
318   void collectInfos();
319 
320   void insertConditions(bool Loops);
321 
322   void simplifyConditions();
323 
324   void delPhiValues(BasicBlock *From, BasicBlock *To);
325 
326   void addPhiValues(BasicBlock *From, BasicBlock *To);
327 
328   void findUndefBlocks(BasicBlock *PHIBlock,
329                        const SmallSet<BasicBlock *, 8> &Incomings,
330                        SmallVector<BasicBlock *> &UndefBlks) const;
331 
332   void mergeIfCompatible(EquivalenceClasses<PHINode *> &PhiClasses, PHINode *A,
333                          PHINode *B);
334 
335   void setPhiValues();
336 
337   void simplifyAffectedPhis();
338 
339   void killTerminator(BasicBlock *BB);
340 
341   void changeExit(RegionNode *Node, BasicBlock *NewExit,
342                   bool IncludeDominator);
343 
344   BasicBlock *getNextFlow(BasicBlock *Dominator);
345 
346   BasicBlock *needPrefix(bool NeedEmpty);
347 
348   BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed);
349 
350   void setPrevNode(BasicBlock *BB);
351 
352   bool dominatesPredicates(BasicBlock *BB, RegionNode *Node);
353 
354   bool isPredictableTrue(RegionNode *Node);
355 
356   void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd);
357 
358   void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd);
359 
360   void createFlow();
361 
362   void rebuildSSA();
363 
364 public:
365   void init(Region *R);
366   bool run(Region *R, DominatorTree *DT);
367   bool makeUniformRegion(Region *R, UniformityInfo &UA);
368 };
369 
370 class StructurizeCFGLegacyPass : public RegionPass {
371   bool SkipUniformRegions;
372 
373 public:
374   static char ID;
375 
376   explicit StructurizeCFGLegacyPass(bool SkipUniformRegions_ = false)
377       : RegionPass(ID), SkipUniformRegions(SkipUniformRegions_) {
378     if (ForceSkipUniformRegions.getNumOccurrences())
379       SkipUniformRegions = ForceSkipUniformRegions.getValue();
380     initializeStructurizeCFGLegacyPassPass(*PassRegistry::getPassRegistry());
381   }
382 
383   bool runOnRegion(Region *R, RGPassManager &RGM) override {
384     StructurizeCFG SCFG;
385     SCFG.init(R);
386     if (SkipUniformRegions) {
387       UniformityInfo &UA =
388           getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
389       if (SCFG.makeUniformRegion(R, UA))
390         return false;
391     }
392     DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
393     return SCFG.run(R, DT);
394   }
395 
396   StringRef getPassName() const override { return "Structurize control flow"; }
397 
398   void getAnalysisUsage(AnalysisUsage &AU) const override {
399     if (SkipUniformRegions)
400       AU.addRequired<UniformityInfoWrapperPass>();
401     AU.addRequired<DominatorTreeWrapperPass>();
402 
403     AU.addPreserved<DominatorTreeWrapperPass>();
404     RegionPass::getAnalysisUsage(AU);
405   }
406 };
407 
408 } // end anonymous namespace
409 
410 char StructurizeCFGLegacyPass::ID = 0;
411 
412 INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
413                       "Structurize the CFG", false, false)
414 INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
415 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
416 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
417 INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
418                     "Structurize the CFG", false, false)
419 
420 /// Build up the general order of nodes, by performing a topological sort of the
421 /// parent region's nodes, while ensuring that there is no outer cycle node
422 /// between any two inner cycle nodes.
423 void StructurizeCFG::orderNodes() {
424   Order.resize(std::distance(GraphTraits<Region *>::nodes_begin(ParentRegion),
425                              GraphTraits<Region *>::nodes_end(ParentRegion)));
426   if (Order.empty())
427     return;
428 
429   SmallDenseSet<RegionNode *> Nodes;
430   auto EntryNode = SubGraphTraits::getEntryNode(ParentRegion);
431 
432   // A list of range indices of SCCs in Order, to be processed.
433   SmallVector<std::pair<unsigned, unsigned>, 8> WorkList;
434   unsigned I = 0, E = Order.size();
435   while (true) {
436     // Run through all the SCCs in the subgraph starting with Entry.
437     for (auto SCCI =
438              scc_iterator<SubGraphTraits::NodeRef, SubGraphTraits>::begin(
439                  EntryNode);
440          !SCCI.isAtEnd(); ++SCCI) {
441       auto &SCC = *SCCI;
442 
443       // An SCC up to the size of 2, can be reduced to an entry (the last node),
444       // and a possible additional node. Therefore, it is already in order, and
445       // there is no need to add it to the work-list.
446       unsigned Size = SCC.size();
447       if (Size > 2)
448         WorkList.emplace_back(I, I + Size);
449 
450       // Add the SCC nodes to the Order array.
451       for (const auto &N : SCC) {
452         assert(I < E && "SCC size mismatch!");
453         Order[I++] = N.first;
454       }
455     }
456     assert(I == E && "SCC size mismatch!");
457 
458     // If there are no more SCCs to order, then we are done.
459     if (WorkList.empty())
460       break;
461 
462     std::tie(I, E) = WorkList.pop_back_val();
463 
464     // Collect the set of nodes in the SCC's subgraph. These are only the
465     // possible child nodes; we do not add the entry (last node) otherwise we
466     // will have the same exact SCC all over again.
467     Nodes.clear();
468     Nodes.insert(Order.begin() + I, Order.begin() + E - 1);
469 
470     // Update the entry node.
471     EntryNode.first = Order[E - 1];
472     EntryNode.second = &Nodes;
473   }
474 }
475 
476 /// Determine the end of the loops
477 void StructurizeCFG::analyzeLoops(RegionNode *N) {
478   if (N->isSubRegion()) {
479     // Test for exit as back edge
480     BasicBlock *Exit = N->getNodeAs<Region>()->getExit();
481     if (Visited.count(Exit))
482       Loops[Exit] = N->getEntry();
483 
484   } else {
485     // Test for successors as back edge
486     BasicBlock *BB = N->getNodeAs<BasicBlock>();
487     BranchInst *Term = cast<BranchInst>(BB->getTerminator());
488 
489     for (BasicBlock *Succ : Term->successors())
490       if (Visited.count(Succ))
491         Loops[Succ] = BB;
492   }
493 }
494 
495 /// Build the condition for one edge
496 PredInfo StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
497                                         bool Invert) {
498   Value *Cond = Invert ? BoolFalse : BoolTrue;
499   MaybeCondBranchWeights Weights;
500 
501   if (Term->isConditional()) {
502     Cond = Term->getCondition();
503     Weights = CondBranchWeights::tryParse(*Term);
504 
505     if (Idx != (unsigned)Invert) {
506       Cond = invertCondition(Cond);
507       if (Weights)
508         Weights = Weights->invert();
509     }
510   }
511   return {Cond, Weights};
512 }
513 
514 /// Analyze the predecessors of each block and build up predicates
515 void StructurizeCFG::gatherPredicates(RegionNode *N) {
516   RegionInfo *RI = ParentRegion->getRegionInfo();
517   BasicBlock *BB = N->getEntry();
518   BBPredicates &Pred = Predicates[BB];
519   BBPredicates &LPred = LoopPreds[BB];
520 
521   for (BasicBlock *P : predecessors(BB)) {
522     // Ignore it if it's a branch from outside into our region entry
523     if (!ParentRegion->contains(P))
524       continue;
525 
526     Region *R = RI->getRegionFor(P);
527     if (R == ParentRegion) {
528       // It's a top level block in our region
529       BranchInst *Term = cast<BranchInst>(P->getTerminator());
530       for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
531         BasicBlock *Succ = Term->getSuccessor(i);
532         if (Succ != BB)
533           continue;
534 
535         if (Visited.count(P)) {
536           // Normal forward edge
537           if (Term->isConditional()) {
538             // Try to treat it like an ELSE block
539             BasicBlock *Other = Term->getSuccessor(!i);
540             if (Visited.count(Other) && !Loops.count(Other) &&
541                 !Pred.count(Other) && !Pred.count(P)) {
542 
543               Pred[Other] = {BoolFalse, std::nullopt};
544               Pred[P] = {BoolTrue, std::nullopt};
545               continue;
546             }
547           }
548           Pred[P] = buildCondition(Term, i, false);
549         } else {
550           // Back edge
551           LPred[P] = buildCondition(Term, i, true);
552         }
553       }
554     } else {
555       // It's an exit from a sub region
556       while (R->getParent() != ParentRegion)
557         R = R->getParent();
558 
559       // Edge from inside a subregion to its entry, ignore it
560       if (*R == *N)
561         continue;
562 
563       BasicBlock *Entry = R->getEntry();
564       if (Visited.count(Entry))
565         Pred[Entry] = {BoolTrue, std::nullopt};
566       else
567         LPred[Entry] = {BoolFalse, std::nullopt};
568     }
569   }
570 }
571 
572 /// Collect various loop and predicate infos
573 void StructurizeCFG::collectInfos() {
574   // Reset predicate
575   Predicates.clear();
576 
577   // and loop infos
578   Loops.clear();
579   LoopPreds.clear();
580 
581   // Reset the visited nodes
582   Visited.clear();
583 
584   for (RegionNode *RN : reverse(Order)) {
585     LLVM_DEBUG(dbgs() << "Visiting: "
586                       << (RN->isSubRegion() ? "SubRegion with entry: " : "")
587                       << RN->getEntry()->getName() << "\n");
588 
589     // Analyze all the conditions leading to a node
590     gatherPredicates(RN);
591 
592     // Remember that we've seen this node
593     Visited.insert(RN->getEntry());
594 
595     // Find the last back edges
596     analyzeLoops(RN);
597   }
598 
599   // Reset the collected term debug locations
600   TermDL.clear();
601 
602   for (BasicBlock &BB : *Func) {
603     if (const DebugLoc &DL = BB.getTerminator()->getDebugLoc())
604       TermDL[&BB] = DL;
605   }
606 }
607 
608 /// Insert the missing branch conditions
609 void StructurizeCFG::insertConditions(bool Loops) {
610   BranchVector &Conds = Loops ? LoopConds : Conditions;
611   Value *Default = Loops ? BoolTrue : BoolFalse;
612   SSAUpdater PhiInserter;
613 
614   for (BranchInst *Term : Conds) {
615     assert(Term->isConditional());
616 
617     BasicBlock *Parent = Term->getParent();
618     BasicBlock *SuccTrue = Term->getSuccessor(0);
619     BasicBlock *SuccFalse = Term->getSuccessor(1);
620 
621     PhiInserter.Initialize(Boolean, "");
622     PhiInserter.AddAvailableValue(&Func->getEntryBlock(), Default);
623     PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default);
624 
625     BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
626 
627     NearestCommonDominator Dominator(DT);
628     Dominator.addBlock(Parent);
629 
630     PredInfo ParentInfo{nullptr, std::nullopt};
631     for (auto [BB, PI] : Preds) {
632       if (BB == Parent) {
633         ParentInfo = PI;
634         break;
635       }
636       PhiInserter.AddAvailableValue(BB, PI.Pred);
637       Dominator.addAndRememberBlock(BB);
638     }
639 
640     if (ParentInfo.Pred) {
641       Term->setCondition(ParentInfo.Pred);
642       CondBranchWeights::setMetadata(*Term, ParentInfo.Weights);
643     } else {
644       if (!Dominator.resultIsRememberedBlock())
645         PhiInserter.AddAvailableValue(Dominator.result(), Default);
646 
647       Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent));
648     }
649   }
650 }
651 
652 /// Simplify any inverted conditions that were built by buildConditions.
653 void StructurizeCFG::simplifyConditions() {
654   SmallVector<Instruction *> InstToErase;
655   for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) {
656     auto &Preds = I.second;
657     for (auto [BB, PI] : Preds) {
658       Instruction *Inverted;
659       if (match(PI.Pred, m_Not(m_OneUse(m_Instruction(Inverted)))) &&
660           !PI.Pred->use_empty()) {
661         if (auto *InvertedCmp = dyn_cast<CmpInst>(Inverted)) {
662           InvertedCmp->setPredicate(InvertedCmp->getInversePredicate());
663           PI.Pred->replaceAllUsesWith(InvertedCmp);
664           InstToErase.push_back(cast<Instruction>(PI.Pred));
665         }
666       }
667     }
668   }
669   for (auto *I : InstToErase)
670     I->eraseFromParent();
671 }
672 
673 /// Remove all PHI values coming from "From" into "To" and remember
674 /// them in DeletedPhis
675 void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
676   PhiMap &Map = DeletedPhis[To];
677   for (PHINode &Phi : To->phis()) {
678     bool Recorded = false;
679     while (Phi.getBasicBlockIndex(From) != -1) {
680       Value *Deleted = Phi.removeIncomingValue(From, false);
681       Map[&Phi].push_back(std::make_pair(From, Deleted));
682       if (!Recorded) {
683         AffectedPhis.push_back(&Phi);
684         Recorded = true;
685       }
686     }
687   }
688 }
689 
690 /// Add a dummy PHI value as soon as we knew the new predecessor
691 void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
692   for (PHINode &Phi : To->phis()) {
693     Value *Undef = UndefValue::get(Phi.getType());
694     Phi.addIncoming(Undef, From);
695   }
696   AddedPhis[To].push_back(From);
697 }
698 
699 /// When we are reconstructing a PHI inside \p PHIBlock with incoming values
700 /// from predecessors \p Incomings, we have a chance to mark the available value
701 /// from some blocks as undefined. The function will find out all such blocks
702 /// and return in \p UndefBlks.
703 void StructurizeCFG::findUndefBlocks(
704     BasicBlock *PHIBlock, const SmallSet<BasicBlock *, 8> &Incomings,
705     SmallVector<BasicBlock *> &UndefBlks) const {
706   //  We may get a post-structured CFG like below:
707   //
708   //  | P1
709   //  |/
710   //  F1
711   //  |\
712   //  | N
713   //  |/
714   //  F2
715   //  |\
716   //  | P2
717   //  |/
718   //  F3
719   //  |\
720   //  B
721   //
722   // B is the block that has a PHI being reconstructed. P1/P2 are predecessors
723   // of B before structurization. F1/F2/F3 are flow blocks inserted during
724   // structurization process. Block N is not a predecessor of B before
725   // structurization, but are placed between the predecessors(P1/P2) of B after
726   // structurization. This usually means that threads went to N never take the
727   // path N->F2->F3->B. For example, the threads take the branch F1->N may
728   // always take the branch F2->P2. So, when we are reconstructing a PHI
729   // originally in B, we can safely say the incoming value from N is undefined.
730   SmallSet<BasicBlock *, 8> VisitedBlock;
731   SmallVector<BasicBlock *, 8> Stack;
732   if (PHIBlock == ParentRegion->getExit()) {
733     for (auto P : predecessors(PHIBlock)) {
734       if (ParentRegion->contains(P))
735         Stack.push_back(P);
736     }
737   } else {
738     append_range(Stack, predecessors(PHIBlock));
739   }
740 
741   // Do a backward traversal over the CFG, and stop further searching if
742   // the block is not a Flow. If a block is neither flow block nor the
743   // incoming predecessor, then the incoming value from the block is
744   // undefined value for the PHI being reconstructed.
745   while (!Stack.empty()) {
746     BasicBlock *Current = Stack.pop_back_val();
747     if (!VisitedBlock.insert(Current).second)
748       continue;
749 
750     if (FlowSet.contains(Current)) {
751       for (auto P : predecessors(Current))
752         Stack.push_back(P);
753     } else if (!Incomings.contains(Current)) {
754       UndefBlks.push_back(Current);
755     }
756   }
757 }
758 
759 // If two phi nodes have compatible incoming values (for each
760 // incoming block, either they have the same incoming value or only one phi
761 // node has an incoming value), let them share the merged incoming values. The
762 // merge process is guided by the equivalence information from \p PhiClasses.
763 // The function will possibly update the incoming values of leader phi in
764 // DeletedPhis.
765 void StructurizeCFG::mergeIfCompatible(
766     EquivalenceClasses<PHINode *> &PhiClasses, PHINode *A, PHINode *B) {
767   auto ItA = PhiClasses.findLeader(PhiClasses.insert(A));
768   auto ItB = PhiClasses.findLeader(PhiClasses.insert(B));
769   // They are already in the same class, no work needed.
770   if (ItA == ItB)
771     return;
772 
773   PHINode *LeaderA = *ItA;
774   PHINode *LeaderB = *ItB;
775   BBValueVector &IncomingA = DeletedPhis[LeaderA->getParent()][LeaderA];
776   BBValueVector &IncomingB = DeletedPhis[LeaderB->getParent()][LeaderB];
777 
778   DenseMap<BasicBlock *, Value *> Mergeable(IncomingA.begin(), IncomingA.end());
779   for (auto [BB, V] : IncomingB) {
780     auto BBIt = Mergeable.find(BB);
781     if (BBIt != Mergeable.end() && BBIt->second != V)
782       return;
783     // Either IncomingA does not have this value or IncomingA has the same
784     // value.
785     Mergeable.insert({BB, V});
786   }
787 
788   // Update the incoming value of leaderA.
789   IncomingA.assign(Mergeable.begin(), Mergeable.end());
790   PhiClasses.unionSets(ItA, ItB);
791 }
792 
793 /// Add the real PHI value as soon as everything is set up
794 void StructurizeCFG::setPhiValues() {
795   SmallVector<PHINode *, 8> InsertedPhis;
796   SSAUpdater Updater(&InsertedPhis);
797   DenseMap<BasicBlock *, SmallVector<BasicBlock *>> UndefBlksMap;
798 
799   // Find phi nodes that have compatible incoming values (either they have
800   // the same value for the same block or only one phi node has an incoming
801   // value, see example below). We only search again the phi's that are
802   // referenced by another phi, which is the case we care about.
803   //
804   // For example (-- means no incoming value):
805   // phi1 : BB1:phi2   BB2:v  BB3:--
806   // phi2:  BB1:--     BB2:v  BB3:w
807   //
808   // Then we can merge these incoming values and let phi1, phi2 use the
809   // same set of incoming values:
810   //
811   // phi1&phi2: BB1:phi2  BB2:v  BB3:w
812   //
813   // By doing this, phi1 and phi2 would share more intermediate phi nodes.
814   // This would help reduce the number of phi nodes during SSA reconstruction
815   // and ultimately result in fewer COPY instructions.
816   //
817   // This should be correct, because if a phi node does not have incoming
818   // value from certain block, this means the block is not the predecessor
819   // of the parent block, so we actually don't care about its incoming value.
820   EquivalenceClasses<PHINode *> PhiClasses;
821   for (const auto &[To, From] : AddedPhis) {
822     auto OldPhiIt = DeletedPhis.find(To);
823     if (OldPhiIt == DeletedPhis.end())
824       continue;
825 
826     PhiMap &BlkPhis = OldPhiIt->second;
827     SmallVector<BasicBlock *> &UndefBlks = UndefBlksMap[To];
828     SmallSet<BasicBlock *, 8> Incomings;
829 
830     // Get the undefined blocks shared by all the phi nodes.
831     if (!BlkPhis.empty()) {
832       for (const auto &VI : BlkPhis.front().second)
833         Incomings.insert(VI.first);
834       findUndefBlocks(To, Incomings, UndefBlks);
835     }
836 
837     for (const auto &[Phi, Incomings] : OldPhiIt->second) {
838       SmallVector<PHINode *> IncomingPHIs;
839       for (const auto &[BB, V] : Incomings) {
840         // First, for each phi, check whether it has incoming value which is
841         // another phi.
842         if (PHINode *P = dyn_cast<PHINode>(V))
843           IncomingPHIs.push_back(P);
844       }
845 
846       for (auto *OtherPhi : IncomingPHIs) {
847         // Skip phis that are unrelated to the phi reconstruction for now.
848         if (!DeletedPhis.contains(OtherPhi->getParent()))
849           continue;
850         mergeIfCompatible(PhiClasses, Phi, OtherPhi);
851       }
852     }
853   }
854 
855   for (const auto &AddedPhi : AddedPhis) {
856     BasicBlock *To = AddedPhi.first;
857     const BBVector &From = AddedPhi.second;
858 
859     if (!DeletedPhis.count(To))
860       continue;
861 
862     PhiMap &Map = DeletedPhis[To];
863     SmallVector<BasicBlock *> &UndefBlks = UndefBlksMap[To];
864     for (const auto &[Phi, Incoming] : Map) {
865       Value *Undef = UndefValue::get(Phi->getType());
866       Updater.Initialize(Phi->getType(), "");
867       Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
868       Updater.AddAvailableValue(To, Undef);
869 
870       // Use leader phi's incoming if there is.
871       auto LeaderIt = PhiClasses.findLeader(Phi);
872       bool UseIncomingOfLeader =
873           LeaderIt != PhiClasses.member_end() && *LeaderIt != Phi;
874       const auto &IncomingMap =
875           UseIncomingOfLeader ? DeletedPhis[(*LeaderIt)->getParent()][*LeaderIt]
876                               : Incoming;
877 
878       SmallVector<BasicBlock *> ConstantPreds;
879       for (const auto &[BB, V] : IncomingMap) {
880         Updater.AddAvailableValue(BB, V);
881         if (isa<Constant>(V))
882           ConstantPreds.push_back(BB);
883       }
884 
885       for (auto UB : UndefBlks) {
886         // If this undef block is dominated by any predecessor(before
887         // structurization) of reconstructed PHI with constant incoming value,
888         // don't mark the available value as undefined. Setting undef to such
889         // block will stop us from getting optimal phi insertion.
890         if (any_of(ConstantPreds,
891                    [&](BasicBlock *CP) { return DT->dominates(CP, UB); }))
892           continue;
893         // Maybe already get a value through sharing with other phi nodes.
894         if (Updater.HasValueForBlock(UB))
895           continue;
896 
897         Updater.AddAvailableValue(UB, Undef);
898       }
899 
900       for (BasicBlock *FI : From)
901         Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI));
902       AffectedPhis.push_back(Phi);
903     }
904   }
905 
906   AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end());
907 }
908 
909 void StructurizeCFG::simplifyAffectedPhis() {
910   bool Changed;
911   do {
912     Changed = false;
913     SimplifyQuery Q(Func->getDataLayout());
914     Q.DT = DT;
915     // Setting CanUseUndef to true might extend value liveness, set it to false
916     // to achieve better register pressure.
917     Q.CanUseUndef = false;
918     for (WeakVH VH : AffectedPhis) {
919       if (auto Phi = dyn_cast_or_null<PHINode>(VH)) {
920         if (auto NewValue = simplifyInstruction(Phi, Q)) {
921           Phi->replaceAllUsesWith(NewValue);
922           Phi->eraseFromParent();
923           Changed = true;
924         }
925       }
926     }
927   } while (Changed);
928 }
929 
930 /// Remove phi values from all successors and then remove the terminator.
931 void StructurizeCFG::killTerminator(BasicBlock *BB) {
932   Instruction *Term = BB->getTerminator();
933   if (!Term)
934     return;
935 
936   for (BasicBlock *Succ : successors(BB))
937     delPhiValues(BB, Succ);
938 
939   Term->eraseFromParent();
940 }
941 
942 /// Let node exit(s) point to NewExit
943 void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
944                                 bool IncludeDominator) {
945   if (Node->isSubRegion()) {
946     Region *SubRegion = Node->getNodeAs<Region>();
947     BasicBlock *OldExit = SubRegion->getExit();
948     BasicBlock *Dominator = nullptr;
949 
950     // Find all the edges from the sub region to the exit.
951     // We use make_early_inc_range here because we modify BB's terminator.
952     for (BasicBlock *BB : llvm::make_early_inc_range(predecessors(OldExit))) {
953       if (!SubRegion->contains(BB))
954         continue;
955 
956       // Modify the edges to point to the new exit
957       delPhiValues(BB, OldExit);
958       BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit);
959       addPhiValues(BB, NewExit);
960 
961       // Find the new dominator (if requested)
962       if (IncludeDominator) {
963         if (!Dominator)
964           Dominator = BB;
965         else
966           Dominator = DT->findNearestCommonDominator(Dominator, BB);
967       }
968     }
969 
970     // Change the dominator (if requested)
971     if (Dominator)
972       DT->changeImmediateDominator(NewExit, Dominator);
973 
974     // Update the region info
975     SubRegion->replaceExit(NewExit);
976   } else {
977     BasicBlock *BB = Node->getNodeAs<BasicBlock>();
978     killTerminator(BB);
979     BranchInst *Br = BranchInst::Create(NewExit, BB);
980     Br->setDebugLoc(TermDL[BB]);
981     addPhiValues(BB, NewExit);
982     if (IncludeDominator)
983       DT->changeImmediateDominator(NewExit, BB);
984   }
985 }
986 
987 /// Create a new flow node and update dominator tree and region info
988 BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) {
989   LLVMContext &Context = Func->getContext();
990   BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
991                        Order.back()->getEntry();
992   BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
993                                         Func, Insert);
994   FlowSet.insert(Flow);
995 
996   // use a temporary variable to avoid a use-after-free if the map's storage is
997   // reallocated
998   DebugLoc DL = TermDL[Dominator];
999   TermDL[Flow] = std::move(DL);
1000 
1001   DT->addNewBlock(Flow, Dominator);
1002   ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
1003   return Flow;
1004 }
1005 
1006 /// Create a new or reuse the previous node as flow node
1007 BasicBlock *StructurizeCFG::needPrefix(bool NeedEmpty) {
1008   BasicBlock *Entry = PrevNode->getEntry();
1009 
1010   if (!PrevNode->isSubRegion()) {
1011     killTerminator(Entry);
1012     if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end())
1013       return Entry;
1014   }
1015 
1016   // create a new flow node
1017   BasicBlock *Flow = getNextFlow(Entry);
1018 
1019   // and wire it up
1020   changeExit(PrevNode, Flow, true);
1021   PrevNode = ParentRegion->getBBNode(Flow);
1022   return Flow;
1023 }
1024 
1025 /// Returns the region exit if possible, otherwise just a new flow node
1026 BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow,
1027                                         bool ExitUseAllowed) {
1028   if (!Order.empty() || !ExitUseAllowed)
1029     return getNextFlow(Flow);
1030 
1031   BasicBlock *Exit = ParentRegion->getExit();
1032   DT->changeImmediateDominator(Exit, Flow);
1033   addPhiValues(Flow, Exit);
1034   return Exit;
1035 }
1036 
1037 /// Set the previous node
1038 void StructurizeCFG::setPrevNode(BasicBlock *BB) {
1039   PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB)
1040                                         : nullptr;
1041 }
1042 
1043 /// Does BB dominate all the predicates of Node?
1044 bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) {
1045   BBPredicates &Preds = Predicates[Node->getEntry()];
1046   return llvm::all_of(Preds, [&](std::pair<BasicBlock *, PredInfo> Pred) {
1047     return DT->dominates(BB, Pred.first);
1048   });
1049 }
1050 
1051 /// Can we predict that this node will always be called?
1052 bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
1053   BBPredicates &Preds = Predicates[Node->getEntry()];
1054   bool Dominated = false;
1055 
1056   // Regionentry is always true
1057   if (!PrevNode)
1058     return true;
1059 
1060   for (auto [BB, PI] : Preds) {
1061     if (PI.Pred != BoolTrue)
1062       return false;
1063 
1064     if (!Dominated && DT->dominates(BB, PrevNode->getEntry()))
1065       Dominated = true;
1066   }
1067 
1068   // TODO: The dominator check is too strict
1069   return Dominated;
1070 }
1071 
1072 /// Take one node from the order vector and wire it up
1073 void StructurizeCFG::wireFlow(bool ExitUseAllowed,
1074                               BasicBlock *LoopEnd) {
1075   RegionNode *Node = Order.pop_back_val();
1076   Visited.insert(Node->getEntry());
1077 
1078   if (isPredictableTrue(Node)) {
1079     // Just a linear flow
1080     if (PrevNode) {
1081       changeExit(PrevNode, Node->getEntry(), true);
1082     }
1083     PrevNode = Node;
1084   } else {
1085     // Insert extra prefix node (or reuse last one)
1086     BasicBlock *Flow = needPrefix(false);
1087 
1088     // Insert extra postfix node (or use exit instead)
1089     BasicBlock *Entry = Node->getEntry();
1090     BasicBlock *Next = needPostfix(Flow, ExitUseAllowed);
1091 
1092     // let it point to entry and next block
1093     BranchInst *Br = BranchInst::Create(Entry, Next, BoolPoison, Flow);
1094     Br->setDebugLoc(TermDL[Flow]);
1095     Conditions.push_back(Br);
1096     addPhiValues(Flow, Entry);
1097     DT->changeImmediateDominator(Entry, Flow);
1098 
1099     PrevNode = Node;
1100     while (!Order.empty() && !Visited.count(LoopEnd) &&
1101            dominatesPredicates(Entry, Order.back())) {
1102       handleLoops(false, LoopEnd);
1103     }
1104 
1105     changeExit(PrevNode, Next, false);
1106     setPrevNode(Next);
1107   }
1108 }
1109 
1110 void StructurizeCFG::handleLoops(bool ExitUseAllowed,
1111                                  BasicBlock *LoopEnd) {
1112   RegionNode *Node = Order.back();
1113   BasicBlock *LoopStart = Node->getEntry();
1114 
1115   if (!Loops.count(LoopStart)) {
1116     wireFlow(ExitUseAllowed, LoopEnd);
1117     return;
1118   }
1119 
1120   if (!isPredictableTrue(Node))
1121     LoopStart = needPrefix(true);
1122 
1123   LoopEnd = Loops[Node->getEntry()];
1124   wireFlow(false, LoopEnd);
1125   while (!Visited.count(LoopEnd)) {
1126     handleLoops(false, LoopEnd);
1127   }
1128 
1129   assert(LoopStart != &LoopStart->getParent()->getEntryBlock());
1130 
1131   // Create an extra loop end node
1132   LoopEnd = needPrefix(false);
1133   BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed);
1134   BranchInst *Br = BranchInst::Create(Next, LoopStart, BoolPoison, LoopEnd);
1135   Br->setDebugLoc(TermDL[LoopEnd]);
1136   LoopConds.push_back(Br);
1137   addPhiValues(LoopEnd, LoopStart);
1138   setPrevNode(Next);
1139 }
1140 
1141 /// After this function control flow looks like it should be, but
1142 /// branches and PHI nodes only have undefined conditions.
1143 void StructurizeCFG::createFlow() {
1144   BasicBlock *Exit = ParentRegion->getExit();
1145   bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit);
1146 
1147   AffectedPhis.clear();
1148   DeletedPhis.clear();
1149   AddedPhis.clear();
1150   Conditions.clear();
1151   LoopConds.clear();
1152 
1153   PrevNode = nullptr;
1154   Visited.clear();
1155 
1156   while (!Order.empty()) {
1157     handleLoops(EntryDominatesExit, nullptr);
1158   }
1159 
1160   if (PrevNode)
1161     changeExit(PrevNode, Exit, EntryDominatesExit);
1162   else
1163     assert(EntryDominatesExit);
1164 }
1165 
1166 /// Handle a rare case where the disintegrated nodes instructions
1167 /// no longer dominate all their uses. Not sure if this is really necessary
1168 void StructurizeCFG::rebuildSSA() {
1169   SSAUpdater Updater;
1170   for (BasicBlock *BB : ParentRegion->blocks())
1171     for (Instruction &I : *BB) {
1172       bool Initialized = false;
1173       // We may modify the use list as we iterate over it, so we use
1174       // make_early_inc_range.
1175       for (Use &U : llvm::make_early_inc_range(I.uses())) {
1176         Instruction *User = cast<Instruction>(U.getUser());
1177         if (User->getParent() == BB) {
1178           continue;
1179         } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
1180           if (UserPN->getIncomingBlock(U) == BB)
1181             continue;
1182         }
1183 
1184         if (DT->dominates(&I, User))
1185           continue;
1186 
1187         if (!Initialized) {
1188           Value *Undef = UndefValue::get(I.getType());
1189           Updater.Initialize(I.getType(), "");
1190           Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
1191           Updater.AddAvailableValue(BB, &I);
1192           Initialized = true;
1193         }
1194         Updater.RewriteUseAfterInsertions(U);
1195       }
1196     }
1197 }
1198 
1199 static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
1200                                    const UniformityInfo &UA) {
1201   // Bool for if all sub-regions are uniform.
1202   bool SubRegionsAreUniform = true;
1203   // Count of how many direct children are conditional.
1204   unsigned ConditionalDirectChildren = 0;
1205 
1206   for (auto *E : R->elements()) {
1207     if (!E->isSubRegion()) {
1208       auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator());
1209       if (!Br || !Br->isConditional())
1210         continue;
1211 
1212       if (!UA.isUniform(Br))
1213         return false;
1214 
1215       // One of our direct children is conditional.
1216       ConditionalDirectChildren++;
1217 
1218       LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName()
1219                         << " has uniform terminator\n");
1220     } else {
1221       // Explicitly refuse to treat regions as uniform if they have non-uniform
1222       // subregions. We cannot rely on UniformityAnalysis for branches in
1223       // subregions because those branches may have been removed and re-created,
1224       // so we look for our metadata instead.
1225       //
1226       // Warning: It would be nice to treat regions as uniform based only on
1227       // their direct child basic blocks' terminators, regardless of whether
1228       // subregions are uniform or not. However, this requires a very careful
1229       // look at SIAnnotateControlFlow to make sure nothing breaks there.
1230       for (auto *BB : E->getNodeAs<Region>()->blocks()) {
1231         auto Br = dyn_cast<BranchInst>(BB->getTerminator());
1232         if (!Br || !Br->isConditional())
1233           continue;
1234 
1235         if (!Br->getMetadata(UniformMDKindID)) {
1236           // Early exit if we cannot have relaxed uniform regions.
1237           if (!RelaxedUniformRegions)
1238             return false;
1239 
1240           SubRegionsAreUniform = false;
1241           break;
1242         }
1243       }
1244     }
1245   }
1246 
1247   // Our region is uniform if:
1248   // 1. All conditional branches that are direct children are uniform (checked
1249   // above).
1250   // 2. And either:
1251   //   a. All sub-regions are uniform.
1252   //   b. There is one or less conditional branches among the direct children.
1253   return SubRegionsAreUniform || (ConditionalDirectChildren <= 1);
1254 }
1255 
1256 void StructurizeCFG::init(Region *R) {
1257   LLVMContext &Context = R->getEntry()->getContext();
1258 
1259   Boolean = Type::getInt1Ty(Context);
1260   BoolTrue = ConstantInt::getTrue(Context);
1261   BoolFalse = ConstantInt::getFalse(Context);
1262   BoolPoison = PoisonValue::get(Boolean);
1263 
1264   this->UA = nullptr;
1265 }
1266 
1267 bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {
1268   if (R->isTopLevelRegion())
1269     return false;
1270 
1271   this->UA = &UA;
1272 
1273   // TODO: We could probably be smarter here with how we handle sub-regions.
1274   // We currently rely on the fact that metadata is set by earlier invocations
1275   // of the pass on sub-regions, and that this metadata doesn't get lost --
1276   // but we shouldn't rely on metadata for correctness!
1277   unsigned UniformMDKindID =
1278       R->getEntry()->getContext().getMDKindID("structurizecfg.uniform");
1279 
1280   if (hasOnlyUniformBranches(R, UniformMDKindID, UA)) {
1281     LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R
1282                       << '\n');
1283 
1284     // Mark all direct child block terminators as having been treated as
1285     // uniform. To account for a possible future in which non-uniform
1286     // sub-regions are treated more cleverly, indirect children are not
1287     // marked as uniform.
1288     MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {});
1289     for (RegionNode *E : R->elements()) {
1290       if (E->isSubRegion())
1291         continue;
1292 
1293       if (Instruction *Term = E->getEntry()->getTerminator())
1294         Term->setMetadata(UniformMDKindID, MD);
1295     }
1296 
1297     return true;
1298   }
1299   return false;
1300 }
1301 
1302 /// Run the transformation for each region found
1303 bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
1304   if (R->isTopLevelRegion())
1305     return false;
1306 
1307   this->DT = DT;
1308 
1309   Func = R->getEntry()->getParent();
1310   assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator.");
1311 
1312   ParentRegion = R;
1313 
1314   orderNodes();
1315   collectInfos();
1316   createFlow();
1317   insertConditions(false);
1318   insertConditions(true);
1319   setPhiValues();
1320   simplifyConditions();
1321   simplifyAffectedPhis();
1322   rebuildSSA();
1323 
1324   // Cleanup
1325   Order.clear();
1326   Visited.clear();
1327   DeletedPhis.clear();
1328   AddedPhis.clear();
1329   Predicates.clear();
1330   Conditions.clear();
1331   Loops.clear();
1332   LoopPreds.clear();
1333   LoopConds.clear();
1334   FlowSet.clear();
1335   TermDL.clear();
1336 
1337   return true;
1338 }
1339 
1340 Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) {
1341   return new StructurizeCFGLegacyPass(SkipUniformRegions);
1342 }
1343 
1344 static void addRegionIntoQueue(Region &R, std::vector<Region *> &Regions) {
1345   Regions.push_back(&R);
1346   for (const auto &E : R)
1347     addRegionIntoQueue(*E, Regions);
1348 }
1349 
1350 StructurizeCFGPass::StructurizeCFGPass(bool SkipUniformRegions_)
1351     : SkipUniformRegions(SkipUniformRegions_) {
1352   if (ForceSkipUniformRegions.getNumOccurrences())
1353     SkipUniformRegions = ForceSkipUniformRegions.getValue();
1354 }
1355 
1356 void StructurizeCFGPass::printPipeline(
1357     raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1358   static_cast<PassInfoMixin<StructurizeCFGPass> *>(this)->printPipeline(
1359       OS, MapClassName2PassName);
1360   if (SkipUniformRegions)
1361     OS << "<skip-uniform-regions>";
1362 }
1363 
1364 PreservedAnalyses StructurizeCFGPass::run(Function &F,
1365                                           FunctionAnalysisManager &AM) {
1366 
1367   bool Changed = false;
1368   DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
1369   auto &RI = AM.getResult<RegionInfoAnalysis>(F);
1370 
1371   UniformityInfo *UI = nullptr;
1372   if (SkipUniformRegions)
1373     UI = &AM.getResult<UniformityInfoAnalysis>(F);
1374 
1375   std::vector<Region *> Regions;
1376   addRegionIntoQueue(*RI.getTopLevelRegion(), Regions);
1377   while (!Regions.empty()) {
1378     Region *R = Regions.back();
1379     Regions.pop_back();
1380 
1381     StructurizeCFG SCFG;
1382     SCFG.init(R);
1383 
1384     if (SkipUniformRegions && SCFG.makeUniformRegion(R, *UI)) {
1385       Changed = true; // May have added metadata.
1386       continue;
1387     }
1388 
1389     Changed |= SCFG.run(R, DT);
1390   }
1391   if (!Changed)
1392     return PreservedAnalyses::all();
1393   PreservedAnalyses PA;
1394   PA.preserve<DominatorTreeAnalysis>();
1395   return PA;
1396 }
1397