xref: /llvm-project/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp (revision 94f9cbbe49b4c836cfbed046637cdc0c63a4a083)
1 //===- StructurizeCFG.cpp -------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/Transforms/Scalar/StructurizeCFG.h"
10 #include "llvm/ADT/DenseMap.h"
11 #include "llvm/ADT/EquivalenceClasses.h"
12 #include "llvm/ADT/MapVector.h"
13 #include "llvm/ADT/SCCIterator.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/SmallPtrSet.h"
16 #include "llvm/ADT/SmallSet.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/Analysis/InstructionSimplify.h"
19 #include "llvm/Analysis/RegionInfo.h"
20 #include "llvm/Analysis/RegionIterator.h"
21 #include "llvm/Analysis/RegionPass.h"
22 #include "llvm/Analysis/UniformityAnalysis.h"
23 #include "llvm/IR/BasicBlock.h"
24 #include "llvm/IR/CFG.h"
25 #include "llvm/IR/Constants.h"
26 #include "llvm/IR/Dominators.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/IR/InstrTypes.h"
29 #include "llvm/IR/Instruction.h"
30 #include "llvm/IR/Instructions.h"
31 #include "llvm/IR/Metadata.h"
32 #include "llvm/IR/PassManager.h"
33 #include "llvm/IR/PatternMatch.h"
34 #include "llvm/IR/ProfDataUtils.h"
35 #include "llvm/IR/Type.h"
36 #include "llvm/IR/Use.h"
37 #include "llvm/IR/Value.h"
38 #include "llvm/IR/ValueHandle.h"
39 #include "llvm/InitializePasses.h"
40 #include "llvm/Pass.h"
41 #include "llvm/Support/Casting.h"
42 #include "llvm/Support/CommandLine.h"
43 #include "llvm/Support/Debug.h"
44 #include "llvm/Support/raw_ostream.h"
45 #include "llvm/Transforms/Scalar.h"
46 #include "llvm/Transforms/Utils.h"
47 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
48 #include "llvm/Transforms/Utils/Local.h"
49 #include "llvm/Transforms/Utils/SSAUpdater.h"
50 #include <cassert>
51 #include <utility>
52 
53 using namespace llvm;
54 using namespace llvm::PatternMatch;
55 
56 #define DEBUG_TYPE "structurizecfg"
57 
58 // The name for newly created blocks.
59 const char FlowBlockName[] = "Flow";
60 
61 namespace {
62 
63 static cl::opt<bool> ForceSkipUniformRegions(
64   "structurizecfg-skip-uniform-regions",
65   cl::Hidden,
66   cl::desc("Force whether the StructurizeCFG pass skips uniform regions"),
67   cl::init(false));
68 
69 static cl::opt<bool>
70     RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden,
71                           cl::desc("Allow relaxed uniform region checks"),
72                           cl::init(true));
73 
74 // Definition of the complex types used in this pass.
75 
76 using BBValuePair = std::pair<BasicBlock *, Value *>;
77 
78 using RNVector = SmallVector<RegionNode *, 8>;
79 using BBVector = SmallVector<BasicBlock *, 8>;
80 using BranchVector = SmallVector<BranchInst *, 8>;
81 using BBValueVector = SmallVector<BBValuePair, 2>;
82 
83 using BBSet = SmallPtrSet<BasicBlock *, 8>;
84 
85 using PhiMap = MapVector<PHINode *, BBValueVector>;
86 using BB2BBVecMap = MapVector<BasicBlock *, BBVector>;
87 
88 using BBPhiMap = DenseMap<BasicBlock *, PhiMap>;
89 
90 using MaybeCondBranchWeights = std::optional<class CondBranchWeights>;
91 
92 class CondBranchWeights {
93   uint32_t TrueWeight;
94   uint32_t FalseWeight;
95 
96   CondBranchWeights(uint32_t T, uint32_t F) : TrueWeight(T), FalseWeight(F) {}
97 
98 public:
99   static MaybeCondBranchWeights tryParse(const BranchInst &Br) {
100     assert(Br.isConditional());
101 
102     uint64_t T, F;
103     if (!extractBranchWeights(Br, T, F))
104       return std::nullopt;
105 
106     return CondBranchWeights(T, F);
107   }
108 
109   static void setMetadata(BranchInst &Br,
110                           const MaybeCondBranchWeights &Weights) {
111     assert(Br.isConditional());
112     if (!Weights)
113       return;
114     uint32_t Arr[] = {Weights->TrueWeight, Weights->FalseWeight};
115     setBranchWeights(Br, Arr, false);
116   }
117 
118   CondBranchWeights invert() const {
119     return CondBranchWeights{FalseWeight, TrueWeight};
120   }
121 };
122 
123 using ValueWeightPair = std::pair<Value *, MaybeCondBranchWeights>;
124 
125 using BBPredicates = DenseMap<BasicBlock *, ValueWeightPair>;
126 using PredMap = DenseMap<BasicBlock *, BBPredicates>;
127 using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>;
128 
129 using BranchDebugLocMap = DenseMap<BasicBlock *, DebugLoc>;
130 
131 // A traits type that is intended to be used in graph algorithms. The graph
132 // traits starts at an entry node, and traverses the RegionNodes that are in
133 // the Nodes set.
134 struct SubGraphTraits {
135   using NodeRef = std::pair<RegionNode *, SmallDenseSet<RegionNode *> *>;
136   using BaseSuccIterator = GraphTraits<RegionNode *>::ChildIteratorType;
137 
138   // This wraps a set of Nodes into the iterator, so we know which edges to
139   // filter out.
140   class WrappedSuccIterator
141       : public iterator_adaptor_base<
142             WrappedSuccIterator, BaseSuccIterator,
143             typename std::iterator_traits<BaseSuccIterator>::iterator_category,
144             NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> {
145     SmallDenseSet<RegionNode *> *Nodes;
146 
147   public:
148     WrappedSuccIterator(BaseSuccIterator It, SmallDenseSet<RegionNode *> *Nodes)
149         : iterator_adaptor_base(It), Nodes(Nodes) {}
150 
151     NodeRef operator*() const { return {*I, Nodes}; }
152   };
153 
154   static bool filterAll(const NodeRef &N) { return true; }
155   static bool filterSet(const NodeRef &N) { return N.second->count(N.first); }
156 
157   using ChildIteratorType =
158       filter_iterator<WrappedSuccIterator, bool (*)(const NodeRef &)>;
159 
160   static NodeRef getEntryNode(Region *R) {
161     return {GraphTraits<Region *>::getEntryNode(R), nullptr};
162   }
163 
164   static NodeRef getEntryNode(NodeRef N) { return N; }
165 
166   static iterator_range<ChildIteratorType> children(const NodeRef &N) {
167     auto *filter = N.second ? &filterSet : &filterAll;
168     return make_filter_range(
169         make_range<WrappedSuccIterator>(
170             {GraphTraits<RegionNode *>::child_begin(N.first), N.second},
171             {GraphTraits<RegionNode *>::child_end(N.first), N.second}),
172         filter);
173   }
174 
175   static ChildIteratorType child_begin(const NodeRef &N) {
176     return children(N).begin();
177   }
178 
179   static ChildIteratorType child_end(const NodeRef &N) {
180     return children(N).end();
181   }
182 };
183 
184 /// Finds the nearest common dominator of a set of BasicBlocks.
185 ///
186 /// For every BB you add to the set, you can specify whether we "remember" the
187 /// block.  When you get the common dominator, you can also ask whether it's one
188 /// of the blocks we remembered.
189 class NearestCommonDominator {
190   DominatorTree *DT;
191   BasicBlock *Result = nullptr;
192   bool ResultIsRemembered = false;
193 
194   /// Add BB to the resulting dominator.
195   void addBlock(BasicBlock *BB, bool Remember) {
196     if (!Result) {
197       Result = BB;
198       ResultIsRemembered = Remember;
199       return;
200     }
201 
202     BasicBlock *NewResult = DT->findNearestCommonDominator(Result, BB);
203     if (NewResult != Result)
204       ResultIsRemembered = false;
205     if (NewResult == BB)
206       ResultIsRemembered |= Remember;
207     Result = NewResult;
208   }
209 
210 public:
211   explicit NearestCommonDominator(DominatorTree *DomTree) : DT(DomTree) {}
212 
213   void addBlock(BasicBlock *BB) {
214     addBlock(BB, /* Remember = */ false);
215   }
216 
217   void addAndRememberBlock(BasicBlock *BB) {
218     addBlock(BB, /* Remember = */ true);
219   }
220 
221   /// Get the nearest common dominator of all the BBs added via addBlock() and
222   /// addAndRememberBlock().
223   BasicBlock *result() { return Result; }
224 
225   /// Is the BB returned by getResult() one of the blocks we added to the set
226   /// with addAndRememberBlock()?
227   bool resultIsRememberedBlock() { return ResultIsRemembered; }
228 };
229 
230 /// Transforms the control flow graph on one single entry/exit region
231 /// at a time.
232 ///
233 /// After the transform all "If"/"Then"/"Else" style control flow looks like
234 /// this:
235 ///
236 /// \verbatim
237 /// 1
238 /// ||
239 /// | |
240 /// 2 |
241 /// | /
242 /// |/
243 /// 3
244 /// ||   Where:
245 /// | |  1 = "If" block, calculates the condition
246 /// 4 |  2 = "Then" subregion, runs if the condition is true
247 /// | /  3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
248 /// |/   4 = "Else" optional subregion, runs if the condition is false
249 /// 5    5 = "End" block, also rejoins the control flow
250 /// \endverbatim
251 ///
252 /// Control flow is expressed as a branch where the true exit goes into the
253 /// "Then"/"Else" region, while the false exit skips the region
254 /// The condition for the optional "Else" region is expressed as a PHI node.
255 /// The incoming values of the PHI node are true for the "If" edge and false
256 /// for the "Then" edge.
257 ///
258 /// Additionally to that even complicated loops look like this:
259 ///
260 /// \verbatim
261 /// 1
262 /// ||
263 /// | |
264 /// 2 ^  Where:
265 /// | /  1 = "Entry" block
266 /// |/   2 = "Loop" optional subregion, with all exits at "Flow" block
267 /// 3    3 = "Flow" block, with back edge to entry block
268 /// |
269 /// \endverbatim
270 ///
271 /// The back edge of the "Flow" block is always on the false side of the branch
272 /// while the true side continues the general flow. So the loop condition
273 /// consist of a network of PHI nodes where the true incoming values expresses
274 /// breaks and the false values expresses continue states.
275 
276 class StructurizeCFG {
277   Type *Boolean;
278   ConstantInt *BoolTrue;
279   ConstantInt *BoolFalse;
280   Value *BoolPoison;
281 
282   Function *Func;
283   Region *ParentRegion;
284 
285   UniformityInfo *UA = nullptr;
286   DominatorTree *DT;
287 
288   SmallVector<RegionNode *, 8> Order;
289   BBSet Visited;
290   BBSet FlowSet;
291 
292   SmallVector<WeakVH, 8> AffectedPhis;
293   BBPhiMap DeletedPhis;
294   BB2BBVecMap AddedPhis;
295 
296   PredMap Predicates;
297   BranchVector Conditions;
298 
299   BB2BBMap Loops;
300   PredMap LoopPreds;
301   BranchVector LoopConds;
302 
303   BranchDebugLocMap TermDL;
304 
305   RegionNode *PrevNode;
306 
307   void orderNodes();
308 
309   void analyzeLoops(RegionNode *N);
310 
311   ValueWeightPair buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
312 
313   void gatherPredicates(RegionNode *N);
314 
315   void collectInfos();
316 
317   void insertConditions(bool Loops);
318 
319   void simplifyConditions();
320 
321   void delPhiValues(BasicBlock *From, BasicBlock *To);
322 
323   void addPhiValues(BasicBlock *From, BasicBlock *To);
324 
325   void findUndefBlocks(BasicBlock *PHIBlock,
326                        const SmallSet<BasicBlock *, 8> &Incomings,
327                        SmallVector<BasicBlock *> &UndefBlks) const;
328 
329   void mergeIfCompatible(EquivalenceClasses<PHINode *> &PhiClasses, PHINode *A,
330                          PHINode *B);
331 
332   void setPhiValues();
333 
334   void simplifyAffectedPhis();
335 
336   void killTerminator(BasicBlock *BB);
337 
338   void changeExit(RegionNode *Node, BasicBlock *NewExit,
339                   bool IncludeDominator);
340 
341   BasicBlock *getNextFlow(BasicBlock *Dominator);
342 
343   BasicBlock *needPrefix(bool NeedEmpty);
344 
345   BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed);
346 
347   void setPrevNode(BasicBlock *BB);
348 
349   bool dominatesPredicates(BasicBlock *BB, RegionNode *Node);
350 
351   bool isPredictableTrue(RegionNode *Node);
352 
353   void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd);
354 
355   void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd);
356 
357   void createFlow();
358 
359   void rebuildSSA();
360 
361 public:
362   void init(Region *R);
363   bool run(Region *R, DominatorTree *DT);
364   bool makeUniformRegion(Region *R, UniformityInfo &UA);
365 };
366 
367 class StructurizeCFGLegacyPass : public RegionPass {
368   bool SkipUniformRegions;
369 
370 public:
371   static char ID;
372 
373   explicit StructurizeCFGLegacyPass(bool SkipUniformRegions_ = false)
374       : RegionPass(ID), SkipUniformRegions(SkipUniformRegions_) {
375     if (ForceSkipUniformRegions.getNumOccurrences())
376       SkipUniformRegions = ForceSkipUniformRegions.getValue();
377     initializeStructurizeCFGLegacyPassPass(*PassRegistry::getPassRegistry());
378   }
379 
380   bool runOnRegion(Region *R, RGPassManager &RGM) override {
381     StructurizeCFG SCFG;
382     SCFG.init(R);
383     if (SkipUniformRegions) {
384       UniformityInfo &UA =
385           getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
386       if (SCFG.makeUniformRegion(R, UA))
387         return false;
388     }
389     DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
390     return SCFG.run(R, DT);
391   }
392 
393   StringRef getPassName() const override { return "Structurize control flow"; }
394 
395   void getAnalysisUsage(AnalysisUsage &AU) const override {
396     if (SkipUniformRegions)
397       AU.addRequired<UniformityInfoWrapperPass>();
398     AU.addRequired<DominatorTreeWrapperPass>();
399 
400     AU.addPreserved<DominatorTreeWrapperPass>();
401     RegionPass::getAnalysisUsage(AU);
402   }
403 };
404 
405 } // end anonymous namespace
406 
407 char StructurizeCFGLegacyPass::ID = 0;
408 
409 INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
410                       "Structurize the CFG", false, false)
411 INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
412 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
413 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
414 INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
415                     "Structurize the CFG", false, false)
416 
417 /// Build up the general order of nodes, by performing a topological sort of the
418 /// parent region's nodes, while ensuring that there is no outer cycle node
419 /// between any two inner cycle nodes.
420 void StructurizeCFG::orderNodes() {
421   Order.resize(std::distance(GraphTraits<Region *>::nodes_begin(ParentRegion),
422                              GraphTraits<Region *>::nodes_end(ParentRegion)));
423   if (Order.empty())
424     return;
425 
426   SmallDenseSet<RegionNode *> Nodes;
427   auto EntryNode = SubGraphTraits::getEntryNode(ParentRegion);
428 
429   // A list of range indices of SCCs in Order, to be processed.
430   SmallVector<std::pair<unsigned, unsigned>, 8> WorkList;
431   unsigned I = 0, E = Order.size();
432   while (true) {
433     // Run through all the SCCs in the subgraph starting with Entry.
434     for (auto SCCI =
435              scc_iterator<SubGraphTraits::NodeRef, SubGraphTraits>::begin(
436                  EntryNode);
437          !SCCI.isAtEnd(); ++SCCI) {
438       auto &SCC = *SCCI;
439 
440       // An SCC up to the size of 2, can be reduced to an entry (the last node),
441       // and a possible additional node. Therefore, it is already in order, and
442       // there is no need to add it to the work-list.
443       unsigned Size = SCC.size();
444       if (Size > 2)
445         WorkList.emplace_back(I, I + Size);
446 
447       // Add the SCC nodes to the Order array.
448       for (const auto &N : SCC) {
449         assert(I < E && "SCC size mismatch!");
450         Order[I++] = N.first;
451       }
452     }
453     assert(I == E && "SCC size mismatch!");
454 
455     // If there are no more SCCs to order, then we are done.
456     if (WorkList.empty())
457       break;
458 
459     std::tie(I, E) = WorkList.pop_back_val();
460 
461     // Collect the set of nodes in the SCC's subgraph. These are only the
462     // possible child nodes; we do not add the entry (last node) otherwise we
463     // will have the same exact SCC all over again.
464     Nodes.clear();
465     Nodes.insert(Order.begin() + I, Order.begin() + E - 1);
466 
467     // Update the entry node.
468     EntryNode.first = Order[E - 1];
469     EntryNode.second = &Nodes;
470   }
471 }
472 
473 /// Determine the end of the loops
474 void StructurizeCFG::analyzeLoops(RegionNode *N) {
475   if (N->isSubRegion()) {
476     // Test for exit as back edge
477     BasicBlock *Exit = N->getNodeAs<Region>()->getExit();
478     if (Visited.count(Exit))
479       Loops[Exit] = N->getEntry();
480 
481   } else {
482     // Test for successors as back edge
483     BasicBlock *BB = N->getNodeAs<BasicBlock>();
484     BranchInst *Term = cast<BranchInst>(BB->getTerminator());
485 
486     for (BasicBlock *Succ : Term->successors())
487       if (Visited.count(Succ))
488         Loops[Succ] = BB;
489   }
490 }
491 
492 /// Build the condition for one edge
493 ValueWeightPair StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
494                                                bool Invert) {
495   Value *Cond = Invert ? BoolFalse : BoolTrue;
496   MaybeCondBranchWeights Weights;
497 
498   if (Term->isConditional()) {
499     Cond = Term->getCondition();
500     Weights = CondBranchWeights::tryParse(*Term);
501 
502     if (Idx != (unsigned)Invert) {
503       Cond = invertCondition(Cond);
504       if (Weights)
505         Weights = Weights->invert();
506     }
507   }
508   return {Cond, Weights};
509 }
510 
511 /// Analyze the predecessors of each block and build up predicates
512 void StructurizeCFG::gatherPredicates(RegionNode *N) {
513   RegionInfo *RI = ParentRegion->getRegionInfo();
514   BasicBlock *BB = N->getEntry();
515   BBPredicates &Pred = Predicates[BB];
516   BBPredicates &LPred = LoopPreds[BB];
517 
518   for (BasicBlock *P : predecessors(BB)) {
519     // Ignore it if it's a branch from outside into our region entry
520     if (!ParentRegion->contains(P))
521       continue;
522 
523     Region *R = RI->getRegionFor(P);
524     if (R == ParentRegion) {
525       // It's a top level block in our region
526       BranchInst *Term = cast<BranchInst>(P->getTerminator());
527       for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
528         BasicBlock *Succ = Term->getSuccessor(i);
529         if (Succ != BB)
530           continue;
531 
532         if (Visited.count(P)) {
533           // Normal forward edge
534           if (Term->isConditional()) {
535             // Try to treat it like an ELSE block
536             BasicBlock *Other = Term->getSuccessor(!i);
537             if (Visited.count(Other) && !Loops.count(Other) &&
538                 !Pred.count(Other) && !Pred.count(P)) {
539 
540               Pred[Other] = {BoolFalse, std::nullopt};
541               Pred[P] = {BoolTrue, std::nullopt};
542               continue;
543             }
544           }
545           Pred[P] = buildCondition(Term, i, false);
546         } else {
547           // Back edge
548           LPred[P] = buildCondition(Term, i, true);
549         }
550       }
551     } else {
552       // It's an exit from a sub region
553       while (R->getParent() != ParentRegion)
554         R = R->getParent();
555 
556       // Edge from inside a subregion to its entry, ignore it
557       if (*R == *N)
558         continue;
559 
560       BasicBlock *Entry = R->getEntry();
561       if (Visited.count(Entry))
562         Pred[Entry] = {BoolTrue, std::nullopt};
563       else
564         LPred[Entry] = {BoolFalse, std::nullopt};
565     }
566   }
567 }
568 
569 /// Collect various loop and predicate infos
570 void StructurizeCFG::collectInfos() {
571   // Reset predicate
572   Predicates.clear();
573 
574   // and loop infos
575   Loops.clear();
576   LoopPreds.clear();
577 
578   // Reset the visited nodes
579   Visited.clear();
580 
581   for (RegionNode *RN : reverse(Order)) {
582     LLVM_DEBUG(dbgs() << "Visiting: "
583                       << (RN->isSubRegion() ? "SubRegion with entry: " : "")
584                       << RN->getEntry()->getName() << "\n");
585 
586     // Analyze all the conditions leading to a node
587     gatherPredicates(RN);
588 
589     // Remember that we've seen this node
590     Visited.insert(RN->getEntry());
591 
592     // Find the last back edges
593     analyzeLoops(RN);
594   }
595 
596   // Reset the collected term debug locations
597   TermDL.clear();
598 
599   for (BasicBlock &BB : *Func) {
600     if (const DebugLoc &DL = BB.getTerminator()->getDebugLoc())
601       TermDL[&BB] = DL;
602   }
603 }
604 
605 /// Insert the missing branch conditions
606 void StructurizeCFG::insertConditions(bool Loops) {
607   BranchVector &Conds = Loops ? LoopConds : Conditions;
608   Value *Default = Loops ? BoolTrue : BoolFalse;
609   SSAUpdater PhiInserter;
610 
611   for (BranchInst *Term : Conds) {
612     assert(Term->isConditional());
613 
614     BasicBlock *Parent = Term->getParent();
615     BasicBlock *SuccTrue = Term->getSuccessor(0);
616     BasicBlock *SuccFalse = Term->getSuccessor(1);
617 
618     PhiInserter.Initialize(Boolean, "");
619     PhiInserter.AddAvailableValue(&Func->getEntryBlock(), Default);
620     PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default);
621 
622     BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
623 
624     NearestCommonDominator Dominator(DT);
625     Dominator.addBlock(Parent);
626 
627     Value *ParentValue = nullptr;
628     MaybeCondBranchWeights ParentWeights = std::nullopt;
629     for (std::pair<BasicBlock *, ValueWeightPair> BBAndPred : Preds) {
630       BasicBlock *BB = BBAndPred.first;
631       auto [Pred, Weight] = BBAndPred.second;
632 
633       if (BB == Parent) {
634         ParentValue = Pred;
635         ParentWeights = Weight;
636         break;
637       }
638       PhiInserter.AddAvailableValue(BB, Pred);
639       Dominator.addAndRememberBlock(BB);
640     }
641 
642     if (ParentValue) {
643       Term->setCondition(ParentValue);
644       CondBranchWeights::setMetadata(*Term, ParentWeights);
645     } else {
646       if (!Dominator.resultIsRememberedBlock())
647         PhiInserter.AddAvailableValue(Dominator.result(), Default);
648 
649       Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent));
650     }
651   }
652 }
653 
654 /// Simplify any inverted conditions that were built by buildConditions.
655 void StructurizeCFG::simplifyConditions() {
656   SmallVector<Instruction *> InstToErase;
657   for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) {
658     auto &Preds = I.second;
659     for (auto &J : Preds) {
660       Value *Cond = J.second.first;
661       Instruction *Inverted;
662       if (match(Cond, m_Not(m_OneUse(m_Instruction(Inverted)))) &&
663           !Cond->use_empty()) {
664         if (auto *InvertedCmp = dyn_cast<CmpInst>(Inverted)) {
665           InvertedCmp->setPredicate(InvertedCmp->getInversePredicate());
666           Cond->replaceAllUsesWith(InvertedCmp);
667           InstToErase.push_back(cast<Instruction>(Cond));
668         }
669       }
670     }
671   }
672   for (auto *I : InstToErase)
673     I->eraseFromParent();
674 }
675 
676 /// Remove all PHI values coming from "From" into "To" and remember
677 /// them in DeletedPhis
678 void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
679   PhiMap &Map = DeletedPhis[To];
680   for (PHINode &Phi : To->phis()) {
681     bool Recorded = false;
682     while (Phi.getBasicBlockIndex(From) != -1) {
683       Value *Deleted = Phi.removeIncomingValue(From, false);
684       Map[&Phi].push_back(std::make_pair(From, Deleted));
685       if (!Recorded) {
686         AffectedPhis.push_back(&Phi);
687         Recorded = true;
688       }
689     }
690   }
691 }
692 
693 /// Add a dummy PHI value as soon as we knew the new predecessor
694 void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
695   for (PHINode &Phi : To->phis()) {
696     Value *Undef = UndefValue::get(Phi.getType());
697     Phi.addIncoming(Undef, From);
698   }
699   AddedPhis[To].push_back(From);
700 }
701 
702 /// When we are reconstructing a PHI inside \p PHIBlock with incoming values
703 /// from predecessors \p Incomings, we have a chance to mark the available value
704 /// from some blocks as undefined. The function will find out all such blocks
705 /// and return in \p UndefBlks.
706 void StructurizeCFG::findUndefBlocks(
707     BasicBlock *PHIBlock, const SmallSet<BasicBlock *, 8> &Incomings,
708     SmallVector<BasicBlock *> &UndefBlks) const {
709   //  We may get a post-structured CFG like below:
710   //
711   //  | P1
712   //  |/
713   //  F1
714   //  |\
715   //  | N
716   //  |/
717   //  F2
718   //  |\
719   //  | P2
720   //  |/
721   //  F3
722   //  |\
723   //  B
724   //
725   // B is the block that has a PHI being reconstructed. P1/P2 are predecessors
726   // of B before structurization. F1/F2/F3 are flow blocks inserted during
727   // structurization process. Block N is not a predecessor of B before
728   // structurization, but are placed between the predecessors(P1/P2) of B after
729   // structurization. This usually means that threads went to N never take the
730   // path N->F2->F3->B. For example, the threads take the branch F1->N may
731   // always take the branch F2->P2. So, when we are reconstructing a PHI
732   // originally in B, we can safely say the incoming value from N is undefined.
733   SmallSet<BasicBlock *, 8> VisitedBlock;
734   SmallVector<BasicBlock *, 8> Stack;
735   if (PHIBlock == ParentRegion->getExit()) {
736     for (auto P : predecessors(PHIBlock)) {
737       if (ParentRegion->contains(P))
738         Stack.push_back(P);
739     }
740   } else {
741     append_range(Stack, predecessors(PHIBlock));
742   }
743 
744   // Do a backward traversal over the CFG, and stop further searching if
745   // the block is not a Flow. If a block is neither flow block nor the
746   // incoming predecessor, then the incoming value from the block is
747   // undefined value for the PHI being reconstructed.
748   while (!Stack.empty()) {
749     BasicBlock *Current = Stack.pop_back_val();
750     if (!VisitedBlock.insert(Current).second)
751       continue;
752 
753     if (FlowSet.contains(Current)) {
754       for (auto P : predecessors(Current))
755         Stack.push_back(P);
756     } else if (!Incomings.contains(Current)) {
757       UndefBlks.push_back(Current);
758     }
759   }
760 }
761 
762 // If two phi nodes have compatible incoming values (for each
763 // incoming block, either they have the same incoming value or only one phi
764 // node has an incoming value), let them share the merged incoming values. The
765 // merge process is guided by the equivalence information from \p PhiClasses.
766 // The function will possibly update the incoming values of leader phi in
767 // DeletedPhis.
768 void StructurizeCFG::mergeIfCompatible(
769     EquivalenceClasses<PHINode *> &PhiClasses, PHINode *A, PHINode *B) {
770   auto ItA = PhiClasses.findLeader(PhiClasses.insert(A));
771   auto ItB = PhiClasses.findLeader(PhiClasses.insert(B));
772   // They are already in the same class, no work needed.
773   if (ItA == ItB)
774     return;
775 
776   PHINode *LeaderA = *ItA;
777   PHINode *LeaderB = *ItB;
778   BBValueVector &IncomingA = DeletedPhis[LeaderA->getParent()][LeaderA];
779   BBValueVector &IncomingB = DeletedPhis[LeaderB->getParent()][LeaderB];
780 
781   DenseMap<BasicBlock *, Value *> Mergeable(IncomingA.begin(), IncomingA.end());
782   for (auto [BB, V] : IncomingB) {
783     auto BBIt = Mergeable.find(BB);
784     if (BBIt != Mergeable.end() && BBIt->second != V)
785       return;
786     // Either IncomingA does not have this value or IncomingA has the same
787     // value.
788     Mergeable.insert({BB, V});
789   }
790 
791   // Update the incoming value of leaderA.
792   IncomingA.assign(Mergeable.begin(), Mergeable.end());
793   PhiClasses.unionSets(ItA, ItB);
794 }
795 
796 /// Add the real PHI value as soon as everything is set up
797 void StructurizeCFG::setPhiValues() {
798   SmallVector<PHINode *, 8> InsertedPhis;
799   SSAUpdater Updater(&InsertedPhis);
800   DenseMap<BasicBlock *, SmallVector<BasicBlock *>> UndefBlksMap;
801 
802   // Find phi nodes that have compatible incoming values (either they have
803   // the same value for the same block or only one phi node has an incoming
804   // value, see example below). We only search again the phi's that are
805   // referenced by another phi, which is the case we care about.
806   //
807   // For example (-- means no incoming value):
808   // phi1 : BB1:phi2   BB2:v  BB3:--
809   // phi2:  BB1:--     BB2:v  BB3:w
810   //
811   // Then we can merge these incoming values and let phi1, phi2 use the
812   // same set of incoming values:
813   //
814   // phi1&phi2: BB1:phi2  BB2:v  BB3:w
815   //
816   // By doing this, phi1 and phi2 would share more intermediate phi nodes.
817   // This would help reduce the number of phi nodes during SSA reconstruction
818   // and ultimately result in fewer COPY instructions.
819   //
820   // This should be correct, because if a phi node does not have incoming
821   // value from certain block, this means the block is not the predecessor
822   // of the parent block, so we actually don't care about its incoming value.
823   EquivalenceClasses<PHINode *> PhiClasses;
824   for (const auto &[To, From] : AddedPhis) {
825     auto OldPhiIt = DeletedPhis.find(To);
826     if (OldPhiIt == DeletedPhis.end())
827       continue;
828 
829     PhiMap &BlkPhis = OldPhiIt->second;
830     SmallVector<BasicBlock *> &UndefBlks = UndefBlksMap[To];
831     SmallSet<BasicBlock *, 8> Incomings;
832 
833     // Get the undefined blocks shared by all the phi nodes.
834     if (!BlkPhis.empty()) {
835       for (const auto &VI : BlkPhis.front().second)
836         Incomings.insert(VI.first);
837       findUndefBlocks(To, Incomings, UndefBlks);
838     }
839 
840     for (const auto &[Phi, Incomings] : OldPhiIt->second) {
841       SmallVector<PHINode *> IncomingPHIs;
842       for (const auto &[BB, V] : Incomings) {
843         // First, for each phi, check whether it has incoming value which is
844         // another phi.
845         if (PHINode *P = dyn_cast<PHINode>(V))
846           IncomingPHIs.push_back(P);
847       }
848 
849       for (auto *OtherPhi : IncomingPHIs) {
850         // Skip phis that are unrelated to the phi reconstruction for now.
851         if (!DeletedPhis.contains(OtherPhi->getParent()))
852           continue;
853         mergeIfCompatible(PhiClasses, Phi, OtherPhi);
854       }
855     }
856   }
857 
858   for (const auto &AddedPhi : AddedPhis) {
859     BasicBlock *To = AddedPhi.first;
860     const BBVector &From = AddedPhi.second;
861 
862     if (!DeletedPhis.count(To))
863       continue;
864 
865     PhiMap &Map = DeletedPhis[To];
866     SmallVector<BasicBlock *> &UndefBlks = UndefBlksMap[To];
867     for (const auto &[Phi, Incoming] : Map) {
868       Value *Undef = UndefValue::get(Phi->getType());
869       Updater.Initialize(Phi->getType(), "");
870       Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
871       Updater.AddAvailableValue(To, Undef);
872 
873       // Use leader phi's incoming if there is.
874       auto LeaderIt = PhiClasses.findLeader(Phi);
875       bool UseIncomingOfLeader =
876           LeaderIt != PhiClasses.member_end() && *LeaderIt != Phi;
877       const auto &IncomingMap =
878           UseIncomingOfLeader ? DeletedPhis[(*LeaderIt)->getParent()][*LeaderIt]
879                               : Incoming;
880 
881       SmallVector<BasicBlock *> ConstantPreds;
882       for (const auto &[BB, V] : IncomingMap) {
883         Updater.AddAvailableValue(BB, V);
884         if (isa<Constant>(V))
885           ConstantPreds.push_back(BB);
886       }
887 
888       for (auto UB : UndefBlks) {
889         // If this undef block is dominated by any predecessor(before
890         // structurization) of reconstructed PHI with constant incoming value,
891         // don't mark the available value as undefined. Setting undef to such
892         // block will stop us from getting optimal phi insertion.
893         if (any_of(ConstantPreds,
894                    [&](BasicBlock *CP) { return DT->dominates(CP, UB); }))
895           continue;
896         // Maybe already get a value through sharing with other phi nodes.
897         if (Updater.HasValueForBlock(UB))
898           continue;
899 
900         Updater.AddAvailableValue(UB, Undef);
901       }
902 
903       for (BasicBlock *FI : From)
904         Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI));
905       AffectedPhis.push_back(Phi);
906     }
907   }
908 
909   AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end());
910 }
911 
912 void StructurizeCFG::simplifyAffectedPhis() {
913   bool Changed;
914   do {
915     Changed = false;
916     SimplifyQuery Q(Func->getDataLayout());
917     Q.DT = DT;
918     // Setting CanUseUndef to true might extend value liveness, set it to false
919     // to achieve better register pressure.
920     Q.CanUseUndef = false;
921     for (WeakVH VH : AffectedPhis) {
922       if (auto Phi = dyn_cast_or_null<PHINode>(VH)) {
923         if (auto NewValue = simplifyInstruction(Phi, Q)) {
924           Phi->replaceAllUsesWith(NewValue);
925           Phi->eraseFromParent();
926           Changed = true;
927         }
928       }
929     }
930   } while (Changed);
931 }
932 
933 /// Remove phi values from all successors and then remove the terminator.
934 void StructurizeCFG::killTerminator(BasicBlock *BB) {
935   Instruction *Term = BB->getTerminator();
936   if (!Term)
937     return;
938 
939   for (BasicBlock *Succ : successors(BB))
940     delPhiValues(BB, Succ);
941 
942   Term->eraseFromParent();
943 }
944 
945 /// Let node exit(s) point to NewExit
946 void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
947                                 bool IncludeDominator) {
948   if (Node->isSubRegion()) {
949     Region *SubRegion = Node->getNodeAs<Region>();
950     BasicBlock *OldExit = SubRegion->getExit();
951     BasicBlock *Dominator = nullptr;
952 
953     // Find all the edges from the sub region to the exit.
954     // We use make_early_inc_range here because we modify BB's terminator.
955     for (BasicBlock *BB : llvm::make_early_inc_range(predecessors(OldExit))) {
956       if (!SubRegion->contains(BB))
957         continue;
958 
959       // Modify the edges to point to the new exit
960       delPhiValues(BB, OldExit);
961       BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit);
962       addPhiValues(BB, NewExit);
963 
964       // Find the new dominator (if requested)
965       if (IncludeDominator) {
966         if (!Dominator)
967           Dominator = BB;
968         else
969           Dominator = DT->findNearestCommonDominator(Dominator, BB);
970       }
971     }
972 
973     // Change the dominator (if requested)
974     if (Dominator)
975       DT->changeImmediateDominator(NewExit, Dominator);
976 
977     // Update the region info
978     SubRegion->replaceExit(NewExit);
979   } else {
980     BasicBlock *BB = Node->getNodeAs<BasicBlock>();
981     killTerminator(BB);
982     BranchInst *Br = BranchInst::Create(NewExit, BB);
983     Br->setDebugLoc(TermDL[BB]);
984     addPhiValues(BB, NewExit);
985     if (IncludeDominator)
986       DT->changeImmediateDominator(NewExit, BB);
987   }
988 }
989 
990 /// Create a new flow node and update dominator tree and region info
991 BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) {
992   LLVMContext &Context = Func->getContext();
993   BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
994                        Order.back()->getEntry();
995   BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
996                                         Func, Insert);
997   FlowSet.insert(Flow);
998 
999   // use a temporary variable to avoid a use-after-free if the map's storage is
1000   // reallocated
1001   DebugLoc DL = TermDL[Dominator];
1002   TermDL[Flow] = std::move(DL);
1003 
1004   DT->addNewBlock(Flow, Dominator);
1005   ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
1006   return Flow;
1007 }
1008 
1009 /// Create a new or reuse the previous node as flow node
1010 BasicBlock *StructurizeCFG::needPrefix(bool NeedEmpty) {
1011   BasicBlock *Entry = PrevNode->getEntry();
1012 
1013   if (!PrevNode->isSubRegion()) {
1014     killTerminator(Entry);
1015     if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end())
1016       return Entry;
1017   }
1018 
1019   // create a new flow node
1020   BasicBlock *Flow = getNextFlow(Entry);
1021 
1022   // and wire it up
1023   changeExit(PrevNode, Flow, true);
1024   PrevNode = ParentRegion->getBBNode(Flow);
1025   return Flow;
1026 }
1027 
1028 /// Returns the region exit if possible, otherwise just a new flow node
1029 BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow,
1030                                         bool ExitUseAllowed) {
1031   if (!Order.empty() || !ExitUseAllowed)
1032     return getNextFlow(Flow);
1033 
1034   BasicBlock *Exit = ParentRegion->getExit();
1035   DT->changeImmediateDominator(Exit, Flow);
1036   addPhiValues(Flow, Exit);
1037   return Exit;
1038 }
1039 
1040 /// Set the previous node
1041 void StructurizeCFG::setPrevNode(BasicBlock *BB) {
1042   PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB)
1043                                         : nullptr;
1044 }
1045 
1046 /// Does BB dominate all the predicates of Node?
1047 bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) {
1048   BBPredicates &Preds = Predicates[Node->getEntry()];
1049   return llvm::all_of(Preds,
1050                       [&](std::pair<BasicBlock *, ValueWeightPair> Pred) {
1051                         return DT->dominates(BB, Pred.first);
1052                       });
1053 }
1054 
1055 /// Can we predict that this node will always be called?
1056 bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
1057   BBPredicates &Preds = Predicates[Node->getEntry()];
1058   bool Dominated = false;
1059 
1060   // Regionentry is always true
1061   if (!PrevNode)
1062     return true;
1063 
1064   for (std::pair<BasicBlock *, ValueWeightPair> Pred : Preds) {
1065     BasicBlock *BB = Pred.first;
1066     Value *V = Pred.second.first;
1067 
1068     if (V != BoolTrue)
1069       return false;
1070 
1071     if (!Dominated && DT->dominates(BB, PrevNode->getEntry()))
1072       Dominated = true;
1073   }
1074 
1075   // TODO: The dominator check is too strict
1076   return Dominated;
1077 }
1078 
1079 /// Take one node from the order vector and wire it up
1080 void StructurizeCFG::wireFlow(bool ExitUseAllowed,
1081                               BasicBlock *LoopEnd) {
1082   RegionNode *Node = Order.pop_back_val();
1083   Visited.insert(Node->getEntry());
1084 
1085   if (isPredictableTrue(Node)) {
1086     // Just a linear flow
1087     if (PrevNode) {
1088       changeExit(PrevNode, Node->getEntry(), true);
1089     }
1090     PrevNode = Node;
1091   } else {
1092     // Insert extra prefix node (or reuse last one)
1093     BasicBlock *Flow = needPrefix(false);
1094 
1095     // Insert extra postfix node (or use exit instead)
1096     BasicBlock *Entry = Node->getEntry();
1097     BasicBlock *Next = needPostfix(Flow, ExitUseAllowed);
1098 
1099     // let it point to entry and next block
1100     BranchInst *Br = BranchInst::Create(Entry, Next, BoolPoison, Flow);
1101     Br->setDebugLoc(TermDL[Flow]);
1102     Conditions.push_back(Br);
1103     addPhiValues(Flow, Entry);
1104     DT->changeImmediateDominator(Entry, Flow);
1105 
1106     PrevNode = Node;
1107     while (!Order.empty() && !Visited.count(LoopEnd) &&
1108            dominatesPredicates(Entry, Order.back())) {
1109       handleLoops(false, LoopEnd);
1110     }
1111 
1112     changeExit(PrevNode, Next, false);
1113     setPrevNode(Next);
1114   }
1115 }
1116 
1117 void StructurizeCFG::handleLoops(bool ExitUseAllowed,
1118                                  BasicBlock *LoopEnd) {
1119   RegionNode *Node = Order.back();
1120   BasicBlock *LoopStart = Node->getEntry();
1121 
1122   if (!Loops.count(LoopStart)) {
1123     wireFlow(ExitUseAllowed, LoopEnd);
1124     return;
1125   }
1126 
1127   if (!isPredictableTrue(Node))
1128     LoopStart = needPrefix(true);
1129 
1130   LoopEnd = Loops[Node->getEntry()];
1131   wireFlow(false, LoopEnd);
1132   while (!Visited.count(LoopEnd)) {
1133     handleLoops(false, LoopEnd);
1134   }
1135 
1136   assert(LoopStart != &LoopStart->getParent()->getEntryBlock());
1137 
1138   // Create an extra loop end node
1139   LoopEnd = needPrefix(false);
1140   BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed);
1141   BranchInst *Br = BranchInst::Create(Next, LoopStart, BoolPoison, LoopEnd);
1142   Br->setDebugLoc(TermDL[LoopEnd]);
1143   LoopConds.push_back(Br);
1144   addPhiValues(LoopEnd, LoopStart);
1145   setPrevNode(Next);
1146 }
1147 
1148 /// After this function control flow looks like it should be, but
1149 /// branches and PHI nodes only have undefined conditions.
1150 void StructurizeCFG::createFlow() {
1151   BasicBlock *Exit = ParentRegion->getExit();
1152   bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit);
1153 
1154   AffectedPhis.clear();
1155   DeletedPhis.clear();
1156   AddedPhis.clear();
1157   Conditions.clear();
1158   LoopConds.clear();
1159 
1160   PrevNode = nullptr;
1161   Visited.clear();
1162 
1163   while (!Order.empty()) {
1164     handleLoops(EntryDominatesExit, nullptr);
1165   }
1166 
1167   if (PrevNode)
1168     changeExit(PrevNode, Exit, EntryDominatesExit);
1169   else
1170     assert(EntryDominatesExit);
1171 }
1172 
1173 /// Handle a rare case where the disintegrated nodes instructions
1174 /// no longer dominate all their uses. Not sure if this is really necessary
1175 void StructurizeCFG::rebuildSSA() {
1176   SSAUpdater Updater;
1177   for (BasicBlock *BB : ParentRegion->blocks())
1178     for (Instruction &I : *BB) {
1179       bool Initialized = false;
1180       // We may modify the use list as we iterate over it, so we use
1181       // make_early_inc_range.
1182       for (Use &U : llvm::make_early_inc_range(I.uses())) {
1183         Instruction *User = cast<Instruction>(U.getUser());
1184         if (User->getParent() == BB) {
1185           continue;
1186         } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
1187           if (UserPN->getIncomingBlock(U) == BB)
1188             continue;
1189         }
1190 
1191         if (DT->dominates(&I, User))
1192           continue;
1193 
1194         if (!Initialized) {
1195           Value *Undef = UndefValue::get(I.getType());
1196           Updater.Initialize(I.getType(), "");
1197           Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
1198           Updater.AddAvailableValue(BB, &I);
1199           Initialized = true;
1200         }
1201         Updater.RewriteUseAfterInsertions(U);
1202       }
1203     }
1204 }
1205 
1206 static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
1207                                    const UniformityInfo &UA) {
1208   // Bool for if all sub-regions are uniform.
1209   bool SubRegionsAreUniform = true;
1210   // Count of how many direct children are conditional.
1211   unsigned ConditionalDirectChildren = 0;
1212 
1213   for (auto *E : R->elements()) {
1214     if (!E->isSubRegion()) {
1215       auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator());
1216       if (!Br || !Br->isConditional())
1217         continue;
1218 
1219       if (!UA.isUniform(Br))
1220         return false;
1221 
1222       // One of our direct children is conditional.
1223       ConditionalDirectChildren++;
1224 
1225       LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName()
1226                         << " has uniform terminator\n");
1227     } else {
1228       // Explicitly refuse to treat regions as uniform if they have non-uniform
1229       // subregions. We cannot rely on UniformityAnalysis for branches in
1230       // subregions because those branches may have been removed and re-created,
1231       // so we look for our metadata instead.
1232       //
1233       // Warning: It would be nice to treat regions as uniform based only on
1234       // their direct child basic blocks' terminators, regardless of whether
1235       // subregions are uniform or not. However, this requires a very careful
1236       // look at SIAnnotateControlFlow to make sure nothing breaks there.
1237       for (auto *BB : E->getNodeAs<Region>()->blocks()) {
1238         auto Br = dyn_cast<BranchInst>(BB->getTerminator());
1239         if (!Br || !Br->isConditional())
1240           continue;
1241 
1242         if (!Br->getMetadata(UniformMDKindID)) {
1243           // Early exit if we cannot have relaxed uniform regions.
1244           if (!RelaxedUniformRegions)
1245             return false;
1246 
1247           SubRegionsAreUniform = false;
1248           break;
1249         }
1250       }
1251     }
1252   }
1253 
1254   // Our region is uniform if:
1255   // 1. All conditional branches that are direct children are uniform (checked
1256   // above).
1257   // 2. And either:
1258   //   a. All sub-regions are uniform.
1259   //   b. There is one or less conditional branches among the direct children.
1260   return SubRegionsAreUniform || (ConditionalDirectChildren <= 1);
1261 }
1262 
1263 void StructurizeCFG::init(Region *R) {
1264   LLVMContext &Context = R->getEntry()->getContext();
1265 
1266   Boolean = Type::getInt1Ty(Context);
1267   BoolTrue = ConstantInt::getTrue(Context);
1268   BoolFalse = ConstantInt::getFalse(Context);
1269   BoolPoison = PoisonValue::get(Boolean);
1270 
1271   this->UA = nullptr;
1272 }
1273 
1274 bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {
1275   if (R->isTopLevelRegion())
1276     return false;
1277 
1278   this->UA = &UA;
1279 
1280   // TODO: We could probably be smarter here with how we handle sub-regions.
1281   // We currently rely on the fact that metadata is set by earlier invocations
1282   // of the pass on sub-regions, and that this metadata doesn't get lost --
1283   // but we shouldn't rely on metadata for correctness!
1284   unsigned UniformMDKindID =
1285       R->getEntry()->getContext().getMDKindID("structurizecfg.uniform");
1286 
1287   if (hasOnlyUniformBranches(R, UniformMDKindID, UA)) {
1288     LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R
1289                       << '\n');
1290 
1291     // Mark all direct child block terminators as having been treated as
1292     // uniform. To account for a possible future in which non-uniform
1293     // sub-regions are treated more cleverly, indirect children are not
1294     // marked as uniform.
1295     MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {});
1296     for (RegionNode *E : R->elements()) {
1297       if (E->isSubRegion())
1298         continue;
1299 
1300       if (Instruction *Term = E->getEntry()->getTerminator())
1301         Term->setMetadata(UniformMDKindID, MD);
1302     }
1303 
1304     return true;
1305   }
1306   return false;
1307 }
1308 
1309 /// Run the transformation for each region found
1310 bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
1311   if (R->isTopLevelRegion())
1312     return false;
1313 
1314   this->DT = DT;
1315 
1316   Func = R->getEntry()->getParent();
1317   assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator.");
1318 
1319   ParentRegion = R;
1320 
1321   orderNodes();
1322   collectInfos();
1323   createFlow();
1324   insertConditions(false);
1325   insertConditions(true);
1326   setPhiValues();
1327   simplifyConditions();
1328   simplifyAffectedPhis();
1329   rebuildSSA();
1330 
1331   // Cleanup
1332   Order.clear();
1333   Visited.clear();
1334   DeletedPhis.clear();
1335   AddedPhis.clear();
1336   Predicates.clear();
1337   Conditions.clear();
1338   Loops.clear();
1339   LoopPreds.clear();
1340   LoopConds.clear();
1341   FlowSet.clear();
1342   TermDL.clear();
1343 
1344   return true;
1345 }
1346 
1347 Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) {
1348   return new StructurizeCFGLegacyPass(SkipUniformRegions);
1349 }
1350 
1351 static void addRegionIntoQueue(Region &R, std::vector<Region *> &Regions) {
1352   Regions.push_back(&R);
1353   for (const auto &E : R)
1354     addRegionIntoQueue(*E, Regions);
1355 }
1356 
1357 StructurizeCFGPass::StructurizeCFGPass(bool SkipUniformRegions_)
1358     : SkipUniformRegions(SkipUniformRegions_) {
1359   if (ForceSkipUniformRegions.getNumOccurrences())
1360     SkipUniformRegions = ForceSkipUniformRegions.getValue();
1361 }
1362 
1363 void StructurizeCFGPass::printPipeline(
1364     raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1365   static_cast<PassInfoMixin<StructurizeCFGPass> *>(this)->printPipeline(
1366       OS, MapClassName2PassName);
1367   if (SkipUniformRegions)
1368     OS << "<skip-uniform-regions>";
1369 }
1370 
1371 PreservedAnalyses StructurizeCFGPass::run(Function &F,
1372                                           FunctionAnalysisManager &AM) {
1373 
1374   bool Changed = false;
1375   DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
1376   auto &RI = AM.getResult<RegionInfoAnalysis>(F);
1377 
1378   UniformityInfo *UI = nullptr;
1379   if (SkipUniformRegions)
1380     UI = &AM.getResult<UniformityInfoAnalysis>(F);
1381 
1382   std::vector<Region *> Regions;
1383   addRegionIntoQueue(*RI.getTopLevelRegion(), Regions);
1384   while (!Regions.empty()) {
1385     Region *R = Regions.back();
1386     Regions.pop_back();
1387 
1388     StructurizeCFG SCFG;
1389     SCFG.init(R);
1390 
1391     if (SkipUniformRegions && SCFG.makeUniformRegion(R, *UI)) {
1392       Changed = true; // May have added metadata.
1393       continue;
1394     }
1395 
1396     Changed |= SCFG.run(R, DT);
1397   }
1398   if (!Changed)
1399     return PreservedAnalyses::all();
1400   PreservedAnalyses PA;
1401   PA.preserve<DominatorTreeAnalysis>();
1402   return PA;
1403 }
1404