xref: /llvm-project/mlir/lib/Transforms/Utils/CFGToSCF.cpp (revision a9304edf20756dd63f896a98bad89e9eac54aebd)
1 //===- CFGToSCF.h - Control Flow Graph to Structured Control Flow *- C++ -*===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This code is an implementation of:
10 // Helge Bahmann, Nico Reissmann, Magnus Jahre, and Jan Christian Meyer. 2015.
11 // Perfect Reconstructability of Control Flow from Demand Dependence Graphs. ACM
12 // Trans. Archit. Code Optim. 11, 4, Article 66 (January 2015), 25 pages.
13 // https://doi.org/10.1145/2693261
14 //
15 // It defines an algorithm to translate any control flow graph with a single
16 // entry and single exit block into structured control flow operations
17 // consisting of regions of do-while loops and operations conditionally
18 // dispatching to one out of multiple regions before continuing after the
19 // operation. This includes control flow graphs containing irreducible
20 // control flow.
21 //
22 // The implementation here additionally supports the transformation on
23 // regions with multiple exit blocks. This is implemented by first
24 // transforming all occurrences of return-like operations to branch to a
25 // single exit block containing an instance of that return-like operation.
26 // If there are multiple kinds of return-like operations, multiple exit
27 // blocks are created. In that case the transformation leaves behind a
28 // conditional control flow graph operation that dispatches to the given regions
29 // terminating with different kinds of return-like operations each.
30 //
31 // If the function only contains a single kind of return-like operations,
32 // it is guaranteed that all control flow graph ops will be lifted to structured
33 // control flow, and that no more control flow graph ops remain after the
34 // operation.
35 //
36 // The algorithm to lift CFGs consists of two transformations applied after each
37 // other on any single-entry, single-exit region:
38 // 1) Lifting cycles to structured control flow loops
39 // 2) Lifting conditional branches to structured control flow branches
40 // These are then applied recursively on any new single-entry single-exit
41 // regions created by the transformation until no more CFG operations remain.
42 //
43 // The first part of cycle lifting is to detect any cycles in the CFG.
44 // This is done using an algorithm for iterating over SCCs. Every SCC
45 // representing a cycle is then transformed into a structured loop with a single
46 // entry block and a single latch containing the only back edge to the entry
47 // block and the only edge to an exit block outside the loop. Rerouting control
48 // flow to create single entry and exit blocks is achieved via a multiplexer
49 // construct that can be visualized as follows:
50 //                         +-----+ +-----+   +-----+
51 //                         | bb0 | | bb1 |...| bbN |
52 //                         +--+--+ +--+--+   +-+---+
53 //                            |       |        |
54 //                            |       v        |
55 //                            |  +------+      |
56 //                            | ++      ++<----+
57 //                            | | Region |
58 //                            +>|        |<----+
59 //                              ++      ++     |
60 //                               +------+------+
61 //
62 // The above transforms to:
63 //                         +-----+ +-----+   +-----+
64 //                         | bb0 | | bb1 |...| bbN |
65 //                         +-----+ +--|--+   ++----+
66 //                              |     v       |
67 //                              +->+-----+<---+
68 //                                 | bbM |<-------+
69 //                                 +---+-+        |
70 //                             +---+   | +----+   |
71 //                             |       v      |   |
72 //                             |   +------+   |   |
73 //                             |  ++      ++<-+   |
74 //                             +->| Region |      |
75 //                                ++      ++      |
76 //                                 +------+-------+
77 //
78 // bbM in the above is the multiplexer block, and any block previously branching
79 // to an entry block of the region are redirected to it. This includes any
80 // branches from within the region. Using a block argument, bbM then dispatches
81 // to the correct entry block of the region dependent on the predecessor.
82 //
83 // A similar transformation is done to create the latch block with the single
84 // back edge and loop exit edge.
85 //
86 // The above form has the advantage that bbM now acts as the loop header
87 // of the loop body. After the transformation on the latch, this results in a
88 // structured loop that can then be lifted to structured control flow. The
89 // conditional branches created in bbM are later lifted to conditional
90 // branches.
91 //
92 // Lifting conditional branches is done by analyzing the *first* conditional
93 // branch encountered in the entry region. The algorithm then identifies
94 // all blocks that are dominated by a specific control flow edge and
95 // the region where control flow continues:
96 //                                 +-----+
97 //                           +-----+ bb0 +----+
98 //                           v     +-----+    v
99 //                Region 1 +-+-+    ...     +-+-+ Region n
100 //                         +---+            +---+
101 //                          ...              ...
102 //                           |                |
103 //                           |      +---+     |
104 //                           +---->++   ++<---+
105 //                                 |     |
106 //                                 ++   ++ Region T
107 //                                  +---+
108 // Every region following bb0 consists of 0 or more blocks that eventually
109 // branch to Region T. If there are multiple entry blocks into Region T, a
110 // single entry block is created using a multiplexer block as shown above.
111 // Region 1 to Region n are then lifted together with the conditional control
112 // flow operation terminating bb0 into a structured conditional operation
113 // followed by the operations of the entry block of Region T.
114 //===----------------------------------------------------------------------===//
115 
116 #include "mlir/Transforms/CFGToSCF.h"
117 
118 #include "mlir/IR/RegionGraphTraits.h"
119 #include "mlir/Interfaces/ControlFlowInterfaces.h"
120 #include "mlir/Interfaces/SideEffectInterfaces.h"
121 #include "llvm/ADT/DepthFirstIterator.h"
122 #include "llvm/ADT/MapVector.h"
123 #include "llvm/ADT/SCCIterator.h"
124 #include "llvm/ADT/SetVector.h"
125 #include "llvm/ADT/SmallPtrSet.h"
126 
127 using namespace mlir;
128 
129 /// Returns the mutable operand range used to transfer operands from `block` to
130 /// its successor with the given index. The returned range being mutable allows
131 /// us to modify the operands being transferred.
132 static MutableOperandRange
getMutableSuccessorOperands(Block * block,unsigned successorIndex)133 getMutableSuccessorOperands(Block *block, unsigned successorIndex) {
134   auto branchOpInterface = cast<BranchOpInterface>(block->getTerminator());
135   SuccessorOperands succOps =
136       branchOpInterface.getSuccessorOperands(successorIndex);
137   return succOps.getMutableForwardedOperands();
138 }
139 
140 /// Return the operand range used to transfer operands from `block` to its
141 /// successor with the given index.
getSuccessorOperands(Block * block,unsigned successorIndex)142 static OperandRange getSuccessorOperands(Block *block,
143                                          unsigned successorIndex) {
144   return getMutableSuccessorOperands(block, successorIndex);
145 }
146 
147 /// Appends all the block arguments from `other` to the block arguments of
148 /// `block`, copying their types and locations.
addBlockArgumentsFromOther(Block * block,Block * other)149 static void addBlockArgumentsFromOther(Block *block, Block *other) {
150   for (BlockArgument arg : other->getArguments())
151     block->addArgument(arg.getType(), arg.getLoc());
152 }
153 
154 namespace {
155 
156 /// Class representing an edge in the CFG. Consists of a from-block, a successor
157 /// and corresponding successor operands passed to the block arguments of the
158 /// successor.
159 class Edge {
160   Block *fromBlock;
161   unsigned successorIndex;
162 
163 public:
164   /// Constructs a new edge from `fromBlock` to the successor corresponding to
165   /// `successorIndex`.
Edge(Block * fromBlock,unsigned int successorIndex)166   Edge(Block *fromBlock, unsigned int successorIndex)
167       : fromBlock(fromBlock), successorIndex(successorIndex) {}
168 
169   /// Returns the from-block.
getFromBlock() const170   Block *getFromBlock() const { return fromBlock; }
171 
172   /// Returns the successor of the edge.
getSuccessor() const173   Block *getSuccessor() const {
174     return fromBlock->getSuccessor(successorIndex);
175   }
176 
177   /// Sets the successor of the edge, adjusting the terminator in the
178   /// from-block.
setSuccessor(Block * block) const179   void setSuccessor(Block *block) const {
180     fromBlock->getTerminator()->setSuccessor(block, successorIndex);
181   }
182 
183   /// Returns the arguments of this edge that are passed to the block arguments
184   /// of the successor.
getMutableSuccessorOperands() const185   MutableOperandRange getMutableSuccessorOperands() const {
186     return ::getMutableSuccessorOperands(fromBlock, successorIndex);
187   }
188 
189   /// Returns the arguments of this edge that are passed to the block arguments
190   /// of the successor.
getSuccessorOperands() const191   OperandRange getSuccessorOperands() const {
192     return ::getSuccessorOperands(fromBlock, successorIndex);
193   }
194 };
195 
196 /// Structure containing the entry, exit and back edges of a cycle. A cycle is a
197 /// generalization of a loop that may have multiple entry edges. See also
198 /// https://llvm.org/docs/CycleTerminology.html.
199 struct CycleEdges {
200   /// All edges from a block outside the cycle to a block inside the cycle.
201   /// The targets of these edges are entry blocks.
202   SmallVector<Edge> entryEdges;
203   /// All edges from a block inside the cycle to a block outside the cycle.
204   SmallVector<Edge> exitEdges;
205   /// All edges from a block inside the cycle to an entry block.
206   SmallVector<Edge> backEdges;
207 };
208 
209 /// Class used to orchestrate creation of so-called edge multiplexers.
210 /// This class creates a new basic block and routes all inputs edges
211 /// to this basic block before branching to their original target.
212 /// The purpose of this transformation is to create single-entry,
213 /// single-exit regions.
214 class EdgeMultiplexer {
215 public:
216   /// Creates a new edge multiplexer capable of redirecting all edges to one of
217   /// the `entryBlocks`. This creates the multiplexer basic block with
218   /// appropriate block arguments after the first entry block. `extraArgs`
219   /// contains the types of possible extra block arguments passed to the
220   /// multiplexer block that are added to the successor operands of every
221   /// outgoing edge.
222   ///
223   /// NOTE: This does not yet redirect edges to branch to the
224   /// multiplexer block nor code dispatching from the multiplexer code
225   /// to the original successors.
226   /// See `redirectEdge` and `createSwitch`.
create(Location loc,ArrayRef<Block * > entryBlocks,function_ref<Value (unsigned)> getSwitchValue,function_ref<Value (Type)> getUndefValue,TypeRange extraArgs={})227   static EdgeMultiplexer create(Location loc, ArrayRef<Block *> entryBlocks,
228                                 function_ref<Value(unsigned)> getSwitchValue,
229                                 function_ref<Value(Type)> getUndefValue,
230                                 TypeRange extraArgs = {}) {
231     assert(!entryBlocks.empty() && "Require at least one entry block");
232 
233     auto *multiplexerBlock = new Block;
234     multiplexerBlock->insertAfter(entryBlocks.front());
235 
236     // To implement the multiplexer block, we have to add the block arguments of
237     // every distinct successor block to the multiplexer block. When redirecting
238     // edges, block arguments designated for blocks that aren't branched to will
239     // be assigned the `getUndefValue`. The amount of block arguments and their
240     // offset is saved in the map for `redirectEdge` to transform the edges.
241     llvm::SmallMapVector<Block *, unsigned, 4> blockArgMapping;
242     for (Block *entryBlock : entryBlocks) {
243       auto [iter, inserted] = blockArgMapping.insert(
244           {entryBlock, multiplexerBlock->getNumArguments()});
245       if (inserted)
246         addBlockArgumentsFromOther(multiplexerBlock, entryBlock);
247     }
248 
249     // If we have more than one successor, we have to additionally add a
250     // discriminator value, denoting which successor to jump to.
251     // When redirecting edges, an appropriate value will be passed using
252     // `getSwitchValue`.
253     Value discriminator;
254     if (blockArgMapping.size() > 1)
255       discriminator =
256           multiplexerBlock->addArgument(getSwitchValue(0).getType(), loc);
257 
258     multiplexerBlock->addArguments(
259         extraArgs, SmallVector<Location>(extraArgs.size(), loc));
260 
261     return EdgeMultiplexer(multiplexerBlock, getSwitchValue, getUndefValue,
262                            std::move(blockArgMapping), discriminator);
263   }
264 
265   /// Returns the created multiplexer block.
getMultiplexerBlock() const266   Block *getMultiplexerBlock() const { return multiplexerBlock; }
267 
268   /// Redirects `edge` to branch to the multiplexer block before continuing to
269   /// its original target. The edges successor must have originally been part
270   /// of the entry blocks array passed to the `create` function. `extraArgs`
271   /// must be used to pass along any additional values corresponding to
272   /// `extraArgs` in `create`.
redirectEdge(Edge edge,ValueRange extraArgs={}) const273   void redirectEdge(Edge edge, ValueRange extraArgs = {}) const {
274     const auto *result = blockArgMapping.find(edge.getSuccessor());
275     assert(result != blockArgMapping.end() &&
276            "Edge was not originally passed to `create` method.");
277 
278     MutableOperandRange successorOperands = edge.getMutableSuccessorOperands();
279 
280     // Extra arguments are always appended at the end of the block arguments.
281     unsigned extraArgsBeginIndex =
282         multiplexerBlock->getNumArguments() - extraArgs.size();
283     // If a discriminator exists, it is right before the extra arguments.
284     std::optional<unsigned> discriminatorIndex =
285         discriminator ? extraArgsBeginIndex - 1 : std::optional<unsigned>{};
286 
287     SmallVector<Value> newSuccOperands(multiplexerBlock->getNumArguments());
288     for (BlockArgument argument : multiplexerBlock->getArguments()) {
289       unsigned index = argument.getArgNumber();
290       if (index >= result->second &&
291           index < result->second + edge.getSuccessor()->getNumArguments()) {
292         // Original block arguments to the entry block.
293         newSuccOperands[index] =
294             successorOperands[index - result->second].get();
295         continue;
296       }
297 
298       // Discriminator value if it exists.
299       if (index == discriminatorIndex) {
300         newSuccOperands[index] =
301             getSwitchValue(result - blockArgMapping.begin());
302         continue;
303       }
304 
305       // Followed by the extra arguments.
306       if (index >= extraArgsBeginIndex) {
307         newSuccOperands[index] = extraArgs[index - extraArgsBeginIndex];
308         continue;
309       }
310 
311       // Otherwise undef values for any unused block arguments used by other
312       // entry blocks.
313       newSuccOperands[index] = getUndefValue(argument.getType());
314     }
315 
316     edge.setSuccessor(multiplexerBlock);
317     successorOperands.assign(newSuccOperands);
318   }
319 
320   /// Creates a switch op using `builder` which dispatches to the original
321   /// successors of the edges passed to `create` minus the ones in `excluded`.
322   /// The builder's insertion point has to be in a block dominated by the
323   /// multiplexer block. All edges to the multiplexer block must have already
324   /// been redirected using `redirectEdge`.
325   void createSwitch(
326       Location loc, OpBuilder &builder, CFGToSCFInterface &interface,
327       const SmallPtrSetImpl<Block *> &excluded = SmallPtrSet<Block *, 1>{}) {
328     // We create the switch by creating a case for all entries and then
329     // splitting of the last entry as a default case.
330 
331     SmallVector<ValueRange> caseArguments;
332     SmallVector<unsigned> caseValues;
333     SmallVector<Block *> caseDestinations;
334     for (auto &&[index, pair] : llvm::enumerate(blockArgMapping)) {
335       auto &&[succ, offset] = pair;
336       if (excluded.contains(succ))
337         continue;
338 
339       caseValues.push_back(index);
340       caseArguments.push_back(multiplexerBlock->getArguments().slice(
341           offset, succ->getNumArguments()));
342       caseDestinations.push_back(succ);
343     }
344 
345     // If we don't have a discriminator due to only having one entry we have to
346     // create a dummy flag for the switch.
347     Value realDiscriminator = discriminator;
348     if (!realDiscriminator || caseArguments.size() == 1)
349       realDiscriminator = getSwitchValue(0);
350 
351     caseValues.pop_back();
352     Block *defaultDest = caseDestinations.pop_back_val();
353     ValueRange defaultArgs = caseArguments.pop_back_val();
354 
355     assert(!builder.getInsertionBlock()->hasNoPredecessors() &&
356            "Edges need to be redirected prior to creating switch.");
357     interface.createCFGSwitchOp(loc, builder, realDiscriminator, caseValues,
358                                 caseDestinations, caseArguments, defaultDest,
359                                 defaultArgs);
360   }
361 
362 private:
363   /// Newly created multiplexer block.
364   Block *multiplexerBlock;
365   /// Callback used to create a constant suitable as flag for
366   /// the interfaces `createCFGSwitchOp`.
367   function_ref<Value(unsigned)> getSwitchValue;
368   /// Callback used to create undefined values of a given type.
369   function_ref<Value(Type)> getUndefValue;
370 
371   /// Mapping of the block arguments of an entry block to the corresponding
372   /// block arguments in the multiplexer block. Block arguments of an entry
373   /// block are simply appended ot the multiplexer block. This map simply
374   /// contains the offset to the range in the multiplexer block.
375   llvm::SmallMapVector<Block *, unsigned, 4> blockArgMapping;
376   /// Discriminator value used in the multiplexer block to dispatch to the
377   /// correct entry block. Null value if not required due to only having one
378   /// entry block.
379   Value discriminator;
380 
EdgeMultiplexer(Block * multiplexerBlock,function_ref<Value (unsigned)> getSwitchValue,function_ref<Value (Type)> getUndefValue,llvm::SmallMapVector<Block *,unsigned,4> && entries,Value dispatchFlag)381   EdgeMultiplexer(Block *multiplexerBlock,
382                   function_ref<Value(unsigned)> getSwitchValue,
383                   function_ref<Value(Type)> getUndefValue,
384                   llvm::SmallMapVector<Block *, unsigned, 4> &&entries,
385                   Value dispatchFlag)
386       : multiplexerBlock(multiplexerBlock), getSwitchValue(getSwitchValue),
387         getUndefValue(getUndefValue), blockArgMapping(std::move(entries)),
388         discriminator(dispatchFlag) {}
389 };
390 
391 /// Alternative implementation of DenseMapInfo<Operation*> using the operation
392 /// equivalence infrastructure to check whether two 'return-like' operations are
393 /// equivalent in the context of this transformation. This means that both
394 /// operations are of the same kind, have the same amount of operands and types
395 /// and the same attributes and properties. The operands themselves don't have
396 /// to be equivalent.
397 struct ReturnLikeOpEquivalence : public llvm::DenseMapInfo<Operation *> {
getHashValue__anoncfa425e90111::ReturnLikeOpEquivalence398   static unsigned getHashValue(const Operation *opC) {
399     return OperationEquivalence::computeHash(
400         const_cast<Operation *>(opC),
401         /*hashOperands=*/OperationEquivalence::ignoreHashValue,
402         /*hashResults=*/OperationEquivalence::ignoreHashValue,
403         OperationEquivalence::IgnoreLocations);
404   }
405 
isEqual__anoncfa425e90111::ReturnLikeOpEquivalence406   static bool isEqual(const Operation *lhs, const Operation *rhs) {
407     if (lhs == rhs)
408       return true;
409     if (lhs == getTombstoneKey() || lhs == getEmptyKey() ||
410         rhs == getTombstoneKey() || rhs == getEmptyKey())
411       return false;
412     return OperationEquivalence::isEquivalentTo(
413         const_cast<Operation *>(lhs), const_cast<Operation *>(rhs),
414         OperationEquivalence::ignoreValueEquivalence, nullptr,
415         OperationEquivalence::IgnoreLocations);
416   }
417 };
418 
419 /// Utility-class for transforming a region to only have one single block for
420 /// every return-like operation.
421 class ReturnLikeExitCombiner {
422 public:
ReturnLikeExitCombiner(Region & topLevelRegion,CFGToSCFInterface & interface)423   ReturnLikeExitCombiner(Region &topLevelRegion, CFGToSCFInterface &interface)
424       : topLevelRegion(topLevelRegion), interface(interface) {}
425 
426   /// Transforms `returnLikeOp` to a branch to the only block in the
427   /// region with an instance of `returnLikeOp`s kind.
combineExit(Operation * returnLikeOp,function_ref<Value (unsigned)> getSwitchValue)428   void combineExit(Operation *returnLikeOp,
429                    function_ref<Value(unsigned)> getSwitchValue) {
430     auto [iter, inserted] =
431         returnLikeToCombinedExit.insert({returnLikeOp, nullptr});
432     if (!inserted && iter->first == returnLikeOp)
433       return;
434 
435     Block *exitBlock = iter->second;
436     if (inserted) {
437       exitBlock = new Block;
438       iter->second = exitBlock;
439       topLevelRegion.push_back(exitBlock);
440       exitBlock->addArguments(
441           returnLikeOp->getOperandTypes(),
442           SmallVector<Location>(returnLikeOp->getNumOperands(),
443                                 returnLikeOp->getLoc()));
444     }
445 
446     auto builder = OpBuilder::atBlockTerminator(returnLikeOp->getBlock());
447     interface.createSingleDestinationBranch(returnLikeOp->getLoc(), builder,
448                                             getSwitchValue(0), exitBlock,
449                                             returnLikeOp->getOperands());
450 
451     if (!inserted) {
452       returnLikeOp->erase();
453       return;
454     }
455 
456     returnLikeOp->moveBefore(exitBlock, exitBlock->end());
457     returnLikeOp->setOperands(exitBlock->getArguments());
458   }
459 
460 private:
461   /// Mapping of return-like operation to block. All return-like operations
462   /// of the same kind with the same attributes, properties and types are seen
463   /// as equivalent. First occurrence seen is kept in the map.
464   llvm::SmallDenseMap<Operation *, Block *, 4, ReturnLikeOpEquivalence>
465       returnLikeToCombinedExit;
466   Region &topLevelRegion;
467   CFGToSCFInterface &interface;
468 };
469 
470 } // namespace
471 
472 /// Returns a range of all edges from `block` to each of its successors.
successorEdges(Block * block)473 static auto successorEdges(Block *block) {
474   return llvm::map_range(llvm::seq(block->getNumSuccessors()),
475                          [=](unsigned index) { return Edge(block, index); });
476 }
477 
478 /// Calculates entry, exit and back edges of the given cycle.
479 static CycleEdges
calculateCycleEdges(const llvm::SmallSetVector<Block *,4> & cycles)480 calculateCycleEdges(const llvm::SmallSetVector<Block *, 4> &cycles) {
481   CycleEdges result;
482   SmallPtrSet<Block *, 8> entryBlocks;
483 
484   // First identify all exit and entry edges by checking whether any successors
485   // or predecessors are from outside the cycles.
486   for (Block *block : cycles) {
487     for (auto pred = block->pred_begin(); pred != block->pred_end(); pred++) {
488       if (cycles.contains(*pred))
489         continue;
490 
491       result.entryEdges.emplace_back(*pred, pred.getSuccessorIndex());
492       entryBlocks.insert(block);
493     }
494 
495     for (auto &&[succIndex, succ] : llvm::enumerate(block->getSuccessors())) {
496       if (cycles.contains(succ))
497         continue;
498 
499       result.exitEdges.emplace_back(block, succIndex);
500     }
501   }
502 
503   // With the entry blocks identified, find all the back edges.
504   for (Block *block : cycles) {
505     for (auto &&[succIndex, succ] : llvm::enumerate(block->getSuccessors())) {
506       if (!entryBlocks.contains(succ))
507         continue;
508 
509       result.backEdges.emplace_back(block, succIndex);
510     }
511   }
512 
513   return result;
514 }
515 
516 /// Creates a single entry block out of multiple entry edges using an edge
517 /// multiplexer and returns it.
518 static EdgeMultiplexer
createSingleEntryBlock(Location loc,ArrayRef<Edge> entryEdges,function_ref<Value (unsigned)> getSwitchValue,function_ref<Value (Type)> getUndefValue,CFGToSCFInterface & interface)519 createSingleEntryBlock(Location loc, ArrayRef<Edge> entryEdges,
520                        function_ref<Value(unsigned)> getSwitchValue,
521                        function_ref<Value(Type)> getUndefValue,
522                        CFGToSCFInterface &interface) {
523   auto result = EdgeMultiplexer::create(
524       loc, llvm::map_to_vector(entryEdges, std::mem_fn(&Edge::getSuccessor)),
525       getSwitchValue, getUndefValue);
526 
527   // Redirect the edges prior to creating the switch op.
528   // We guarantee that predecessors are up to date.
529   for (Edge edge : entryEdges)
530     result.redirectEdge(edge);
531 
532   auto builder = OpBuilder::atBlockBegin(result.getMultiplexerBlock());
533   result.createSwitch(loc, builder, interface);
534 
535   return result;
536 }
537 
538 namespace {
539 /// Special loop properties of a structured loop.
540 /// A structured loop is a loop satisfying all of the following:
541 /// * Has at most one entry, one exit and one back edge.
542 /// * The back edge originates from the same block as the exit edge.
543 struct StructuredLoopProperties {
544   /// Block containing both the single exit edge and the single back edge.
545   Block *latch;
546   /// Loop condition of type equal to a value returned by `getSwitchValue`.
547   Value condition;
548   /// Exit block which is the only successor of the loop.
549   Block *exitBlock;
550 };
551 } // namespace
552 
553 /// Transforms a loop into a structured loop with only a single back edge and
554 /// exiting edge, originating from the same block.
createSingleExitingLatch(Location loc,ArrayRef<Edge> backEdges,ArrayRef<Edge> exitEdges,function_ref<Value (unsigned)> getSwitchValue,function_ref<Value (Type)> getUndefValue,CFGToSCFInterface & interface,ReturnLikeExitCombiner & exitCombiner)555 static FailureOr<StructuredLoopProperties> createSingleExitingLatch(
556     Location loc, ArrayRef<Edge> backEdges, ArrayRef<Edge> exitEdges,
557     function_ref<Value(unsigned)> getSwitchValue,
558     function_ref<Value(Type)> getUndefValue, CFGToSCFInterface &interface,
559     ReturnLikeExitCombiner &exitCombiner) {
560   assert(llvm::all_equal(
561              llvm::map_range(backEdges, std::mem_fn(&Edge::getSuccessor))) &&
562          "All repetition edges must lead to the single loop header");
563 
564   // First create the multiplexer block, which will be our latch, for all back
565   // edges and exit edges. We pass an additional argument to the multiplexer
566   // block which indicates whether the latch was reached from what was
567   // originally a back edge or an exit block.
568   // This is later used to branch using the new only back edge.
569   SmallVector<Block *> successors;
570   llvm::append_range(
571       successors, llvm::map_range(backEdges, std::mem_fn(&Edge::getSuccessor)));
572   llvm::append_range(
573       successors, llvm::map_range(exitEdges, std::mem_fn(&Edge::getSuccessor)));
574   auto multiplexer =
575       EdgeMultiplexer::create(loc, successors, getSwitchValue, getUndefValue,
576                               /*extraArgs=*/getSwitchValue(0).getType());
577 
578   auto *latchBlock = multiplexer.getMultiplexerBlock();
579 
580   // Create a separate exit block that comes right after the latch.
581   auto *exitBlock = new Block;
582   exitBlock->insertAfter(latchBlock);
583 
584   // Since this is a loop, all back edges point to the same loop header.
585   Block *loopHeader = backEdges.front().getSuccessor();
586 
587   // Redirect the edges prior to creating the switch op.
588   // We guarantee that predecessors are up to date.
589 
590   // Redirecting back edges with `shouldRepeat` as 1.
591   for (Edge backEdge : backEdges)
592     multiplexer.redirectEdge(backEdge, /*extraArgs=*/getSwitchValue(1));
593 
594   // Redirecting exits edges with `shouldRepeat` as 0.
595   for (Edge exitEdge : exitEdges)
596     multiplexer.redirectEdge(exitEdge, /*extraArgs=*/getSwitchValue(0));
597 
598   // Create the new only back edge to the loop header. Branch to the
599   // exit block otherwise.
600   Value shouldRepeat = latchBlock->getArguments().back();
601   {
602     auto builder = OpBuilder::atBlockBegin(latchBlock);
603     interface.createConditionalBranch(
604         loc, builder, shouldRepeat, loopHeader,
605         latchBlock->getArguments().take_front(loopHeader->getNumArguments()),
606         /*falseDest=*/exitBlock,
607         /*falseArgs=*/{});
608   }
609 
610   {
611     auto builder = OpBuilder::atBlockBegin(exitBlock);
612     if (!exitEdges.empty()) {
613       // Create the switch dispatching to what were originally the multiple exit
614       // blocks. The loop header has to explicitly be excluded in the below
615       // switch as we would otherwise be creating a new loop again. All back
616       // edges leading to the loop header have already been handled in the
617       // switch above. The remaining edges can only jump to blocks outside the
618       // loop.
619 
620       SmallPtrSet<Block *, 1> excluded = {loopHeader};
621       multiplexer.createSwitch(loc, builder, interface, excluded);
622     } else {
623       // A loop without an exit edge is a statically known infinite loop.
624       // Since structured control flow ops are not terminator ops, the caller
625       // has to create a fitting return-like unreachable terminator operation.
626       FailureOr<Operation *> terminator = interface.createUnreachableTerminator(
627           loc, builder, *latchBlock->getParent());
628       if (failed(terminator))
629         return failure();
630       // Transform the just created transform operation in the case that an
631       // occurrence of it existed in input IR.
632       exitCombiner.combineExit(*terminator, getSwitchValue);
633     }
634   }
635 
636   return StructuredLoopProperties{latchBlock, /*condition=*/shouldRepeat,
637                                   exitBlock};
638 }
639 
640 /// Transforms a structured loop into a loop in reduce form.
641 ///
642 /// Reduce form is defined as a structured loop where:
643 /// (0) No values defined within the loop body are used outside the loop body.
644 /// (1) The block arguments and successor operands of the exit block are equal
645 ///     to the block arguments of the loop header and the successor operands
646 ///     of the back edge.
647 ///
648 /// This is required for many structured control flow ops as they tend
649 /// to not have separate "loop result arguments" and "loop iteration arguments"
650 /// at the end of the block. Rather, the "loop iteration arguments" from the
651 /// last iteration are the result of the loop.
652 ///
653 /// Note that the requirement of (0) is shared with LCSSA form in LLVM. However,
654 /// due to this being a structured loop instead of a general loop, we do not
655 /// require complicated dominance algorithms nor SSA updating making this
656 /// implementation easier than creating a generic LCSSA transformation pass.
657 static SmallVector<Value>
transformToReduceLoop(Block * loopHeader,Block * exitBlock,const llvm::SmallSetVector<Block *,4> & loopBlocks,function_ref<Value (Type)> getUndefValue,DominanceInfo & dominanceInfo)658 transformToReduceLoop(Block *loopHeader, Block *exitBlock,
659                       const llvm::SmallSetVector<Block *, 4> &loopBlocks,
660                       function_ref<Value(Type)> getUndefValue,
661                       DominanceInfo &dominanceInfo) {
662   Block *latch = exitBlock->getSinglePredecessor();
663   assert(latch &&
664          "Exit block must have only latch as predecessor at this point");
665   assert(exitBlock->getNumArguments() == 0 &&
666          "Exit block mustn't have any block arguments at this point");
667 
668   unsigned loopHeaderIndex = 0;
669   unsigned exitBlockIndex = 1;
670   if (latch->getSuccessor(loopHeaderIndex) != loopHeader)
671     std::swap(loopHeaderIndex, exitBlockIndex);
672 
673   assert(latch->getSuccessor(loopHeaderIndex) == loopHeader);
674   assert(latch->getSuccessor(exitBlockIndex) == exitBlock);
675 
676   MutableOperandRange exitBlockSuccessorOperands =
677       getMutableSuccessorOperands(latch, exitBlockIndex);
678   // Save the values as a vector, not a `MutableOperandRange` as the latter gets
679   // invalidated when mutating the operands through a different
680   // `MutableOperandRange` of the same operation.
681   SmallVector<Value> loopHeaderSuccessorOperands =
682       llvm::to_vector(getSuccessorOperands(latch, loopHeaderIndex));
683 
684   // Add all values used in the next iteration to the exit block. Replace
685   // any uses that are outside the loop with the newly created exit block.
686   for (Value arg : loopHeaderSuccessorOperands) {
687     BlockArgument exitArg = exitBlock->addArgument(arg.getType(), arg.getLoc());
688     exitBlockSuccessorOperands.append(arg);
689     arg.replaceUsesWithIf(exitArg, [&](OpOperand &use) {
690       return !loopBlocks.contains(use.getOwner()->getBlock());
691     });
692   }
693 
694   // Loop below might add block arguments to the latch and loop header.
695   // Save the block arguments prior to the loop to not process these.
696   SmallVector<BlockArgument> latchBlockArgumentsPrior =
697       llvm::to_vector(latch->getArguments());
698   SmallVector<BlockArgument> loopHeaderArgumentsPrior =
699       llvm::to_vector(loopHeader->getArguments());
700 
701   // Go over all values defined within the loop body. If any of them are used
702   // outside the loop body, create a block argument on the exit block and loop
703   // header and replace the outside uses with the exit block argument.
704   // The loop header block argument is added to satisfy requirement (1) in the
705   // reduce form condition.
706   for (Block *loopBlock : loopBlocks) {
707     // Cache dominance queries for loopBlock.
708     // There are likely to be many duplicate queries as there can be many value
709     // definitions within a block.
710     llvm::SmallDenseMap<Block *, bool> dominanceCache;
711     // Returns true if `loopBlock` dominates `block`.
712     auto loopBlockDominates = [&](Block *block) {
713       auto [iter, inserted] = dominanceCache.insert({block, false});
714       if (!inserted)
715         return iter->second;
716       iter->second = dominanceInfo.dominates(loopBlock, block);
717       return iter->second;
718     };
719 
720     auto checkValue = [&](Value value) {
721       Value blockArgument;
722       for (OpOperand &use : llvm::make_early_inc_range(value.getUses())) {
723         // Go through all the parent blocks and find the one part of the region
724         // of the loop. If the block is part of the loop, then the value does
725         // not escape the loop through this use.
726         Block *currBlock = use.getOwner()->getBlock();
727         while (currBlock && currBlock->getParent() != loopHeader->getParent())
728           currBlock = currBlock->getParentOp()->getBlock();
729         if (loopBlocks.contains(currBlock))
730           continue;
731 
732         // Block argument is only created the first time it is required.
733         if (!blockArgument) {
734           blockArgument =
735               exitBlock->addArgument(value.getType(), value.getLoc());
736           loopHeader->addArgument(value.getType(), value.getLoc());
737 
738           // `value` might be defined in a block that does not dominate `latch`
739           // but previously dominated an exit block with a use.
740           // In this case, add a block argument to the latch and go through all
741           // predecessors. If the value dominates the predecessor, pass the
742           // value as a successor operand, otherwise pass undef.
743           // The above is unnecessary if the value is a block argument of the
744           // latch or if `value` dominates all predecessors.
745           Value argument = value;
746           if (value.getParentBlock() != latch &&
747               llvm::any_of(latch->getPredecessors(), [&](Block *pred) {
748                 return !loopBlockDominates(pred);
749               })) {
750             argument = latch->addArgument(value.getType(), value.getLoc());
751             for (auto iter = latch->pred_begin(); iter != latch->pred_end();
752                  ++iter) {
753               Value succOperand = value;
754               if (!loopBlockDominates(*iter))
755                 succOperand = getUndefValue(value.getType());
756 
757               getMutableSuccessorOperands(*iter, iter.getSuccessorIndex())
758                   .append(succOperand);
759             }
760           }
761 
762           loopHeaderSuccessorOperands.push_back(argument);
763           for (Edge edge : successorEdges(latch))
764             edge.getMutableSuccessorOperands().append(argument);
765         }
766 
767         use.set(blockArgument);
768       }
769     };
770 
771     if (loopBlock == latch)
772       llvm::for_each(latchBlockArgumentsPrior, checkValue);
773     else if (loopBlock == loopHeader)
774       llvm::for_each(loopHeaderArgumentsPrior, checkValue);
775     else
776       llvm::for_each(loopBlock->getArguments(), checkValue);
777 
778     for (Operation &op : *loopBlock)
779       llvm::for_each(op.getResults(), checkValue);
780   }
781 
782   // New block arguments may have been added to the loop header.
783   // Adjust the entry edges to pass undef values to these.
784   for (auto iter = loopHeader->pred_begin(); iter != loopHeader->pred_end();
785        ++iter) {
786     // Latch successor arguments have already been handled.
787     if (*iter == latch)
788       continue;
789 
790     MutableOperandRange succOps =
791         getMutableSuccessorOperands(*iter, iter.getSuccessorIndex());
792     succOps.append(llvm::map_to_vector(
793         loopHeader->getArguments().drop_front(succOps.size()),
794         [&](BlockArgument arg) { return getUndefValue(arg.getType()); }));
795   }
796 
797   return loopHeaderSuccessorOperands;
798 }
799 
800 /// Transforms all outer-most cycles in the region with the region entry
801 /// `regionEntry` into structured loops. Returns the entry blocks of any newly
802 /// created regions potentially requiring further transformations.
transformCyclesToSCFLoops(Block * regionEntry,function_ref<Value (unsigned)> getSwitchValue,function_ref<Value (Type)> getUndefValue,CFGToSCFInterface & interface,DominanceInfo & dominanceInfo,ReturnLikeExitCombiner & exitCombiner)803 static FailureOr<SmallVector<Block *>> transformCyclesToSCFLoops(
804     Block *regionEntry, function_ref<Value(unsigned)> getSwitchValue,
805     function_ref<Value(Type)> getUndefValue, CFGToSCFInterface &interface,
806     DominanceInfo &dominanceInfo, ReturnLikeExitCombiner &exitCombiner) {
807   SmallVector<Block *> newSubRegions;
808   auto scc = llvm::scc_begin(regionEntry);
809   while (!scc.isAtEnd()) {
810     if (!scc.hasCycle()) {
811       ++scc;
812       continue;
813     }
814 
815     // Save the set and increment the SCC iterator early to avoid our
816     // modifications breaking the SCC iterator.
817     llvm::SmallSetVector<Block *, 4> cycleBlockSet(scc->begin(), scc->end());
818     ++scc;
819 
820     CycleEdges edges = calculateCycleEdges(cycleBlockSet);
821     Block *loopHeader = edges.entryEdges.front().getSuccessor();
822     // First turn the cycle into a loop by creating a single entry block if
823     // needed.
824     if (edges.entryEdges.size() > 1) {
825       SmallVector<Edge> edgesToEntryBlocks;
826       llvm::append_range(edgesToEntryBlocks, edges.entryEdges);
827       llvm::append_range(edgesToEntryBlocks, edges.backEdges);
828 
829       EdgeMultiplexer multiplexer = createSingleEntryBlock(
830           loopHeader->getTerminator()->getLoc(), edgesToEntryBlocks,
831           getSwitchValue, getUndefValue, interface);
832 
833       loopHeader = multiplexer.getMultiplexerBlock();
834     }
835     cycleBlockSet.insert(loopHeader);
836 
837     // Then turn it into a structured loop by creating a single latch.
838     FailureOr<StructuredLoopProperties> loopProperties =
839         createSingleExitingLatch(
840             edges.backEdges.front().getFromBlock()->getTerminator()->getLoc(),
841             edges.backEdges, edges.exitEdges, getSwitchValue, getUndefValue,
842             interface, exitCombiner);
843     if (failed(loopProperties))
844       return failure();
845 
846     Block *latchBlock = loopProperties->latch;
847     Block *exitBlock = loopProperties->exitBlock;
848     cycleBlockSet.insert(latchBlock);
849     cycleBlockSet.insert(loopHeader);
850 
851     // Finally, turn it into reduce form.
852     SmallVector<Value> iterationValues = transformToReduceLoop(
853         loopHeader, exitBlock, cycleBlockSet, getUndefValue, dominanceInfo);
854 
855     // Create a block acting as replacement for the loop header and insert
856     // the structured loop into it.
857     auto *newLoopParentBlock = new Block;
858     newLoopParentBlock->insertBefore(loopHeader);
859     addBlockArgumentsFromOther(newLoopParentBlock, loopHeader);
860 
861     Region::BlockListType &blocks = regionEntry->getParent()->getBlocks();
862     Region loopBody;
863     // Make sure the loop header is the entry block.
864     loopBody.push_back(blocks.remove(loopHeader));
865     for (Block *block : cycleBlockSet)
866       if (block != latchBlock && block != loopHeader)
867         loopBody.push_back(blocks.remove(block));
868     // And the latch is the last block.
869     loopBody.push_back(blocks.remove(latchBlock));
870 
871     Operation *oldTerminator = latchBlock->getTerminator();
872     oldTerminator->remove();
873 
874     auto builder = OpBuilder::atBlockBegin(newLoopParentBlock);
875     FailureOr<Operation *> structuredLoopOp =
876         interface.createStructuredDoWhileLoopOp(
877             builder, oldTerminator, newLoopParentBlock->getArguments(),
878             loopProperties->condition, iterationValues, std::move(loopBody));
879     if (failed(structuredLoopOp))
880       return failure();
881     oldTerminator->erase();
882 
883     newSubRegions.push_back(loopHeader);
884 
885     for (auto &&[oldValue, newValue] : llvm::zip(
886              exitBlock->getArguments(), (*structuredLoopOp)->getResults()))
887       oldValue.replaceAllUsesWith(newValue);
888 
889     loopHeader->replaceAllUsesWith(newLoopParentBlock);
890     // Merge the exit block right after the loop operation.
891     newLoopParentBlock->getOperations().splice(newLoopParentBlock->end(),
892                                                exitBlock->getOperations());
893     exitBlock->erase();
894   }
895   return newSubRegions;
896 }
897 
898 /// Makes sure the branch region only has a single exit. This is required by the
899 /// recursive part of the algorithm, as it expects the CFG to be single-entry
900 /// and single-exit. This is done by simply creating an empty block if there
901 /// is more than one block with an edge to the continuation block. All blocks
902 /// with edges to the continuation are then redirected to this block. A region
903 /// terminator is later placed into the block.
createSingleExitBranchRegion(ArrayRef<Block * > branchRegion,Block * continuation,SmallVectorImpl<std::pair<Block *,SmallVector<Value>>> & createdEmptyBlocks,Region & conditionalRegion)904 static void createSingleExitBranchRegion(
905     ArrayRef<Block *> branchRegion, Block *continuation,
906     SmallVectorImpl<std::pair<Block *, SmallVector<Value>>> &createdEmptyBlocks,
907     Region &conditionalRegion) {
908   Block *singleExitBlock = nullptr;
909   std::optional<Edge> previousEdgeToContinuation;
910   Region::BlockListType &parentBlockList =
911       branchRegion.front()->getParent()->getBlocks();
912   for (Block *block : branchRegion) {
913     for (Edge edge : successorEdges(block)) {
914       if (edge.getSuccessor() != continuation)
915         continue;
916 
917       if (!previousEdgeToContinuation) {
918         previousEdgeToContinuation = edge;
919         continue;
920       }
921 
922       // If this is not the first edge to the continuation we create the
923       // single exit block and redirect the edges.
924       if (!singleExitBlock) {
925         singleExitBlock = new Block;
926         addBlockArgumentsFromOther(singleExitBlock, continuation);
927         previousEdgeToContinuation->setSuccessor(singleExitBlock);
928         createdEmptyBlocks.emplace_back(singleExitBlock,
929                                         singleExitBlock->getArguments());
930       }
931 
932       edge.setSuccessor(singleExitBlock);
933     }
934 
935     conditionalRegion.push_back(parentBlockList.remove(block));
936   }
937 
938   if (singleExitBlock)
939     conditionalRegion.push_back(singleExitBlock);
940 }
941 
942 /// Returns true if this block is an exit block of the region.
isRegionExitBlock(Block * block)943 static bool isRegionExitBlock(Block *block) {
944   return block->getNumSuccessors() == 0;
945 }
946 
947 /// Transforms the first occurrence of conditional control flow in `regionEntry`
948 /// into conditionally executed regions. Returns the entry block of the created
949 /// regions and the region after the conditional control flow.
transformToStructuredCFBranches(Block * regionEntry,function_ref<Value (unsigned)> getSwitchValue,function_ref<Value (Type)> getUndefValue,CFGToSCFInterface & interface,DominanceInfo & dominanceInfo)950 static FailureOr<SmallVector<Block *>> transformToStructuredCFBranches(
951     Block *regionEntry, function_ref<Value(unsigned)> getSwitchValue,
952     function_ref<Value(Type)> getUndefValue, CFGToSCFInterface &interface,
953     DominanceInfo &dominanceInfo) {
954   // Trivial region.
955   if (regionEntry->getNumSuccessors() == 0)
956     return SmallVector<Block *>{};
957 
958   if (regionEntry->getNumSuccessors() == 1) {
959     // Single successor we can just splice together.
960     Block *successor = regionEntry->getSuccessor(0);
961     for (auto &&[oldValue, newValue] : llvm::zip(
962              successor->getArguments(), getSuccessorOperands(regionEntry, 0)))
963       oldValue.replaceAllUsesWith(newValue);
964     regionEntry->getTerminator()->erase();
965 
966     regionEntry->getOperations().splice(regionEntry->end(),
967                                         successor->getOperations());
968     successor->erase();
969     return SmallVector<Block *>{regionEntry};
970   }
971 
972   // Split the CFG into "#numSuccessor + 1" regions.
973   // For every edge to a successor, the blocks it solely dominates are
974   // determined and become the region following that edge.
975   // The last region is the continuation that follows the branch regions.
976   SmallPtrSet<Block *, 8> notContinuation;
977   notContinuation.insert(regionEntry);
978   SmallVector<SmallVector<Block *>> successorBranchRegions(
979       regionEntry->getNumSuccessors());
980   for (auto &&[blockList, succ] :
981        llvm::zip(successorBranchRegions, regionEntry->getSuccessors())) {
982     // If the region entry is not the only predecessor, then the edge does not
983     // dominate the block it leads to.
984     if (succ->getSinglePredecessor() != regionEntry)
985       continue;
986 
987     // Otherwise get all blocks it dominates in DFS/pre-order.
988     DominanceInfoNode *node = dominanceInfo.getNode(succ);
989     for (DominanceInfoNode *curr : llvm::depth_first(node)) {
990       blockList.push_back(curr->getBlock());
991       notContinuation.insert(curr->getBlock());
992     }
993   }
994 
995   // Finds all relevant edges and checks the shape of the control flow graph at
996   // this point.
997   // Branch regions may either:
998   // * Be post-dominated by the continuation
999   // * Be post-dominated by a return-like op
1000   // * Dominate a return-like op and have an edge to the continuation.
1001   //
1002   // The control flow graph may then be one of three cases:
1003   // 1) All branch regions are post-dominated by the continuation. This is the
1004   // usual case. If there are multiple entry blocks into the continuation a
1005   // single entry block has to be created. A structured control flow op
1006   // can then be created from the branch regions.
1007   //
1008   // 2) No branch region has an edge to a continuation:
1009   //                                 +-----+
1010   //                           +-----+ bb0 +----+
1011   //                           v     +-----+    v
1012   //                Region 1 +-+--+    ...     +-+--+ Region n
1013   //                         |ret1|            |ret2|
1014   //                         +----+            +----+
1015   //
1016   // This can only occur if every region ends with a different kind of
1017   // return-like op. In that case the control flow operation must stay as we are
1018   // unable to create a single exit-block. We can nevertheless process all its
1019   // successors as they single-entry, single-exit regions.
1020   //
1021   // 3) Only some branch regions are post-dominated by the continuation.
1022   // The other branch regions may either be post-dominated by a return-like op
1023   // or lead to either the continuation or return-like op.
1024   // In this case we also create a single entry block like in 1) that also
1025   // includes all edges to the return-like op:
1026   //                                 +-----+
1027   //                           +-----+ bb0 +----+
1028   //                           v     +-----+    v
1029   //             Region 1    +-+-+    ...     +-+-+ Region n
1030   //                         +---+            +---+
1031   //                  +---+  |...              ...
1032   //                  |ret|<-+ |                |
1033   //                  +---+    |      +---+     |
1034   //                           +---->++   ++<---+
1035   //                                 |     |
1036   //                                 ++   ++ Region T
1037   //                                  +---+
1038   // This transforms to:
1039   //                                 +-----+
1040   //                           +-----+ bb0 +----+
1041   //                           v     +-----+    v
1042   //                Region 1 +-+-+    ...     +-+-+ Region n
1043   //                         +---+            +---+
1044   //                          ...    +-----+   ...
1045   //                           +---->+ bbM +<---+
1046   //                                 +-----+
1047   //                           +-----+  |
1048   //                           |        v
1049   //                  +---+    |      +---+
1050   //                  |ret+<---+     ++   ++
1051   //                  +---+          |     |
1052   //                                 ++   ++ Region T
1053   //                                  +---+
1054   //
1055   // bb0 to bbM is now a single-entry, single-exit region that applies to case
1056   // 1). The control flow op at the end of bbM will trigger case 2.
1057   SmallVector<Edge> continuationEdges;
1058   bool continuationPostDominatesAllRegions = true;
1059   bool noSuccessorHasContinuationEdge = true;
1060   for (auto &&[entryEdge, branchRegion] :
1061        llvm::zip(successorEdges(regionEntry), successorBranchRegions)) {
1062 
1063     // If the branch region is empty then the branch target itself is part of
1064     // the continuation.
1065     if (branchRegion.empty()) {
1066       continuationEdges.push_back(entryEdge);
1067       noSuccessorHasContinuationEdge = false;
1068       continue;
1069     }
1070 
1071     for (Block *block : branchRegion) {
1072       if (isRegionExitBlock(block)) {
1073         // If a return-like op is part of the branch region then the
1074         // continuation no longer post-dominates the branch region.
1075         // Add all its incoming edges to edge list to create the single-exit
1076         // block for all branch regions.
1077         continuationPostDominatesAllRegions = false;
1078         for (auto iter = block->pred_begin(); iter != block->pred_end();
1079              ++iter) {
1080           continuationEdges.emplace_back(*iter, iter.getSuccessorIndex());
1081         }
1082         continue;
1083       }
1084 
1085       for (Edge edge : successorEdges(block)) {
1086         if (notContinuation.contains(edge.getSuccessor()))
1087           continue;
1088 
1089         continuationEdges.push_back(edge);
1090         noSuccessorHasContinuationEdge = false;
1091       }
1092     }
1093   }
1094 
1095   // case 2) Keep the control flow op but process its successors further.
1096   if (noSuccessorHasContinuationEdge)
1097     return llvm::to_vector(regionEntry->getSuccessors());
1098 
1099   Block *continuation = llvm::find_singleton<Block>(
1100       continuationEdges, [](Edge edge, bool) { return edge.getSuccessor(); },
1101       /*AllowRepeats=*/true);
1102 
1103   // In case 3) or if not all continuation edges have the same entry block,
1104   // create a single entry block as continuation for all branch regions.
1105   if (!continuation || !continuationPostDominatesAllRegions) {
1106     EdgeMultiplexer multiplexer = createSingleEntryBlock(
1107         continuationEdges.front().getFromBlock()->getTerminator()->getLoc(),
1108         continuationEdges, getSwitchValue, getUndefValue, interface);
1109     continuation = multiplexer.getMultiplexerBlock();
1110   }
1111 
1112   // Trigger reprocess of case 3) after creating the single entry block.
1113   if (!continuationPostDominatesAllRegions) {
1114     // Unlike in the general case, we are explicitly revisiting the same region
1115     // entry again after having changed its control flow edges and dominance.
1116     // We have to therefore explicitly invalidate the dominance tree.
1117     dominanceInfo.invalidate(regionEntry->getParent());
1118     return SmallVector<Block *>{regionEntry};
1119   }
1120 
1121   SmallVector<Block *> newSubRegions;
1122 
1123   // Empty blocks with the values they return to the parent op.
1124   SmallVector<std::pair<Block *, SmallVector<Value>>> createdEmptyBlocks;
1125 
1126   // Create the branch regions.
1127   std::vector<Region> conditionalRegions(successorBranchRegions.size());
1128   for (auto &&[branchRegion, entryEdge, conditionalRegion] :
1129        llvm::zip(successorBranchRegions, successorEdges(regionEntry),
1130                  conditionalRegions)) {
1131     if (branchRegion.empty()) {
1132       // If no block is part of the branch region, we create a dummy block to
1133       // place the region terminator into.
1134       createdEmptyBlocks.emplace_back(
1135           new Block, llvm::to_vector(entryEdge.getSuccessorOperands()));
1136       conditionalRegion.push_back(createdEmptyBlocks.back().first);
1137       continue;
1138     }
1139 
1140     createSingleExitBranchRegion(branchRegion, continuation, createdEmptyBlocks,
1141                                  conditionalRegion);
1142 
1143     // The entries of the branch regions may only have redundant block arguments
1144     // since the edge to the branch region is always dominating.
1145     Block *subRegionEntryBlock = &conditionalRegion.front();
1146     for (auto &&[oldValue, newValue] :
1147          llvm::zip(subRegionEntryBlock->getArguments(),
1148                    entryEdge.getSuccessorOperands()))
1149       oldValue.replaceAllUsesWith(newValue);
1150 
1151     subRegionEntryBlock->eraseArguments(0,
1152                                         subRegionEntryBlock->getNumArguments());
1153     newSubRegions.push_back(subRegionEntryBlock);
1154   }
1155 
1156   Operation *structuredCondOp;
1157   {
1158     auto opBuilder = OpBuilder::atBlockTerminator(regionEntry);
1159     FailureOr<Operation *> result = interface.createStructuredBranchRegionOp(
1160         opBuilder, regionEntry->getTerminator(),
1161         continuation->getArgumentTypes(), conditionalRegions);
1162     if (failed(result))
1163       return failure();
1164     structuredCondOp = *result;
1165     regionEntry->getTerminator()->erase();
1166   }
1167 
1168   for (auto &&[block, valueRange] : createdEmptyBlocks) {
1169     auto builder = OpBuilder::atBlockEnd(block);
1170     LogicalResult result = interface.createStructuredBranchRegionTerminatorOp(
1171         structuredCondOp->getLoc(), builder, structuredCondOp, nullptr,
1172         valueRange);
1173     if (failed(result))
1174       return failure();
1175   }
1176 
1177   // Any leftover users of the continuation must be from unconditional branches
1178   // in a branch region. There can only be at most one per branch region as
1179   // all branch regions have been made single-entry single-exit above.
1180   // Replace them with the region terminator.
1181   for (Operation *user : llvm::make_early_inc_range(continuation->getUsers())) {
1182     assert(user->getNumSuccessors() == 1);
1183     auto builder = OpBuilder::atBlockTerminator(user->getBlock());
1184     LogicalResult result = interface.createStructuredBranchRegionTerminatorOp(
1185         user->getLoc(), builder, structuredCondOp, user,
1186         getMutableSuccessorOperands(user->getBlock(), 0).getAsOperandRange());
1187     if (failed(result))
1188       return failure();
1189     user->erase();
1190   }
1191 
1192   for (auto &&[oldValue, newValue] :
1193        llvm::zip(continuation->getArguments(), structuredCondOp->getResults()))
1194     oldValue.replaceAllUsesWith(newValue);
1195 
1196   // Splice together the continuations operations with the region entry.
1197   regionEntry->getOperations().splice(regionEntry->end(),
1198                                       continuation->getOperations());
1199 
1200   continuation->erase();
1201 
1202   // After splicing the continuation, the region has to be reprocessed as it has
1203   // new successors.
1204   newSubRegions.push_back(regionEntry);
1205 
1206   return newSubRegions;
1207 }
1208 
1209 /// Transforms the region to only have a single block for every kind of
1210 /// return-like operation that all previous occurrences of the return-like op
1211 /// branch to. If the region only contains a single kind of return-like
1212 /// operation, it creates a single-entry and single-exit region.
createSingleExitBlocksForReturnLike(Region & region,function_ref<Value (unsigned)> getSwitchValue,CFGToSCFInterface & interface)1213 static ReturnLikeExitCombiner createSingleExitBlocksForReturnLike(
1214     Region &region, function_ref<Value(unsigned)> getSwitchValue,
1215     CFGToSCFInterface &interface) {
1216   ReturnLikeExitCombiner exitCombiner(region, interface);
1217 
1218   for (Block &block : region.getBlocks()) {
1219     if (block.getNumSuccessors() != 0)
1220       continue;
1221     exitCombiner.combineExit(block.getTerminator(), getSwitchValue);
1222   }
1223 
1224   return exitCombiner;
1225 }
1226 
1227 /// Checks all preconditions of the transformation prior to any transformations.
1228 /// Returns failure if any precondition is violated.
checkTransformationPreconditions(Region & region)1229 static LogicalResult checkTransformationPreconditions(Region &region) {
1230   for (Block &block : region.getBlocks())
1231     if (block.hasNoPredecessors() && !block.isEntryBlock())
1232       return block.front().emitOpError(
1233           "transformation does not support unreachable blocks");
1234 
1235   WalkResult result = region.walk([](Operation *operation) {
1236     if (operation->getNumSuccessors() == 0)
1237       return WalkResult::advance();
1238 
1239     // This transformation requires all ops with successors to implement the
1240     // branch op interface. It is impossible to adjust their block arguments
1241     // otherwise.
1242     auto branchOpInterface = dyn_cast<BranchOpInterface>(operation);
1243     if (!branchOpInterface) {
1244       operation->emitOpError("transformation does not support terminators with "
1245                              "successors not implementing BranchOpInterface");
1246       return WalkResult::interrupt();
1247     }
1248     // Branch operations must have no side effects. Replacing them would not be
1249     // valid otherwise.
1250     if (!isMemoryEffectFree(branchOpInterface)) {
1251       branchOpInterface->emitOpError(
1252           "transformation does not support terminators with side effects");
1253       return WalkResult::interrupt();
1254     }
1255 
1256     for (unsigned index : llvm::seq(operation->getNumSuccessors())) {
1257       SuccessorOperands succOps = branchOpInterface.getSuccessorOperands(index);
1258 
1259       // We cannot support operations with operation-produced successor operands
1260       // as it is currently not possible to pass them to any block arguments
1261       // other than the first. This breaks creating multiplexer blocks and would
1262       // likely need special handling elsewhere too.
1263       if (succOps.getProducedOperandCount() == 0)
1264         continue;
1265 
1266       branchOpInterface->emitOpError("transformation does not support "
1267                                      "operations with operation-produced "
1268                                      "successor operands");
1269       return WalkResult::interrupt();
1270     }
1271     return WalkResult::advance();
1272   });
1273   return failure(result.wasInterrupted());
1274 }
1275 
transformCFGToSCF(Region & region,CFGToSCFInterface & interface,DominanceInfo & dominanceInfo)1276 FailureOr<bool> mlir::transformCFGToSCF(Region &region,
1277                                         CFGToSCFInterface &interface,
1278                                         DominanceInfo &dominanceInfo) {
1279   if (region.empty() || region.hasOneBlock())
1280     return false;
1281 
1282   if (failed(checkTransformationPreconditions(region)))
1283     return failure();
1284 
1285   DenseMap<Type, Value> typedUndefCache;
1286   auto getUndefValue = [&](Type type) {
1287     auto [iter, inserted] = typedUndefCache.insert({type, nullptr});
1288     if (!inserted)
1289       return iter->second;
1290 
1291     auto constantBuilder = OpBuilder::atBlockBegin(&region.front());
1292 
1293     iter->second =
1294         interface.getUndefValue(region.getLoc(), constantBuilder, type);
1295     return iter->second;
1296   };
1297 
1298   // The transformation only creates all values in the range of 0 to
1299   // max(#numSuccessors). Therefore using a vector instead of a map.
1300   SmallVector<Value> switchValueCache;
1301   auto getSwitchValue = [&](unsigned value) {
1302     if (value < switchValueCache.size())
1303       if (switchValueCache[value])
1304         return switchValueCache[value];
1305 
1306     auto constantBuilder = OpBuilder::atBlockBegin(&region.front());
1307 
1308     switchValueCache.resize(
1309         std::max<size_t>(switchValueCache.size(), value + 1));
1310 
1311     switchValueCache[value] =
1312         interface.getCFGSwitchValue(region.getLoc(), constantBuilder, value);
1313     return switchValueCache[value];
1314   };
1315 
1316   ReturnLikeExitCombiner exitCombiner =
1317       createSingleExitBlocksForReturnLike(region, getSwitchValue, interface);
1318 
1319   // Invalidate any dominance tree on the region as the exit combiner has
1320   // added new blocks and edges.
1321   dominanceInfo.invalidate(&region);
1322 
1323   SmallVector<Block *> workList = {&region.front()};
1324   while (!workList.empty()) {
1325     Block *current = workList.pop_back_val();
1326 
1327     // Turn all top-level cycles in the CFG to structured control flow first.
1328     // After this transformation, the remaining CFG ops form a DAG.
1329     FailureOr<SmallVector<Block *>> newRegions =
1330         transformCyclesToSCFLoops(current, getSwitchValue, getUndefValue,
1331                                   interface, dominanceInfo, exitCombiner);
1332     if (failed(newRegions))
1333       return failure();
1334 
1335     // Add the newly created subregions to the worklist. These are the
1336     // bodies of the loops.
1337     llvm::append_range(workList, *newRegions);
1338     // Invalidate the dominance tree as blocks have been moved, created and
1339     // added during the cycle to structured loop transformation.
1340     if (!newRegions->empty())
1341       dominanceInfo.invalidate(current->getParent());
1342 
1343     newRegions = transformToStructuredCFBranches(
1344         current, getSwitchValue, getUndefValue, interface, dominanceInfo);
1345     if (failed(newRegions))
1346       return failure();
1347     // Invalidating the dominance tree is generally not required by the
1348     // transformation above as the new region entries correspond to unaffected
1349     // subtrees in the dominator tree. Only its parent nodes have changed but
1350     // won't be visited again.
1351     llvm::append_range(workList, *newRegions);
1352   }
1353 
1354   return true;
1355 }
1356