xref: /llvm-project/llvm/lib/Transforms/Scalar/GVNSink.cpp (revision b9cb9b3f0d1e891b385eb53f8414b29554fd9234)
1 //===- GVNSink.cpp - sink expressions into successors ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file GVNSink.cpp
10 /// This pass attempts to sink instructions into successors, reducing static
11 /// instruction count and enabling if-conversion.
12 ///
13 /// We use a variant of global value numbering to decide what can be sunk.
14 /// Consider:
15 ///
16 /// [ %a1 = add i32 %b, 1  ]   [ %c1 = add i32 %d, 1  ]
17 /// [ %a2 = xor i32 %a1, 1 ]   [ %c2 = xor i32 %c1, 1 ]
18 ///                  \           /
19 ///            [ %e = phi i32 %a2, %c2 ]
20 ///            [ add i32 %e, 4         ]
21 ///
22 ///
23 /// GVN would number %a1 and %c1 differently because they compute different
24 /// results - the VN of an instruction is a function of its opcode and the
25 /// transitive closure of its operands. This is the key property for hoisting
26 /// and CSE.
27 ///
/// What we want when sinking, however, is a numbering that is a function of
/// the *uses* of an instruction, which allows us to answer the question "if I
/// replace %a1 with %c1, will it contribute in an equivalent way to all
/// successive instructions?". The ValueTable class in this file provides this
/// mapping.
33 //
34 //===----------------------------------------------------------------------===//
35 
36 #include "llvm/ADT/ArrayRef.h"
37 #include "llvm/ADT/DenseMap.h"
38 #include "llvm/ADT/DenseSet.h"
39 #include "llvm/ADT/Hashing.h"
40 #include "llvm/ADT/PostOrderIterator.h"
41 #include "llvm/ADT/STLExtras.h"
42 #include "llvm/ADT/SmallPtrSet.h"
43 #include "llvm/ADT/SmallVector.h"
44 #include "llvm/ADT/Statistic.h"
45 #include "llvm/Analysis/GlobalsModRef.h"
46 #include "llvm/IR/BasicBlock.h"
47 #include "llvm/IR/CFG.h"
48 #include "llvm/IR/Constants.h"
49 #include "llvm/IR/Function.h"
50 #include "llvm/IR/InstrTypes.h"
51 #include "llvm/IR/Instruction.h"
52 #include "llvm/IR/Instructions.h"
53 #include "llvm/IR/PassManager.h"
54 #include "llvm/IR/Type.h"
55 #include "llvm/IR/Use.h"
56 #include "llvm/IR/Value.h"
57 #include "llvm/Support/Allocator.h"
58 #include "llvm/Support/ArrayRecycler.h"
59 #include "llvm/Support/AtomicOrdering.h"
60 #include "llvm/Support/Casting.h"
61 #include "llvm/Support/Compiler.h"
62 #include "llvm/Support/Debug.h"
63 #include "llvm/Support/raw_ostream.h"
64 #include "llvm/Transforms/Scalar/GVN.h"
65 #include "llvm/Transforms/Scalar/GVNExpression.h"
66 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
67 #include "llvm/Transforms/Utils/Local.h"
68 #include <algorithm>
69 #include <cassert>
70 #include <cstddef>
71 #include <cstdint>
72 #include <iterator>
73 #include <utility>
74 
75 using namespace llvm;
76 
77 #define DEBUG_TYPE "gvn-sink"
78 
79 STATISTIC(NumRemoved, "Number of instructions removed");
80 
81 namespace llvm {
82 namespace GVNExpression {
83 
84 LLVM_DUMP_METHOD void Expression::dump() const {
85   print(dbgs());
86   dbgs() << "\n";
87 }
88 
89 } // end namespace GVNExpression
90 } // end namespace llvm
91 
92 namespace {
93 
94 static bool isMemoryInst(const Instruction *I) {
95   return isa<LoadInst>(I) || isa<StoreInst>(I) ||
96          (isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) ||
97          (isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory());
98 }
99 
100 /// Iterates through instructions in a set of blocks in reverse order from the
101 /// first non-terminator. For example (assume all blocks have size n):
102 ///   LockstepReverseIterator I([B1, B2, B3]);
103 ///   *I-- = [B1[n], B2[n], B3[n]];
104 ///   *I-- = [B1[n-1], B2[n-1], B3[n-1]];
105 ///   *I-- = [B1[n-2], B2[n-2], B3[n-2]];
106 ///   ...
107 ///
108 /// It continues until all blocks have been exhausted. Use \c getActiveBlocks()
109 /// to
110 /// determine which blocks are still going and the order they appear in the
111 /// list returned by operator*.
112 class LockstepReverseIterator {
113   ArrayRef<BasicBlock *> Blocks;
114   SmallSetVector<BasicBlock *, 4> ActiveBlocks;
115   SmallVector<Instruction *, 4> Insts;
116   bool Fail;
117 
118 public:
119   LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
120     reset();
121   }
122 
123   void reset() {
124     Fail = false;
125     ActiveBlocks.clear();
126     for (BasicBlock *BB : Blocks)
127       ActiveBlocks.insert(BB);
128     Insts.clear();
129     for (BasicBlock *BB : Blocks) {
130       if (BB->size() <= 1) {
131         // Block wasn't big enough - only contained a terminator.
132         ActiveBlocks.remove(BB);
133         continue;
134       }
135       Insts.push_back(BB->getTerminator()->getPrevNonDebugInstruction());
136     }
137     if (Insts.empty())
138       Fail = true;
139   }
140 
141   bool isValid() const { return !Fail; }
142   ArrayRef<Instruction *> operator*() const { return Insts; }
143 
144   // Note: This needs to return a SmallSetVector as the elements of
145   // ActiveBlocks will be later copied to Blocks using std::copy. The
146   // resultant order of elements in Blocks needs to be deterministic.
147   // Using SmallPtrSet instead causes non-deterministic order while
148   // copying. And we cannot simply sort Blocks as they need to match the
149   // corresponding Values.
150   SmallSetVector<BasicBlock *, 4> &getActiveBlocks() { return ActiveBlocks; }
151 
152   void restrictToBlocks(SmallSetVector<BasicBlock *, 4> &Blocks) {
153     for (auto II = Insts.begin(); II != Insts.end();) {
154       if (!Blocks.contains((*II)->getParent())) {
155         ActiveBlocks.remove((*II)->getParent());
156         II = Insts.erase(II);
157       } else {
158         ++II;
159       }
160     }
161   }
162 
163   void operator--() {
164     if (Fail)
165       return;
166     SmallVector<Instruction *, 4> NewInsts;
167     for (auto *Inst : Insts) {
168       if (Inst == &Inst->getParent()->front())
169         ActiveBlocks.remove(Inst->getParent());
170       else
171         NewInsts.push_back(Inst->getPrevNonDebugInstruction());
172     }
173     if (NewInsts.empty()) {
174       Fail = true;
175       return;
176     }
177     Insts = NewInsts;
178   }
179 };
180 
181 //===----------------------------------------------------------------------===//
182 
183 /// Candidate solution for sinking. There may be different ways to
184 /// sink instructions, differing in the number of instructions sunk,
185 /// the number of predecessors sunk from and the number of PHIs
186 /// required.
187 struct SinkingInstructionCandidate {
188   unsigned NumBlocks;
189   unsigned NumInstructions;
190   unsigned NumPHIs;
191   unsigned NumMemoryInsts;
192   int Cost = -1;
193   SmallVector<BasicBlock *, 4> Blocks;
194 
195   void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) {
196     unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
197     unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
198     Cost = (NumInstructions * (NumBlocks - 1)) -
199            (NumExtraPHIs *
200             NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
201            - SplitEdgeCost;
202   }
203 
204   bool operator>(const SinkingInstructionCandidate &Other) const {
205     return Cost > Other.Cost;
206   }
207 };
208 
209 #ifndef NDEBUG
210 raw_ostream &operator<<(raw_ostream &OS, const SinkingInstructionCandidate &C) {
211   OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
212      << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
213   return OS;
214 }
215 #endif
216 
217 //===----------------------------------------------------------------------===//
218 
219 /// Describes a PHI node that may or may not exist. These track the PHIs
220 /// that must be created if we sunk a sequence of instructions. It provides
221 /// a hash function for efficient equality comparisons.
222 class ModelledPHI {
223   SmallVector<Value *, 4> Values;
224   SmallVector<BasicBlock *, 4> Blocks;
225 
226 public:
227   ModelledPHI() = default;
228 
229   ModelledPHI(const PHINode *PN,
230               const DenseMap<const BasicBlock *, unsigned> &BlockOrder) {
231     // BasicBlock comes first so we sort by basic block pointer order,
232     // then by value pointer order. No need to call `verifyModelledPHI`
233     // As the Values and Blocks are populated in a deterministic order.
234     using OpsType = std::pair<BasicBlock *, Value *>;
235     SmallVector<OpsType, 4> Ops;
236     for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
237       Ops.push_back({PN->getIncomingBlock(I), PN->getIncomingValue(I)});
238 
239     auto ComesBefore = [BlockOrder](OpsType O1, OpsType O2) {
240       return BlockOrder.lookup(O1.first) < BlockOrder.lookup(O2.first);
241     };
242     // Sort in a deterministic order.
243     llvm::sort(Ops, ComesBefore);
244 
245     for (auto &P : Ops) {
246       Blocks.push_back(P.first);
247       Values.push_back(P.second);
248     }
249   }
250 
251   /// Create a dummy ModelledPHI that will compare unequal to any other ModelledPHI
252   /// without the same ID.
253   /// \note This is specifically for DenseMapInfo - do not use this!
254   static ModelledPHI createDummy(size_t ID) {
255     ModelledPHI M;
256     M.Values.push_back(reinterpret_cast<Value*>(ID));
257     return M;
258   }
259 
260   void
261   verifyModelledPHI(const DenseMap<const BasicBlock *, unsigned> &BlockOrder) {
262     assert(Values.size() > 1 && Blocks.size() > 1 &&
263            "Modelling PHI with less than 2 values");
264     auto ComesBefore = [BlockOrder](const BasicBlock *BB1,
265                                     const BasicBlock *BB2) {
266       return BlockOrder.lookup(BB1) < BlockOrder.lookup(BB2);
267     };
268     assert(llvm::is_sorted(Blocks, ComesBefore));
269     int C = 0;
270     for (const Value *V : Values) {
271       if (!isa<UndefValue>(V)) {
272         assert(cast<Instruction>(V)->getParent() == Blocks[C]);
273         (void)C;
274       }
275       C++;
276     }
277   }
278   /// Create a PHI from an array of incoming values and incoming blocks.
279   ModelledPHI(SmallVectorImpl<Instruction *> &V,
280               SmallSetVector<BasicBlock *, 4> &B,
281               const DenseMap<const BasicBlock *, unsigned> &BlockOrder) {
282     // The order of Values and Blocks are already ordered by the caller.
283     llvm::copy(V, std::back_inserter(Values));
284     llvm::copy(B, std::back_inserter(Blocks));
285     verifyModelledPHI(BlockOrder);
286   }
287 
288   /// Create a PHI from [I[OpNum] for I in Insts].
289   /// TODO: Figure out a way to verifyModelledPHI in this constructor.
290   ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum,
291               SmallSetVector<BasicBlock *, 4> &B) {
292     llvm::copy(B, std::back_inserter(Blocks));
293     for (auto *I : Insts)
294       Values.push_back(I->getOperand(OpNum));
295   }
296 
297   /// Restrict the PHI's contents down to only \c NewBlocks.
298   /// \c NewBlocks must be a subset of \c this->Blocks.
299   void restrictToBlocks(const SmallSetVector<BasicBlock *, 4> &NewBlocks) {
300     auto BI = Blocks.begin();
301     auto VI = Values.begin();
302     while (BI != Blocks.end()) {
303       assert(VI != Values.end());
304       if (!NewBlocks.contains(*BI)) {
305         BI = Blocks.erase(BI);
306         VI = Values.erase(VI);
307       } else {
308         ++BI;
309         ++VI;
310       }
311     }
312     assert(Blocks.size() == NewBlocks.size());
313   }
314 
315   ArrayRef<Value *> getValues() const { return Values; }
316 
317   bool areAllIncomingValuesSame() const {
318     return llvm::all_equal(Values);
319   }
320 
321   bool areAllIncomingValuesSameType() const {
322     return llvm::all_of(
323         Values, [&](Value *V) { return V->getType() == Values[0]->getType(); });
324   }
325 
326   bool areAnyIncomingValuesConstant() const {
327     return llvm::any_of(Values, [&](Value *V) { return isa<Constant>(V); });
328   }
329 
330   // Hash functor
331   unsigned hash() const {
332     // Is deterministic because Values are saved in a specific order.
333     return (unsigned)hash_combine_range(Values.begin(), Values.end());
334   }
335 
336   bool operator==(const ModelledPHI &Other) const {
337     return Values == Other.Values && Blocks == Other.Blocks;
338   }
339 };
340 
341 template <typename ModelledPHI> struct DenseMapInfo {
342   static inline ModelledPHI &getEmptyKey() {
343     static ModelledPHI Dummy = ModelledPHI::createDummy(0);
344     return Dummy;
345   }
346 
347   static inline ModelledPHI &getTombstoneKey() {
348     static ModelledPHI Dummy = ModelledPHI::createDummy(1);
349     return Dummy;
350   }
351 
352   static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); }
353 
354   static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) {
355     return LHS == RHS;
356   }
357 };
358 
359 using ModelledPHISet = DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>>;
360 
361 //===----------------------------------------------------------------------===//
362 //                             ValueTable
363 //===----------------------------------------------------------------------===//
364 // This is a value number table where the value number is a function of the
365 // *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know
366 // that the program would be equivalent if we replaced A with PHI(A, B).
367 //===----------------------------------------------------------------------===//
368 
369 /// A GVN expression describing how an instruction is used. The operands
370 /// field of BasicExpression is used to store uses, not operands.
371 ///
372 /// This class also contains fields for discriminators used when determining
373 /// equivalence of instructions with sideeffects.
374 class InstructionUseExpr : public GVNExpression::BasicExpression {
375   unsigned MemoryUseOrder = -1;
376   bool Volatile = false;
377   ArrayRef<int> ShuffleMask;
378 
379 public:
380   InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
381                      BumpPtrAllocator &A)
382       : GVNExpression::BasicExpression(I->getNumUses()) {
383     allocateOperands(R, A);
384     setOpcode(I->getOpcode());
385     setType(I->getType());
386 
387     if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
388       ShuffleMask = SVI->getShuffleMask().copy(A);
389 
390     for (auto &U : I->uses())
391       op_push_back(U.getUser());
392     llvm::sort(op_begin(), op_end());
393   }
394 
395   void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; }
396   void setVolatile(bool V) { Volatile = V; }
397 
398   hash_code getHashValue() const override {
399     return hash_combine(GVNExpression::BasicExpression::getHashValue(),
400                         MemoryUseOrder, Volatile, ShuffleMask);
401   }
402 
403   template <typename Function> hash_code getHashValue(Function MapFn) {
404     hash_code H = hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile,
405                                ShuffleMask);
406     for (auto *V : operands())
407       H = hash_combine(H, MapFn(V));
408     return H;
409   }
410 };
411 
412 using BasicBlocksSet = SmallPtrSet<const BasicBlock *, 32>;
413 
414 class ValueTable {
415   DenseMap<Value *, uint32_t> ValueNumbering;
416   DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering;
417   DenseMap<size_t, uint32_t> HashNumbering;
418   BumpPtrAllocator Allocator;
419   ArrayRecycler<Value *> Recycler;
420   uint32_t nextValueNumber = 1;
421   BasicBlocksSet ReachableBBs;
422 
423   /// Create an expression for I based on its opcode and its uses. If I
424   /// touches or reads memory, the expression is also based upon its memory
425   /// order - see \c getMemoryUseOrder().
426   InstructionUseExpr *createExpr(Instruction *I) {
427     InstructionUseExpr *E =
428         new (Allocator) InstructionUseExpr(I, Recycler, Allocator);
429     if (isMemoryInst(I))
430       E->setMemoryUseOrder(getMemoryUseOrder(I));
431 
432     if (CmpInst *C = dyn_cast<CmpInst>(I)) {
433       CmpInst::Predicate Predicate = C->getPredicate();
434       E->setOpcode((C->getOpcode() << 8) | Predicate);
435     }
436     return E;
437   }
438 
439   /// Helper to compute the value number for a memory instruction
440   /// (LoadInst/StoreInst), including checking the memory ordering and
441   /// volatility.
442   template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) {
443     if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic())
444       return nullptr;
445     InstructionUseExpr *E = createExpr(I);
446     E->setVolatile(I->isVolatile());
447     return E;
448   }
449 
450 public:
451   ValueTable() = default;
452 
453   /// Set basic blocks reachable from entry block.
454   void setReachableBBs(const BasicBlocksSet &ReachableBBs) {
455     this->ReachableBBs = ReachableBBs;
456   }
457 
458   /// Returns the value number for the specified value, assigning
459   /// it a new number if it did not have one before.
460   uint32_t lookupOrAdd(Value *V) {
461     auto VI = ValueNumbering.find(V);
462     if (VI != ValueNumbering.end())
463       return VI->second;
464 
465     if (!isa<Instruction>(V)) {
466       ValueNumbering[V] = nextValueNumber;
467       return nextValueNumber++;
468     }
469 
470     Instruction *I = cast<Instruction>(V);
471     if (!ReachableBBs.contains(I->getParent()))
472       return ~0U;
473 
474     InstructionUseExpr *exp = nullptr;
475     switch (I->getOpcode()) {
476     case Instruction::Load:
477       exp = createMemoryExpr(cast<LoadInst>(I));
478       break;
479     case Instruction::Store:
480       exp = createMemoryExpr(cast<StoreInst>(I));
481       break;
482     case Instruction::Call:
483     case Instruction::Invoke:
484     case Instruction::FNeg:
485     case Instruction::Add:
486     case Instruction::FAdd:
487     case Instruction::Sub:
488     case Instruction::FSub:
489     case Instruction::Mul:
490     case Instruction::FMul:
491     case Instruction::UDiv:
492     case Instruction::SDiv:
493     case Instruction::FDiv:
494     case Instruction::URem:
495     case Instruction::SRem:
496     case Instruction::FRem:
497     case Instruction::Shl:
498     case Instruction::LShr:
499     case Instruction::AShr:
500     case Instruction::And:
501     case Instruction::Or:
502     case Instruction::Xor:
503     case Instruction::ICmp:
504     case Instruction::FCmp:
505     case Instruction::Trunc:
506     case Instruction::ZExt:
507     case Instruction::SExt:
508     case Instruction::FPToUI:
509     case Instruction::FPToSI:
510     case Instruction::UIToFP:
511     case Instruction::SIToFP:
512     case Instruction::FPTrunc:
513     case Instruction::FPExt:
514     case Instruction::PtrToInt:
515     case Instruction::IntToPtr:
516     case Instruction::BitCast:
517     case Instruction::AddrSpaceCast:
518     case Instruction::Select:
519     case Instruction::ExtractElement:
520     case Instruction::InsertElement:
521     case Instruction::ShuffleVector:
522     case Instruction::InsertValue:
523     case Instruction::GetElementPtr:
524       exp = createExpr(I);
525       break;
526     default:
527       break;
528     }
529 
530     if (!exp) {
531       ValueNumbering[V] = nextValueNumber;
532       return nextValueNumber++;
533     }
534 
535     uint32_t e = ExpressionNumbering[exp];
536     if (!e) {
537       hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); });
538       auto [I, Inserted] = HashNumbering.try_emplace(H, nextValueNumber);
539       e = I->second;
540       if (Inserted)
541         ExpressionNumbering[exp] = nextValueNumber++;
542     }
543     ValueNumbering[V] = e;
544     return e;
545   }
546 
547   /// Returns the value number of the specified value. Fails if the value has
548   /// not yet been numbered.
549   uint32_t lookup(Value *V) const {
550     auto VI = ValueNumbering.find(V);
551     assert(VI != ValueNumbering.end() && "Value not numbered?");
552     return VI->second;
553   }
554 
555   /// Removes all value numberings and resets the value table.
556   void clear() {
557     ValueNumbering.clear();
558     ExpressionNumbering.clear();
559     HashNumbering.clear();
560     Recycler.clear(Allocator);
561     nextValueNumber = 1;
562   }
563 
564   /// \c Inst uses or touches memory. Return an ID describing the memory state
565   /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2),
566   /// the exact same memory operations happen after I1 and I2.
567   ///
568   /// This is a very hard problem in general, so we use domain-specific
569   /// knowledge that we only ever check for equivalence between blocks sharing a
570   /// single immediate successor that is common, and when determining if I1 ==
571   /// I2 we will have already determined that next(I1) == next(I2). This
572   /// inductive property allows us to simply return the value number of the next
573   /// instruction that defines memory.
574   uint32_t getMemoryUseOrder(Instruction *Inst) {
575     auto *BB = Inst->getParent();
576     for (auto I = std::next(Inst->getIterator()), E = BB->end();
577          I != E && !I->isTerminator(); ++I) {
578       if (!isMemoryInst(&*I))
579         continue;
580       if (isa<LoadInst>(&*I))
581         continue;
582       CallInst *CI = dyn_cast<CallInst>(&*I);
583       if (CI && CI->onlyReadsMemory())
584         continue;
585       InvokeInst *II = dyn_cast<InvokeInst>(&*I);
586       if (II && II->onlyReadsMemory())
587         continue;
588       return lookupOrAdd(&*I);
589     }
590     return 0;
591   }
592 };
593 
594 //===----------------------------------------------------------------------===//
595 
596 class GVNSink {
597 public:
598   GVNSink() {}
599 
600   bool run(Function &F) {
601     LLVM_DEBUG(dbgs() << "GVNSink: running on function @" << F.getName()
602                       << "\n");
603 
604     unsigned NumSunk = 0;
605     ReversePostOrderTraversal<Function*> RPOT(&F);
606     VN.setReachableBBs(BasicBlocksSet(RPOT.begin(), RPOT.end()));
607     // Populate reverse post-order to order basic blocks in deterministic
608     // order. Any arbitrary ordering will work in this case as long as they are
609     // deterministic. The node ordering of newly created basic blocks
610     // are irrelevant because RPOT(for computing sinkable candidates) is also
611     // obtained ahead of time and only their order are relevant for this pass.
612     unsigned NodeOrdering = 0;
613     RPOTOrder[*RPOT.begin()] = ++NodeOrdering;
614     for (auto *BB : RPOT)
615       if (!pred_empty(BB))
616         RPOTOrder[BB] = ++NodeOrdering;
617     for (auto *N : RPOT)
618       NumSunk += sinkBB(N);
619 
620     return NumSunk > 0;
621   }
622 
623 private:
624   ValueTable VN;
625   DenseMap<const BasicBlock *, unsigned> RPOTOrder;
626 
627   bool shouldAvoidSinkingInstruction(Instruction *I) {
628     // These instructions may change or break semantics if moved.
629     if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
630         I->getType()->isTokenTy())
631       return true;
632     return false;
633   }
634 
635   /// The main heuristic function. Analyze the set of instructions pointed to by
636   /// LRI and return a candidate solution if these instructions can be sunk, or
637   /// std::nullopt otherwise.
638   std::optional<SinkingInstructionCandidate> analyzeInstructionForSinking(
639       LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
640       ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents);
641 
642   /// Create a ModelledPHI for each PHI in BB, adding to PHIs.
643   void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
644                           SmallPtrSetImpl<Value *> &PHIContents) {
645     for (PHINode &PN : BB->phis()) {
646       auto MPHI = ModelledPHI(&PN, RPOTOrder);
647       PHIs.insert(MPHI);
648       for (auto *V : MPHI.getValues())
649         PHIContents.insert(V);
650     }
651   }
652 
653   /// The main instruction sinking driver. Set up state and try and sink
654   /// instructions into BBEnd from its predecessors.
655   unsigned sinkBB(BasicBlock *BBEnd);
656 
657   /// Perform the actual mechanics of sinking an instruction from Blocks into
658   /// BBEnd, which is their only successor.
659   void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, BasicBlock *BBEnd);
660 
661   /// Remove PHIs that all have the same incoming value.
662   void foldPointlessPHINodes(BasicBlock *BB) {
663     auto I = BB->begin();
664     while (PHINode *PN = dyn_cast<PHINode>(I++)) {
665       if (!llvm::all_of(PN->incoming_values(), [&](const Value *V) {
666             return V == PN->getIncomingValue(0);
667           }))
668         continue;
669       if (PN->getIncomingValue(0) != PN)
670         PN->replaceAllUsesWith(PN->getIncomingValue(0));
671       else
672         PN->replaceAllUsesWith(PoisonValue::get(PN->getType()));
673       PN->eraseFromParent();
674     }
675   }
676 };
677 
678 std::optional<SinkingInstructionCandidate>
679 GVNSink::analyzeInstructionForSinking(LockstepReverseIterator &LRI,
680                                       unsigned &InstNum,
681                                       unsigned &MemoryInstNum,
682                                       ModelledPHISet &NeededPHIs,
683                                       SmallPtrSetImpl<Value *> &PHIContents) {
684   auto Insts = *LRI;
685   LLVM_DEBUG(dbgs() << " -- Analyzing instruction set: [\n"; for (auto *I
686                                                                   : Insts) {
687     I->dump();
688   } dbgs() << " ]\n";);
689 
690   DenseMap<uint32_t, unsigned> VNums;
691   for (auto *I : Insts) {
692     uint32_t N = VN.lookupOrAdd(I);
693     LLVM_DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n");
694     if (N == ~0U)
695       return std::nullopt;
696     VNums[N]++;
697   }
698   unsigned VNumToSink = llvm::max_element(VNums, llvm::less_second())->first;
699 
700   if (VNums[VNumToSink] == 1)
701     // Can't sink anything!
702     return std::nullopt;
703 
704   // Now restrict the number of incoming blocks down to only those with
705   // VNumToSink.
706   auto &ActivePreds = LRI.getActiveBlocks();
707   unsigned InitialActivePredSize = ActivePreds.size();
708   SmallVector<Instruction *, 4> NewInsts;
709   for (auto *I : Insts) {
710     if (VN.lookup(I) != VNumToSink)
711       ActivePreds.remove(I->getParent());
712     else
713       NewInsts.push_back(I);
714   }
715   for (auto *I : NewInsts)
716     if (shouldAvoidSinkingInstruction(I))
717       return std::nullopt;
718 
719   // If we've restricted the incoming blocks, restrict all needed PHIs also
720   // to that set.
721   bool RecomputePHIContents = false;
722   if (ActivePreds.size() != InitialActivePredSize) {
723     ModelledPHISet NewNeededPHIs;
724     for (auto P : NeededPHIs) {
725       P.restrictToBlocks(ActivePreds);
726       NewNeededPHIs.insert(P);
727     }
728     NeededPHIs = NewNeededPHIs;
729     LRI.restrictToBlocks(ActivePreds);
730     RecomputePHIContents = true;
731   }
732 
733   // The sunk instruction's results.
734   ModelledPHI NewPHI(NewInsts, ActivePreds, RPOTOrder);
735 
736   // Does sinking this instruction render previous PHIs redundant?
737   if (NeededPHIs.erase(NewPHI))
738     RecomputePHIContents = true;
739 
740   if (RecomputePHIContents) {
741     // The needed PHIs have changed, so recompute the set of all needed
742     // values.
743     PHIContents.clear();
744     for (auto &PHI : NeededPHIs)
745       PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
746   }
747 
748   // Is this instruction required by a later PHI that doesn't match this PHI?
749   // if so, we can't sink this instruction.
750   for (auto *V : NewPHI.getValues())
751     if (PHIContents.count(V))
752       // V exists in this PHI, but the whole PHI is different to NewPHI
753       // (else it would have been removed earlier). We cannot continue
754       // because this isn't representable.
755       return std::nullopt;
756 
757   // Which operands need PHIs?
758   // FIXME: If any of these fail, we should partition up the candidates to
759   // try and continue making progress.
760   Instruction *I0 = NewInsts[0];
761 
762   auto isNotSameOperation = [&I0](Instruction *I) {
763     return !I0->isSameOperationAs(I);
764   };
765 
766   if (any_of(NewInsts, isNotSameOperation))
767     return std::nullopt;
768 
769   for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
770     ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
771     if (PHI.areAllIncomingValuesSame())
772       continue;
773     if (!canReplaceOperandWithVariable(I0, OpNum))
774       // We can 't create a PHI from this instruction!
775       return std::nullopt;
776     if (NeededPHIs.count(PHI))
777       continue;
778     if (!PHI.areAllIncomingValuesSameType())
779       return std::nullopt;
780     // Don't create indirect calls! The called value is the final operand.
781     if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 &&
782         PHI.areAnyIncomingValuesConstant())
783       return std::nullopt;
784 
785     NeededPHIs.reserve(NeededPHIs.size());
786     NeededPHIs.insert(PHI);
787     PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
788   }
789 
790   if (isMemoryInst(NewInsts[0]))
791     ++MemoryInstNum;
792 
793   SinkingInstructionCandidate Cand;
794   Cand.NumInstructions = ++InstNum;
795   Cand.NumMemoryInsts = MemoryInstNum;
796   Cand.NumBlocks = ActivePreds.size();
797   Cand.NumPHIs = NeededPHIs.size();
798   append_range(Cand.Blocks, ActivePreds);
799 
800   return Cand;
801 }
802 
803 unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
804   LLVM_DEBUG(dbgs() << "GVNSink: running on basic block ";
805              BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
806   SmallVector<BasicBlock *, 4> Preds;
807   for (auto *B : predecessors(BBEnd)) {
808     // Bailout on basic blocks without predecessor(PR42346).
809     if (!RPOTOrder.count(B))
810       return 0;
811     auto *T = B->getTerminator();
812     if (isa<BranchInst>(T) || isa<SwitchInst>(T))
813       Preds.push_back(B);
814     else
815       return 0;
816   }
817   if (Preds.size() < 2)
818     return 0;
819   auto ComesBefore = [this](const BasicBlock *BB1, const BasicBlock *BB2) {
820     return RPOTOrder.lookup(BB1) < RPOTOrder.lookup(BB2);
821   };
822   // Sort in a deterministic order.
823   llvm::sort(Preds, ComesBefore);
824 
825   unsigned NumOrigPreds = Preds.size();
826   // We can only sink instructions through unconditional branches.
827   llvm::erase_if(Preds, [](BasicBlock *BB) {
828     return BB->getTerminator()->getNumSuccessors() != 1;
829   });
830 
831   LockstepReverseIterator LRI(Preds);
832   SmallVector<SinkingInstructionCandidate, 4> Candidates;
833   unsigned InstNum = 0, MemoryInstNum = 0;
834   ModelledPHISet NeededPHIs;
835   SmallPtrSet<Value *, 4> PHIContents;
836   analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
837   unsigned NumOrigPHIs = NeededPHIs.size();
838 
839   while (LRI.isValid()) {
840     auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
841                                              NeededPHIs, PHIContents);
842     if (!Cand)
843       break;
844     Cand->calculateCost(NumOrigPHIs, Preds.size());
845     Candidates.emplace_back(*Cand);
846     --LRI;
847   }
848 
849   llvm::stable_sort(Candidates, std::greater<SinkingInstructionCandidate>());
850   LLVM_DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C
851                                                          : Candidates) dbgs()
852                                                     << "  " << C << "\n";);
853 
854   // Pick the top candidate, as long it is positive!
855   if (Candidates.empty() || Candidates.front().Cost <= 0)
856     return 0;
857   auto C = Candidates.front();
858 
859   LLVM_DEBUG(dbgs() << " -- Sinking: " << C << "\n");
860   BasicBlock *InsertBB = BBEnd;
861   if (C.Blocks.size() < NumOrigPreds) {
862     LLVM_DEBUG(dbgs() << " -- Splitting edge to ";
863                BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
864     InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split");
865     if (!InsertBB) {
866       LLVM_DEBUG(dbgs() << " -- FAILED to split edge!\n");
867       // Edge couldn't be split.
868       return 0;
869     }
870   }
871 
872   for (unsigned I = 0; I < C.NumInstructions; ++I)
873     sinkLastInstruction(C.Blocks, InsertBB);
874 
875   return C.NumInstructions;
876 }
877 
878 void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
879                                   BasicBlock *BBEnd) {
880   SmallVector<Instruction *, 4> Insts;
881   for (BasicBlock *BB : Blocks)
882     Insts.push_back(BB->getTerminator()->getPrevNonDebugInstruction());
883   Instruction *I0 = Insts.front();
884 
885   SmallVector<Value *, 4> NewOperands;
886   for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
887     bool NeedPHI = llvm::any_of(Insts, [&I0, O](const Instruction *I) {
888       return I->getOperand(O) != I0->getOperand(O);
889     });
890     if (!NeedPHI) {
891       NewOperands.push_back(I0->getOperand(O));
892       continue;
893     }
894 
895     // Create a new PHI in the successor block and populate it.
896     auto *Op = I0->getOperand(O);
897     assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
898     auto *PN =
899         PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
900     PN->insertBefore(BBEnd->begin());
901     for (auto *I : Insts)
902       PN->addIncoming(I->getOperand(O), I->getParent());
903     NewOperands.push_back(PN);
904   }
905 
906   // Arbitrarily use I0 as the new "common" instruction; remap its operands
907   // and move it to the start of the successor block.
908   for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
909     I0->getOperandUse(O).set(NewOperands[O]);
910   I0->moveBefore(&*BBEnd->getFirstInsertionPt());
911 
912   // Update metadata and IR flags.
913   for (auto *I : Insts)
914     if (I != I0) {
915       combineMetadataForCSE(I0, I, true);
916       I0->andIRFlags(I);
917     }
918 
919   for (auto *I : Insts)
920     if (I != I0) {
921       I->replaceAllUsesWith(I0);
922       I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
923     }
924   foldPointlessPHINodes(BBEnd);
925 
926   // Finally nuke all instructions apart from the common instruction.
927   for (auto *I : Insts)
928     if (I != I0)
929       I->eraseFromParent();
930 
931   NumRemoved += Insts.size() - 1;
932 }
933 
934 } // end anonymous namespace
935 
936 PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) {
937   GVNSink G;
938   if (!G.run(F))
939     return PreservedAnalyses::all();
940 
941   return PreservedAnalyses::none();
942 }
943