xref: /llvm-project/llvm/lib/Transforms/Scalar/GVNSink.cpp (revision ba4764c2cc14b0b495af539a913de10cf8268420)
1 //===- GVNSink.cpp - sink expressions into successors ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file GVNSink.cpp
10 /// This pass attempts to sink instructions into successors, reducing static
11 /// instruction count and enabling if-conversion.
12 ///
13 /// We use a variant of global value numbering to decide what can be sunk.
14 /// Consider:
15 ///
16 /// [ %a1 = add i32 %b, 1  ]   [ %c1 = add i32 %d, 1  ]
17 /// [ %a2 = xor i32 %a1, 1 ]   [ %c2 = xor i32 %c1, 1 ]
18 ///                  \           /
19 ///            [ %e = phi i32 %a2, %c2 ]
20 ///            [ add i32 %e, 4         ]
21 ///
22 ///
23 /// GVN would number %a1 and %c1 differently because they compute different
24 /// results - the VN of an instruction is a function of its opcode and the
25 /// transitive closure of its operands. This is the key property for hoisting
26 /// and CSE.
27 ///
/// What we want when sinking, however, is a numbering that is a function of
/// the *uses* of an instruction, which allows us to answer the question "if I
/// replace %a1 with %c1, will it contribute in an equivalent way to all
/// successive instructions?". The ValueTable class in this file provides this
/// mapping.
33 //
34 //===----------------------------------------------------------------------===//
35 
36 #include "llvm/ADT/ArrayRef.h"
37 #include "llvm/ADT/DenseMap.h"
38 #include "llvm/ADT/DenseMapInfo.h"
39 #include "llvm/ADT/DenseSet.h"
40 #include "llvm/ADT/Hashing.h"
41 #include "llvm/ADT/None.h"
42 #include "llvm/ADT/Optional.h"
43 #include "llvm/ADT/PostOrderIterator.h"
44 #include "llvm/ADT/STLExtras.h"
45 #include "llvm/ADT/SmallPtrSet.h"
46 #include "llvm/ADT/SmallVector.h"
47 #include "llvm/ADT/Statistic.h"
48 #include "llvm/ADT/StringExtras.h"
49 #include "llvm/Analysis/GlobalsModRef.h"
50 #include "llvm/IR/BasicBlock.h"
51 #include "llvm/IR/CFG.h"
52 #include "llvm/IR/Constants.h"
53 #include "llvm/IR/Function.h"
54 #include "llvm/IR/InstrTypes.h"
55 #include "llvm/IR/Instruction.h"
56 #include "llvm/IR/Instructions.h"
57 #include "llvm/IR/PassManager.h"
58 #include "llvm/IR/Type.h"
59 #include "llvm/IR/Use.h"
60 #include "llvm/IR/Value.h"
61 #include "llvm/InitializePasses.h"
62 #include "llvm/Pass.h"
63 #include "llvm/Support/Allocator.h"
64 #include "llvm/Support/ArrayRecycler.h"
65 #include "llvm/Support/AtomicOrdering.h"
66 #include "llvm/Support/Casting.h"
67 #include "llvm/Support/Compiler.h"
68 #include "llvm/Support/Debug.h"
69 #include "llvm/Support/raw_ostream.h"
70 #include "llvm/Transforms/Scalar.h"
71 #include "llvm/Transforms/Scalar/GVN.h"
72 #include "llvm/Transforms/Scalar/GVNExpression.h"
73 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
74 #include "llvm/Transforms/Utils/Local.h"
75 #include <algorithm>
76 #include <cassert>
77 #include <cstddef>
78 #include <cstdint>
79 #include <iterator>
80 #include <utility>
81 
82 using namespace llvm;
83 
84 #define DEBUG_TYPE "gvn-sink"
85 
86 STATISTIC(NumRemoved, "Number of instructions removed");
87 
88 namespace llvm {
89 namespace GVNExpression {
90 
91 LLVM_DUMP_METHOD void Expression::dump() const {
92   print(dbgs());
93   dbgs() << "\n";
94 }
95 
96 } // end namespace GVNExpression
97 } // end namespace llvm
98 
99 namespace {
100 
101 static bool isMemoryInst(const Instruction *I) {
102   return isa<LoadInst>(I) || isa<StoreInst>(I) ||
103          (isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) ||
104          (isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory());
105 }
106 
107 /// Iterates through instructions in a set of blocks in reverse order from the
108 /// first non-terminator. For example (assume all blocks have size n):
109 ///   LockstepReverseIterator I([B1, B2, B3]);
110 ///   *I-- = [B1[n], B2[n], B3[n]];
111 ///   *I-- = [B1[n-1], B2[n-1], B3[n-1]];
112 ///   *I-- = [B1[n-2], B2[n-2], B3[n-2]];
113 ///   ...
114 ///
115 /// It continues until all blocks have been exhausted. Use \c getActiveBlocks()
116 /// to
117 /// determine which blocks are still going and the order they appear in the
118 /// list returned by operator*.
119 class LockstepReverseIterator {
120   ArrayRef<BasicBlock *> Blocks;
121   SmallSetVector<BasicBlock *, 4> ActiveBlocks;
122   SmallVector<Instruction *, 4> Insts;
123   bool Fail;
124 
125 public:
126   LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
127     reset();
128   }
129 
130   void reset() {
131     Fail = false;
132     ActiveBlocks.clear();
133     for (BasicBlock *BB : Blocks)
134       ActiveBlocks.insert(BB);
135     Insts.clear();
136     for (BasicBlock *BB : Blocks) {
137       if (BB->size() <= 1) {
138         // Block wasn't big enough - only contained a terminator.
139         ActiveBlocks.remove(BB);
140         continue;
141       }
142       Insts.push_back(BB->getTerminator()->getPrevNode());
143     }
144     if (Insts.empty())
145       Fail = true;
146   }
147 
148   bool isValid() const { return !Fail; }
149   ArrayRef<Instruction *> operator*() const { return Insts; }
150 
151   // Note: This needs to return a SmallSetVector as the elements of
152   // ActiveBlocks will be later copied to Blocks using std::copy. The
153   // resultant order of elements in Blocks needs to be deterministic.
154   // Using SmallPtrSet instead causes non-deterministic order while
155   // copying. And we cannot simply sort Blocks as they need to match the
156   // corresponding Values.
157   SmallSetVector<BasicBlock *, 4> &getActiveBlocks() { return ActiveBlocks; }
158 
159   void restrictToBlocks(SmallSetVector<BasicBlock *, 4> &Blocks) {
160     for (auto II = Insts.begin(); II != Insts.end();) {
161       if (std::find(Blocks.begin(), Blocks.end(), (*II)->getParent()) ==
162           Blocks.end()) {
163         ActiveBlocks.remove((*II)->getParent());
164         II = Insts.erase(II);
165       } else {
166         ++II;
167       }
168     }
169   }
170 
171   void operator--() {
172     if (Fail)
173       return;
174     SmallVector<Instruction *, 4> NewInsts;
175     for (auto *Inst : Insts) {
176       if (Inst == &Inst->getParent()->front())
177         ActiveBlocks.remove(Inst->getParent());
178       else
179         NewInsts.push_back(Inst->getPrevNode());
180     }
181     if (NewInsts.empty()) {
182       Fail = true;
183       return;
184     }
185     Insts = NewInsts;
186   }
187 };
188 
189 //===----------------------------------------------------------------------===//
190 
191 /// Candidate solution for sinking. There may be different ways to
192 /// sink instructions, differing in the number of instructions sunk,
193 /// the number of predecessors sunk from and the number of PHIs
194 /// required.
195 struct SinkingInstructionCandidate {
196   unsigned NumBlocks;
197   unsigned NumInstructions;
198   unsigned NumPHIs;
199   unsigned NumMemoryInsts;
200   int Cost = -1;
201   SmallVector<BasicBlock *, 4> Blocks;
202 
203   void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) {
204     unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
205     unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
206     Cost = (NumInstructions * (NumBlocks - 1)) -
207            (NumExtraPHIs *
208             NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
209            - SplitEdgeCost;
210   }
211 
212   bool operator>(const SinkingInstructionCandidate &Other) const {
213     return Cost > Other.Cost;
214   }
215 };
216 
217 #ifndef NDEBUG
218 raw_ostream &operator<<(raw_ostream &OS, const SinkingInstructionCandidate &C) {
219   OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
220      << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
221   return OS;
222 }
223 #endif
224 
225 //===----------------------------------------------------------------------===//
226 
227 /// Describes a PHI node that may or may not exist. These track the PHIs
228 /// that must be created if we sunk a sequence of instructions. It provides
229 /// a hash function for efficient equality comparisons.
230 class ModelledPHI {
231   SmallVector<Value *, 4> Values;
232   SmallVector<BasicBlock *, 4> Blocks;
233 
234 public:
235   ModelledPHI() = default;
236 
237   ModelledPHI(const PHINode *PN) {
238     // BasicBlock comes first so we sort by basic block pointer order, then by value pointer order.
239     SmallVector<std::pair<BasicBlock *, Value *>, 4> Ops;
240     for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
241       Ops.push_back({PN->getIncomingBlock(I), PN->getIncomingValue(I)});
242     llvm::sort(Ops);
243     for (auto &P : Ops) {
244       Blocks.push_back(P.first);
245       Values.push_back(P.second);
246     }
247   }
248 
249   /// Create a dummy ModelledPHI that will compare unequal to any other ModelledPHI
250   /// without the same ID.
251   /// \note This is specifically for DenseMapInfo - do not use this!
252   static ModelledPHI createDummy(size_t ID) {
253     ModelledPHI M;
254     M.Values.push_back(reinterpret_cast<Value*>(ID));
255     return M;
256   }
257 
258   /// Create a PHI from an array of incoming values and incoming blocks.
259   template <typename VArray, typename BArray>
260   ModelledPHI(const VArray &V, const BArray &B) {
261     llvm::copy(V, std::back_inserter(Values));
262     llvm::copy(B, std::back_inserter(Blocks));
263   }
264 
265   /// Create a PHI from [I[OpNum] for I in Insts].
266   template <typename BArray>
267   ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum, const BArray &B) {
268     llvm::copy(B, std::back_inserter(Blocks));
269     for (auto *I : Insts)
270       Values.push_back(I->getOperand(OpNum));
271   }
272 
273   /// Restrict the PHI's contents down to only \c NewBlocks.
274   /// \c NewBlocks must be a subset of \c this->Blocks.
275   void restrictToBlocks(const SmallSetVector<BasicBlock *, 4> &NewBlocks) {
276     auto BI = Blocks.begin();
277     auto VI = Values.begin();
278     while (BI != Blocks.end()) {
279       assert(VI != Values.end());
280       if (std::find(NewBlocks.begin(), NewBlocks.end(), *BI) ==
281           NewBlocks.end()) {
282         BI = Blocks.erase(BI);
283         VI = Values.erase(VI);
284       } else {
285         ++BI;
286         ++VI;
287       }
288     }
289     assert(Blocks.size() == NewBlocks.size());
290   }
291 
292   ArrayRef<Value *> getValues() const { return Values; }
293 
294   bool areAllIncomingValuesSame() const {
295     return llvm::all_of(Values, [&](Value *V) { return V == Values[0]; });
296   }
297 
298   bool areAllIncomingValuesSameType() const {
299     return llvm::all_of(
300         Values, [&](Value *V) { return V->getType() == Values[0]->getType(); });
301   }
302 
303   bool areAnyIncomingValuesConstant() const {
304     return llvm::any_of(Values, [&](Value *V) { return isa<Constant>(V); });
305   }
306 
307   // Hash functor
308   unsigned hash() const {
309       return (unsigned)hash_combine_range(Values.begin(), Values.end());
310   }
311 
312   bool operator==(const ModelledPHI &Other) const {
313     return Values == Other.Values && Blocks == Other.Blocks;
314   }
315 };
316 
317 template <typename ModelledPHI> struct DenseMapInfo {
318   static inline ModelledPHI &getEmptyKey() {
319     static ModelledPHI Dummy = ModelledPHI::createDummy(0);
320     return Dummy;
321   }
322 
323   static inline ModelledPHI &getTombstoneKey() {
324     static ModelledPHI Dummy = ModelledPHI::createDummy(1);
325     return Dummy;
326   }
327 
328   static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); }
329 
330   static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) {
331     return LHS == RHS;
332   }
333 };
334 
335 using ModelledPHISet = DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>>;
336 
337 //===----------------------------------------------------------------------===//
338 //                             ValueTable
339 //===----------------------------------------------------------------------===//
340 // This is a value number table where the value number is a function of the
341 // *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know
342 // that the program would be equivalent if we replaced A with PHI(A, B).
343 //===----------------------------------------------------------------------===//
344 
345 /// A GVN expression describing how an instruction is used. The operands
346 /// field of BasicExpression is used to store uses, not operands.
347 ///
348 /// This class also contains fields for discriminators used when determining
349 /// equivalence of instructions with sideeffects.
350 class InstructionUseExpr : public GVNExpression::BasicExpression {
351   unsigned MemoryUseOrder = -1;
352   bool Volatile = false;
353   ArrayRef<int> ShuffleMask;
354 
355 public:
356   InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
357                      BumpPtrAllocator &A)
358       : GVNExpression::BasicExpression(I->getNumUses()) {
359     allocateOperands(R, A);
360     setOpcode(I->getOpcode());
361     setType(I->getType());
362 
363     if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) {
364       ArrayRef<int> OrigMask = SVI->getShuffleMask();
365       int *Mask = A.Allocate<int>(OrigMask.size());
366       llvm::copy(OrigMask, Mask);
367       ShuffleMask = ArrayRef<int>(Mask, OrigMask.size());
368     }
369 
370     for (auto &U : I->uses())
371       op_push_back(U.getUser());
372     llvm::sort(op_begin(), op_end());
373   }
374 
375   void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; }
376   void setVolatile(bool V) { Volatile = V; }
377 
378   hash_code getHashValue() const override {
379     return hash_combine(GVNExpression::BasicExpression::getHashValue(),
380                         MemoryUseOrder, Volatile, ShuffleMask);
381   }
382 
383   template <typename Function> hash_code getHashValue(Function MapFn) {
384     hash_code H = hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile,
385                                ShuffleMask);
386     for (auto *V : operands())
387       H = hash_combine(H, MapFn(V));
388     return H;
389   }
390 };
391 
392 class ValueTable {
393   DenseMap<Value *, uint32_t> ValueNumbering;
394   DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering;
395   DenseMap<size_t, uint32_t> HashNumbering;
396   BumpPtrAllocator Allocator;
397   ArrayRecycler<Value *> Recycler;
398   uint32_t nextValueNumber = 1;
399 
400   /// Create an expression for I based on its opcode and its uses. If I
401   /// touches or reads memory, the expression is also based upon its memory
402   /// order - see \c getMemoryUseOrder().
403   InstructionUseExpr *createExpr(Instruction *I) {
404     InstructionUseExpr *E =
405         new (Allocator) InstructionUseExpr(I, Recycler, Allocator);
406     if (isMemoryInst(I))
407       E->setMemoryUseOrder(getMemoryUseOrder(I));
408 
409     if (CmpInst *C = dyn_cast<CmpInst>(I)) {
410       CmpInst::Predicate Predicate = C->getPredicate();
411       E->setOpcode((C->getOpcode() << 8) | Predicate);
412     }
413     return E;
414   }
415 
416   /// Helper to compute the value number for a memory instruction
417   /// (LoadInst/StoreInst), including checking the memory ordering and
418   /// volatility.
419   template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) {
420     if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic())
421       return nullptr;
422     InstructionUseExpr *E = createExpr(I);
423     E->setVolatile(I->isVolatile());
424     return E;
425   }
426 
427 public:
428   ValueTable() = default;
429 
430   /// Returns the value number for the specified value, assigning
431   /// it a new number if it did not have one before.
432   uint32_t lookupOrAdd(Value *V) {
433     auto VI = ValueNumbering.find(V);
434     if (VI != ValueNumbering.end())
435       return VI->second;
436 
437     if (!isa<Instruction>(V)) {
438       ValueNumbering[V] = nextValueNumber;
439       return nextValueNumber++;
440     }
441 
442     Instruction *I = cast<Instruction>(V);
443     InstructionUseExpr *exp = nullptr;
444     switch (I->getOpcode()) {
445     case Instruction::Load:
446       exp = createMemoryExpr(cast<LoadInst>(I));
447       break;
448     case Instruction::Store:
449       exp = createMemoryExpr(cast<StoreInst>(I));
450       break;
451     case Instruction::Call:
452     case Instruction::Invoke:
453     case Instruction::FNeg:
454     case Instruction::Add:
455     case Instruction::FAdd:
456     case Instruction::Sub:
457     case Instruction::FSub:
458     case Instruction::Mul:
459     case Instruction::FMul:
460     case Instruction::UDiv:
461     case Instruction::SDiv:
462     case Instruction::FDiv:
463     case Instruction::URem:
464     case Instruction::SRem:
465     case Instruction::FRem:
466     case Instruction::Shl:
467     case Instruction::LShr:
468     case Instruction::AShr:
469     case Instruction::And:
470     case Instruction::Or:
471     case Instruction::Xor:
472     case Instruction::ICmp:
473     case Instruction::FCmp:
474     case Instruction::Trunc:
475     case Instruction::ZExt:
476     case Instruction::SExt:
477     case Instruction::FPToUI:
478     case Instruction::FPToSI:
479     case Instruction::UIToFP:
480     case Instruction::SIToFP:
481     case Instruction::FPTrunc:
482     case Instruction::FPExt:
483     case Instruction::PtrToInt:
484     case Instruction::IntToPtr:
485     case Instruction::BitCast:
486     case Instruction::AddrSpaceCast:
487     case Instruction::Select:
488     case Instruction::ExtractElement:
489     case Instruction::InsertElement:
490     case Instruction::ShuffleVector:
491     case Instruction::InsertValue:
492     case Instruction::GetElementPtr:
493       exp = createExpr(I);
494       break;
495     default:
496       break;
497     }
498 
499     if (!exp) {
500       ValueNumbering[V] = nextValueNumber;
501       return nextValueNumber++;
502     }
503 
504     uint32_t e = ExpressionNumbering[exp];
505     if (!e) {
506       hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); });
507       auto I = HashNumbering.find(H);
508       if (I != HashNumbering.end()) {
509         e = I->second;
510       } else {
511         e = nextValueNumber++;
512         HashNumbering[H] = e;
513         ExpressionNumbering[exp] = e;
514       }
515     }
516     ValueNumbering[V] = e;
517     return e;
518   }
519 
520   /// Returns the value number of the specified value. Fails if the value has
521   /// not yet been numbered.
522   uint32_t lookup(Value *V) const {
523     auto VI = ValueNumbering.find(V);
524     assert(VI != ValueNumbering.end() && "Value not numbered?");
525     return VI->second;
526   }
527 
528   /// Removes all value numberings and resets the value table.
529   void clear() {
530     ValueNumbering.clear();
531     ExpressionNumbering.clear();
532     HashNumbering.clear();
533     Recycler.clear(Allocator);
534     nextValueNumber = 1;
535   }
536 
537   /// \c Inst uses or touches memory. Return an ID describing the memory state
538   /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2),
539   /// the exact same memory operations happen after I1 and I2.
540   ///
541   /// This is a very hard problem in general, so we use domain-specific
542   /// knowledge that we only ever check for equivalence between blocks sharing a
543   /// single immediate successor that is common, and when determining if I1 ==
544   /// I2 we will have already determined that next(I1) == next(I2). This
545   /// inductive property allows us to simply return the value number of the next
546   /// instruction that defines memory.
547   uint32_t getMemoryUseOrder(Instruction *Inst) {
548     auto *BB = Inst->getParent();
549     for (auto I = std::next(Inst->getIterator()), E = BB->end();
550          I != E && !I->isTerminator(); ++I) {
551       if (!isMemoryInst(&*I))
552         continue;
553       if (isa<LoadInst>(&*I))
554         continue;
555       CallInst *CI = dyn_cast<CallInst>(&*I);
556       if (CI && CI->onlyReadsMemory())
557         continue;
558       InvokeInst *II = dyn_cast<InvokeInst>(&*I);
559       if (II && II->onlyReadsMemory())
560         continue;
561       return lookupOrAdd(&*I);
562     }
563     return 0;
564   }
565 };
566 
567 //===----------------------------------------------------------------------===//
568 
569 class GVNSink {
570 public:
571   GVNSink() = default;
572 
573   bool run(Function &F) {
574     LLVM_DEBUG(dbgs() << "GVNSink: running on function @" << F.getName()
575                       << "\n");
576 
577     unsigned NumSunk = 0;
578     ReversePostOrderTraversal<Function*> RPOT(&F);
579     for (auto *N : RPOT)
580       NumSunk += sinkBB(N);
581 
582     return NumSunk > 0;
583   }
584 
585 private:
586   ValueTable VN;
587 
588   bool isInstructionBlacklisted(Instruction *I) {
589     // These instructions may change or break semantics if moved.
590     if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
591         I->getType()->isTokenTy())
592       return true;
593     return false;
594   }
595 
596   /// The main heuristic function. Analyze the set of instructions pointed to by
597   /// LRI and return a candidate solution if these instructions can be sunk, or
598   /// None otherwise.
599   Optional<SinkingInstructionCandidate> analyzeInstructionForSinking(
600       LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
601       ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents);
602 
603   /// Create a ModelledPHI for each PHI in BB, adding to PHIs.
604   void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
605                           SmallPtrSetImpl<Value *> &PHIContents) {
606     for (PHINode &PN : BB->phis()) {
607       auto MPHI = ModelledPHI(&PN);
608       PHIs.insert(MPHI);
609       for (auto *V : MPHI.getValues())
610         PHIContents.insert(V);
611     }
612   }
613 
614   /// The main instruction sinking driver. Set up state and try and sink
615   /// instructions into BBEnd from its predecessors.
616   unsigned sinkBB(BasicBlock *BBEnd);
617 
618   /// Perform the actual mechanics of sinking an instruction from Blocks into
619   /// BBEnd, which is their only successor.
620   void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, BasicBlock *BBEnd);
621 
622   /// Remove PHIs that all have the same incoming value.
623   void foldPointlessPHINodes(BasicBlock *BB) {
624     auto I = BB->begin();
625     while (PHINode *PN = dyn_cast<PHINode>(I++)) {
626       if (!llvm::all_of(PN->incoming_values(), [&](const Value *V) {
627             return V == PN->getIncomingValue(0);
628           }))
629         continue;
630       if (PN->getIncomingValue(0) != PN)
631         PN->replaceAllUsesWith(PN->getIncomingValue(0));
632       else
633         PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
634       PN->eraseFromParent();
635     }
636   }
637 };
638 
639 Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(
640   LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
641   ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents) {
642   auto Insts = *LRI;
643   LLVM_DEBUG(dbgs() << " -- Analyzing instruction set: [\n"; for (auto *I
644                                                                   : Insts) {
645     I->dump();
646   } dbgs() << " ]\n";);
647 
648   DenseMap<uint32_t, unsigned> VNums;
649   for (auto *I : Insts) {
650     uint32_t N = VN.lookupOrAdd(I);
651     LLVM_DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n");
652     if (N == ~0U)
653       return None;
654     VNums[N]++;
655   }
656   unsigned VNumToSink =
657       std::max_element(VNums.begin(), VNums.end(),
658                        [](const std::pair<uint32_t, unsigned> &I,
659                           const std::pair<uint32_t, unsigned> &J) {
660                          return I.second < J.second;
661                        })
662           ->first;
663 
664   if (VNums[VNumToSink] == 1)
665     // Can't sink anything!
666     return None;
667 
668   // Now restrict the number of incoming blocks down to only those with
669   // VNumToSink.
670   auto &ActivePreds = LRI.getActiveBlocks();
671   unsigned InitialActivePredSize = ActivePreds.size();
672   SmallVector<Instruction *, 4> NewInsts;
673   for (auto *I : Insts) {
674     if (VN.lookup(I) != VNumToSink)
675       ActivePreds.remove(I->getParent());
676     else
677       NewInsts.push_back(I);
678   }
679   for (auto *I : NewInsts)
680     if (isInstructionBlacklisted(I))
681       return None;
682 
683   // If we've restricted the incoming blocks, restrict all needed PHIs also
684   // to that set.
685   bool RecomputePHIContents = false;
686   if (ActivePreds.size() != InitialActivePredSize) {
687     ModelledPHISet NewNeededPHIs;
688     for (auto P : NeededPHIs) {
689       P.restrictToBlocks(ActivePreds);
690       NewNeededPHIs.insert(P);
691     }
692     NeededPHIs = NewNeededPHIs;
693     LRI.restrictToBlocks(ActivePreds);
694     RecomputePHIContents = true;
695   }
696 
697   // The sunk instruction's results.
698   ModelledPHI NewPHI(NewInsts, ActivePreds);
699 
700   // Does sinking this instruction render previous PHIs redundant?
701   if (NeededPHIs.find(NewPHI) != NeededPHIs.end()) {
702     NeededPHIs.erase(NewPHI);
703     RecomputePHIContents = true;
704   }
705 
706   if (RecomputePHIContents) {
707     // The needed PHIs have changed, so recompute the set of all needed
708     // values.
709     PHIContents.clear();
710     for (auto &PHI : NeededPHIs)
711       PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
712   }
713 
714   // Is this instruction required by a later PHI that doesn't match this PHI?
715   // if so, we can't sink this instruction.
716   for (auto *V : NewPHI.getValues())
717     if (PHIContents.count(V))
718       // V exists in this PHI, but the whole PHI is different to NewPHI
719       // (else it would have been removed earlier). We cannot continue
720       // because this isn't representable.
721       return None;
722 
723   // Which operands need PHIs?
724   // FIXME: If any of these fail, we should partition up the candidates to
725   // try and continue making progress.
726   Instruction *I0 = NewInsts[0];
727 
728   // If all instructions that are going to participate don't have the same
729   // number of operands, we can't do any useful PHI analysis for all operands.
730   auto hasDifferentNumOperands = [&I0](Instruction *I) {
731     return I->getNumOperands() != I0->getNumOperands();
732   };
733   if (any_of(NewInsts, hasDifferentNumOperands))
734     return None;
735 
736   for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
737     ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
738     if (PHI.areAllIncomingValuesSame())
739       continue;
740     if (!canReplaceOperandWithVariable(I0, OpNum))
741       // We can 't create a PHI from this instruction!
742       return None;
743     if (NeededPHIs.count(PHI))
744       continue;
745     if (!PHI.areAllIncomingValuesSameType())
746       return None;
747     // Don't create indirect calls! The called value is the final operand.
748     if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 &&
749         PHI.areAnyIncomingValuesConstant())
750       return None;
751 
752     NeededPHIs.reserve(NeededPHIs.size());
753     NeededPHIs.insert(PHI);
754     PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
755   }
756 
757   if (isMemoryInst(NewInsts[0]))
758     ++MemoryInstNum;
759 
760   SinkingInstructionCandidate Cand;
761   Cand.NumInstructions = ++InstNum;
762   Cand.NumMemoryInsts = MemoryInstNum;
763   Cand.NumBlocks = ActivePreds.size();
764   Cand.NumPHIs = NeededPHIs.size();
765   for (auto *C : ActivePreds)
766     Cand.Blocks.push_back(C);
767 
768   return Cand;
769 }
770 
771 unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
772   LLVM_DEBUG(dbgs() << "GVNSink: running on basic block ";
773              BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
774   SmallVector<BasicBlock *, 4> Preds;
775   for (auto *B : predecessors(BBEnd)) {
776     auto *T = B->getTerminator();
777     if (isa<BranchInst>(T) || isa<SwitchInst>(T))
778       Preds.push_back(B);
779     else
780       return 0;
781   }
782   if (Preds.size() < 2)
783     return 0;
784   llvm::sort(Preds);
785 
786   unsigned NumOrigPreds = Preds.size();
787   // We can only sink instructions through unconditional branches.
788   for (auto I = Preds.begin(); I != Preds.end();) {
789     if ((*I)->getTerminator()->getNumSuccessors() != 1)
790       I = Preds.erase(I);
791     else
792       ++I;
793   }
794 
795   LockstepReverseIterator LRI(Preds);
796   SmallVector<SinkingInstructionCandidate, 4> Candidates;
797   unsigned InstNum = 0, MemoryInstNum = 0;
798   ModelledPHISet NeededPHIs;
799   SmallPtrSet<Value *, 4> PHIContents;
800   analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
801   unsigned NumOrigPHIs = NeededPHIs.size();
802 
803   while (LRI.isValid()) {
804     auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
805                                              NeededPHIs, PHIContents);
806     if (!Cand)
807       break;
808     Cand->calculateCost(NumOrigPHIs, Preds.size());
809     Candidates.emplace_back(*Cand);
810     --LRI;
811   }
812 
813   llvm::stable_sort(Candidates, std::greater<SinkingInstructionCandidate>());
814   LLVM_DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C
815                                                          : Candidates) dbgs()
816                                                     << "  " << C << "\n";);
817 
818   // Pick the top candidate, as long it is positive!
819   if (Candidates.empty() || Candidates.front().Cost <= 0)
820     return 0;
821   auto C = Candidates.front();
822 
823   LLVM_DEBUG(dbgs() << " -- Sinking: " << C << "\n");
824   BasicBlock *InsertBB = BBEnd;
825   if (C.Blocks.size() < NumOrigPreds) {
826     LLVM_DEBUG(dbgs() << " -- Splitting edge to ";
827                BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
828     InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split");
829     if (!InsertBB) {
830       LLVM_DEBUG(dbgs() << " -- FAILED to split edge!\n");
831       // Edge couldn't be split.
832       return 0;
833     }
834   }
835 
836   for (unsigned I = 0; I < C.NumInstructions; ++I)
837     sinkLastInstruction(C.Blocks, InsertBB);
838 
839   return C.NumInstructions;
840 }
841 
842 void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
843                                   BasicBlock *BBEnd) {
844   SmallVector<Instruction *, 4> Insts;
845   for (BasicBlock *BB : Blocks)
846     Insts.push_back(BB->getTerminator()->getPrevNode());
847   Instruction *I0 = Insts.front();
848 
849   SmallVector<Value *, 4> NewOperands;
850   for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
851     bool NeedPHI = llvm::any_of(Insts, [&I0, O](const Instruction *I) {
852       return I->getOperand(O) != I0->getOperand(O);
853     });
854     if (!NeedPHI) {
855       NewOperands.push_back(I0->getOperand(O));
856       continue;
857     }
858 
859     // Create a new PHI in the successor block and populate it.
860     auto *Op = I0->getOperand(O);
861     assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
862     auto *PN = PHINode::Create(Op->getType(), Insts.size(),
863                                Op->getName() + ".sink", &BBEnd->front());
864     for (auto *I : Insts)
865       PN->addIncoming(I->getOperand(O), I->getParent());
866     NewOperands.push_back(PN);
867   }
868 
869   // Arbitrarily use I0 as the new "common" instruction; remap its operands
870   // and move it to the start of the successor block.
871   for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
872     I0->getOperandUse(O).set(NewOperands[O]);
873   I0->moveBefore(&*BBEnd->getFirstInsertionPt());
874 
875   // Update metadata and IR flags.
876   for (auto *I : Insts)
877     if (I != I0) {
878       combineMetadataForCSE(I0, I, true);
879       I0->andIRFlags(I);
880     }
881 
882   for (auto *I : Insts)
883     if (I != I0)
884       I->replaceAllUsesWith(I0);
885   foldPointlessPHINodes(BBEnd);
886 
887   // Finally nuke all instructions apart from the common instruction.
888   for (auto *I : Insts)
889     if (I != I0)
890       I->eraseFromParent();
891 
892   NumRemoved += Insts.size() - 1;
893 }
894 
895 ////////////////////////////////////////////////////////////////////////////////
896 // Pass machinery / boilerplate
897 
898 class GVNSinkLegacyPass : public FunctionPass {
899 public:
900   static char ID;
901 
902   GVNSinkLegacyPass() : FunctionPass(ID) {
903     initializeGVNSinkLegacyPassPass(*PassRegistry::getPassRegistry());
904   }
905 
906   bool runOnFunction(Function &F) override {
907     if (skipFunction(F))
908       return false;
909     GVNSink G;
910     return G.run(F);
911   }
912 
913   void getAnalysisUsage(AnalysisUsage &AU) const override {
914     AU.addPreserved<GlobalsAAWrapperPass>();
915   }
916 };
917 
918 } // end anonymous namespace
919 
920 PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) {
921   GVNSink G;
922   if (!G.run(F))
923     return PreservedAnalyses::all();
924 
925   PreservedAnalyses PA;
926   PA.preserve<GlobalsAA>();
927   return PA;
928 }
929 
930 char GVNSinkLegacyPass::ID = 0;
931 
932 INITIALIZE_PASS_BEGIN(GVNSinkLegacyPass, "gvn-sink",
933                       "Early GVN sinking of Expressions", false, false)
934 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
935 INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
936 INITIALIZE_PASS_END(GVNSinkLegacyPass, "gvn-sink",
937                     "Early GVN sinking of Expressions", false, false)
938 
939 FunctionPass *llvm::createGVNSinkPass() { return new GVNSinkLegacyPass(); }
940