//===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs a simple dominator tree walk that eliminates trivially
// redundant instructions.
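//
// For illustration only (a minimal sketch, not drawn from this file's tests):
// given IR along the lines of
//
//   %x = add i32 %a, %b
//   %y = add i32 %a, %b
//
// the second, identical add is trivially redundant in the dominating scope,
// so uses of %y are rewritten to use %x and %y is erased.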
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/EarlyCSE.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <deque>
using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "early-cse"

STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
STATISTIC(NumCSE,      "Number of instructions CSE'd");
STATISTIC(NumCSELoad,  "Number of load instructions CSE'd");
STATISTIC(NumCSECall,  "Number of call instructions CSE'd");
STATISTIC(NumDSE,      "Number of trivial dead stores removed");

//===----------------------------------------------------------------------===//
// SimpleValue
//===----------------------------------------------------------------------===//

namespace {
/// \brief Struct representing the available values in the scoped hash table.
struct SimpleValue {
  Instruction *Inst;

  SimpleValue(Instruction *I) : Inst(I) {
    assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
  }

  bool isSentinel() const {
    return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
           Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static bool canHandle(Instruction *Inst) {
    // This can only handle non-void readnone functions.
    if (CallInst *CI = dyn_cast<CallInst>(Inst))
      return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
    return isa<CastInst>(Inst) || isa<BinaryOperator>(Inst) ||
           isa<GetElementPtrInst>(Inst) || isa<CmpInst>(Inst) ||
           isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
           isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
           isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst);
  }
};
}

namespace llvm {
template <> struct DenseMapInfo<SimpleValue> {
  static inline SimpleValue getEmptyKey() {
    return DenseMapInfo<Instruction *>::getEmptyKey();
  }
  static inline SimpleValue getTombstoneKey() {
    return DenseMapInfo<Instruction *>::getTombstoneKey();
  }
  static unsigned getHashValue(SimpleValue Val);
  static bool isEqual(SimpleValue LHS, SimpleValue RHS);
};
}

unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
  Instruction *Inst = Val.Inst;
  // Hash in all of the operands as pointers.
  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst)) {
    Value *LHS = BinOp->getOperand(0);
    Value *RHS = BinOp->getOperand(1);
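    // Canonicalize commutative operations by operand pointer value so that,
    // for example, 'add i32 %a, %b' and 'add i32 %b, %a' hash to the same
    // bucket; the matching isEqual below accepts the commuted form.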
    if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
      std::swap(LHS, RHS);

    if (isa<OverflowingBinaryOperator>(BinOp)) {
      // Hash the overflow behavior
      unsigned Overflow =
          BinOp->hasNoSignedWrap() * OverflowingBinaryOperator::NoSignedWrap |
          BinOp->hasNoUnsignedWrap() *
              OverflowingBinaryOperator::NoUnsignedWrap;
      return hash_combine(BinOp->getOpcode(), Overflow, LHS, RHS);
    }

    return hash_combine(BinOp->getOpcode(), LHS, RHS);
  }

  if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
    Value *LHS = CI->getOperand(0);
    Value *RHS = CI->getOperand(1);
    CmpInst::Predicate Pred = CI->getPredicate();
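    // Canonicalize compares the same way: if the operands are swapped, hash
    // with the swapped predicate so that, e.g., 'icmp slt %a, %b' and
    // 'icmp sgt %b, %a' collide and can be CSE'd.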
    if (Inst->getOperand(0) > Inst->getOperand(1)) {
      std::swap(LHS, RHS);
      Pred = CI->getSwappedPredicate();
    }
    return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
  }

  if (CastInst *CI = dyn_cast<CastInst>(Inst))
    return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0));

  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst))
    return hash_combine(EVI->getOpcode(), EVI->getOperand(0),
                        hash_combine_range(EVI->idx_begin(), EVI->idx_end()));

  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst))
    return hash_combine(IVI->getOpcode(), IVI->getOperand(0),
                        IVI->getOperand(1),
                        hash_combine_range(IVI->idx_begin(), IVI->idx_end()));

  assert((isa<CallInst>(Inst) || isa<BinaryOperator>(Inst) ||
          isa<GetElementPtrInst>(Inst) || isa<SelectInst>(Inst) ||
          isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
          isa<ShuffleVectorInst>(Inst)) &&
         "Invalid/unknown instruction");

  // Mix in the opcode.
  return hash_combine(
      Inst->getOpcode(),
      hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
}

bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;

  if (LHS.isSentinel() || RHS.isSentinel())
    return LHSI == RHSI;

  if (LHSI->getOpcode() != RHSI->getOpcode())
    return false;
  if (LHSI->isIdenticalToWhenDefined(RHSI))
    return true;

  // If we're not strictly identical, we still might be a commutative
  // instruction with commuted operands.
  if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
    if (!LHSBinOp->isCommutative())
      return false;

    assert(isa<BinaryOperator>(RHSI) &&
           "same opcode, but different instruction type?");
    BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);

    // Commuted equality
    return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
           LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
  }
  if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
    assert(isa<CmpInst>(RHSI) &&
           "same opcode, but different instruction type?");
    CmpInst *RHSCmp = cast<CmpInst>(RHSI);
    // Commuted equality
    return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
           LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
           LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
  }

  return false;
}

//===----------------------------------------------------------------------===//
// CallValue
//===----------------------------------------------------------------------===//

namespace {
/// \brief Struct representing the available call values in the scoped hash
/// table.
struct CallValue {
  Instruction *Inst;

  CallValue(Instruction *I) : Inst(I) {
    assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
  }

  bool isSentinel() const {
    return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
           Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static bool canHandle(Instruction *Inst) {
    // Don't value number anything that returns void.
    if (Inst->getType()->isVoidTy())
      return false;

    CallInst *CI = dyn_cast<CallInst>(Inst);
    if (!CI || !CI->onlyReadsMemory())
      return false;
    return true;
  }
};
}

namespace llvm {
template <> struct DenseMapInfo<CallValue> {
  static inline CallValue getEmptyKey() {
    return DenseMapInfo<Instruction *>::getEmptyKey();
  }
  static inline CallValue getTombstoneKey() {
    return DenseMapInfo<Instruction *>::getTombstoneKey();
  }
  static unsigned getHashValue(CallValue Val);
  static bool isEqual(CallValue LHS, CallValue RHS);
};
}

unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
  Instruction *Inst = Val.Inst;
  // Hash all of the operands as pointers and mix in the opcode.
  return hash_combine(
      Inst->getOpcode(),
      hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
}

bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
  if (LHS.isSentinel() || RHS.isSentinel())
    return LHSI == RHSI;
  return LHSI->isIdenticalTo(RHSI);
}

//===----------------------------------------------------------------------===//
// EarlyCSE implementation
//===----------------------------------------------------------------------===//

namespace {
/// \brief A simple and fast domtree-based CSE pass.
///
/// This pass does a simple depth-first walk over the dominator tree,
/// eliminating trivially redundant instructions and using instsimplify to
/// canonicalize things as it goes. It is intended to be fast and catch obvious
/// cases so that instcombine and other passes are more effective. It is
/// expected that a later pass of GVN will catch the interesting/hard cases.
class EarlyCSE {
public:
  const TargetLibraryInfo &TLI;
  const TargetTransformInfo &TTI;
  DominatorTree &DT;
  AssumptionCache &AC;
  typedef RecyclingAllocator<
      BumpPtrAllocator, ScopedHashTableVal<SimpleValue, Value *>> AllocatorTy;
  typedef ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,
                          AllocatorTy> ScopedHTType;

  /// \brief A scoped hash table of the current values of all of our simple
  /// scalar expressions.
  ///
  /// As we walk down the domtree, we look to see if instructions are in this:
  /// if so, we replace them with what we find, otherwise we insert them so
  /// that dominated values can succeed in their lookup.
  ScopedHTType AvailableValues;

  /// A scoped hash table of the current values of previously encountered
  /// memory locations.
  ///
  /// This allows us to get efficient access to dominating loads or stores when
  /// we have a fully redundant load.  In addition to the most recent load, we
  /// keep track of a generation count of the read, which is compared against
  /// the current generation count.  The current generation count is incremented
  /// after every possibly writing memory operation, which ensures that we only
  /// CSE loads with other loads that have no intervening store.  Ordering
  /// events (such as fences or atomic instructions) increment the generation
  /// count as well; essentially, we model these as writes to all possible
  /// locations.  Note that atomic and/or volatile loads and stores can be
  /// present in the table; it is the responsibility of the consumer to inspect
  /// the atomicity/volatility if needed.
  struct LoadValue {
    Value *Data;
    unsigned Generation;
    int MatchingId;
    bool IsAtomic;
    LoadValue()
      : Data(nullptr), Generation(0), MatchingId(-1), IsAtomic(false) {}
    LoadValue(Value *Data, unsigned Generation, unsigned MatchingId,
              bool IsAtomic)
      : Data(Data), Generation(Generation), MatchingId(MatchingId),
        IsAtomic(IsAtomic) {}
  };
  typedef RecyclingAllocator<BumpPtrAllocator,
                             ScopedHashTableVal<Value *, LoadValue>>
      LoadMapAllocator;
  typedef ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
                          LoadMapAllocator> LoadHTType;
  LoadHTType AvailableLoads;

  /// \brief A scoped hash table of the current values of read-only call
  /// values.
  ///
  /// It uses the same generation count as loads.
  typedef ScopedHashTable<CallValue, std::pair<Value *, unsigned>> CallHTType;
  CallHTType AvailableCalls;

  /// \brief This is the current generation of the memory value.
  unsigned CurrentGeneration;

  /// \brief Set up the EarlyCSE runner for a particular function.
  EarlyCSE(const TargetLibraryInfo &TLI, const TargetTransformInfo &TTI,
           DominatorTree &DT, AssumptionCache &AC)
      : TLI(TLI), TTI(TTI), DT(DT), AC(AC), CurrentGeneration(0) {}

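  /// \brief Run EarlyCSE over the function the analyses were built for;
  /// returns true if any instruction was removed or rewritten.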
  bool run();

private:
  // Almost a POD, but needs to call the constructors for the scoped hash
  // tables so that a new scope gets pushed on. These are RAII so that the
  // scope gets popped when the NodeScope is destroyed.
  class NodeScope {
  public:
    NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
              CallHTType &AvailableCalls)
        : Scope(AvailableValues), LoadScope(AvailableLoads),
          CallScope(AvailableCalls) {}

  private:
    NodeScope(const NodeScope &) = delete;
    void operator=(const NodeScope &) = delete;

    ScopedHTType::ScopeTy Scope;
    LoadHTType::ScopeTy LoadScope;
    CallHTType::ScopeTy CallScope;
  };

  // Contains all the needed information to create a stack for doing a
  // depth-first traversal of the tree. This includes scopes for values, loads,
  // and calls as well as the generation. There is a child iterator so that the
  // children do not need to be stored separately.
  class StackNode {
  public:
    StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
              CallHTType &AvailableCalls, unsigned cg, DomTreeNode *n,
              DomTreeNode::iterator child, DomTreeNode::iterator end)
        : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
          EndIter(end), Scopes(AvailableValues, AvailableLoads, AvailableCalls),
          Processed(false) {}

    // Accessors.
    unsigned currentGeneration() { return CurrentGeneration; }
    unsigned childGeneration() { return ChildGeneration; }
    void childGeneration(unsigned generation) { ChildGeneration = generation; }
    DomTreeNode *node() { return Node; }
    DomTreeNode::iterator childIter() { return ChildIter; }
    DomTreeNode *nextChild() {
      DomTreeNode *child = *ChildIter;
      ++ChildIter;
      return child;
    }
    DomTreeNode::iterator end() { return EndIter; }
    bool isProcessed() { return Processed; }
    void process() { Processed = true; }

  private:
    StackNode(const StackNode &) = delete;
    void operator=(const StackNode &) = delete;

    // Members.
    unsigned CurrentGeneration;
    unsigned ChildGeneration;
    DomTreeNode *Node;
    DomTreeNode::iterator ChildIter;
    DomTreeNode::iterator EndIter;
    NodeScope Scopes;
    bool Processed;
  };

  /// \brief Wrapper class to handle memory instructions, including loads,
  /// stores and intrinsic loads and stores defined by the target.
  class ParseMemoryInst {
  public:
    ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
      : IsTargetMemInst(false), Inst(Inst) {
      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
        if (TTI.getTgtMemIntrinsic(II, Info) && Info.NumMemRefs == 1)
          IsTargetMemInst = true;
    }
    bool isLoad() const {
      if (IsTargetMemInst) return Info.ReadMem;
      return isa<LoadInst>(Inst);
    }
    bool isStore() const {
      if (IsTargetMemInst) return Info.WriteMem;
      return isa<StoreInst>(Inst);
    }
    bool isAtomic() const {
      if (IsTargetMemInst) {
        assert(Info.IsSimple && "need to refine IsSimple in TTI");
        return false;
      }
      return Inst->isAtomic();
    }
    bool isUnordered() const {
      if (IsTargetMemInst) {
        assert(Info.IsSimple && "need to refine IsSimple in TTI");
        return true;
      }
      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        return LI->isUnordered();
      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        return SI->isUnordered();
      }
      // Conservative answer
      return !Inst->isAtomic();
    }

    bool isVolatile() const {
      if (IsTargetMemInst) {
        assert(Info.IsSimple && "need to refine IsSimple in TTI");
        return false;
      }
      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        return LI->isVolatile();
      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        return SI->isVolatile();
      }
      // Conservative answer
      return true;
    }

    bool isMatchingMemLoc(const ParseMemoryInst &Inst) const {
      return (getPointerOperand() == Inst.getPointerOperand() &&
              getMatchingId() == Inst.getMatchingId());
    }
    bool isValid() const { return getPointerOperand() != nullptr; }

    // For regular (non-intrinsic) loads/stores, this is set to -1. For
    // intrinsic loads/stores, the id is retrieved from the corresponding
    // field in the MemIntrinsicInfo structure.  That field contains
    // non-negative values only.
    int getMatchingId() const {
      if (IsTargetMemInst) return Info.MatchingId;
      return -1;
    }
    Value *getPointerOperand() const {
      if (IsTargetMemInst) return Info.PtrVal;
      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        return LI->getPointerOperand();
      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        return SI->getPointerOperand();
      }
      return nullptr;
    }
    bool mayReadFromMemory() const {
      if (IsTargetMemInst) return Info.ReadMem;
      return Inst->mayReadFromMemory();
    }
    bool mayWriteToMemory() const {
      if (IsTargetMemInst) return Info.WriteMem;
      return Inst->mayWriteToMemory();
    }

  private:
    bool IsTargetMemInst;
    MemIntrinsicInfo Info;
    Instruction *Inst;
  };

  bool processNode(DomTreeNode *Node);

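  /// \brief Given an entry from the available-loads table, return the value it
  /// makes available at the expected type: the load itself, the stored value
  /// of a store, or a target-defined result for a recognized memory intrinsic.
  /// May return null if the target cannot materialize a result.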
  Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
    if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
      return LI;
    else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
      return SI->getValueOperand();
    assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
    return TTI.getOrCreateResultFromMemIntrinsic(cast<IntrinsicInst>(Inst),
                                                 ExpectedType);
  }
};
}

bool EarlyCSE::processNode(DomTreeNode *Node) {
  BasicBlock *BB = Node->getBlock();

  // If this block has a single predecessor, then the predecessor is the parent
  // of the domtree node and all of the live-out memory values are still
  // current in this block.  If this block has multiple predecessors, then they
  // could have invalidated the live-out memory values of our parent.  For now,
  // just be conservative and invalidate memory if this block has multiple
  // predecessors.
  if (!BB->getSinglePredecessor())
    ++CurrentGeneration;

  // If this node has a single predecessor which ends in a conditional branch,
  // we can infer the value of the branch condition given that we took this
  // path.  We need the single predecessor to ensure there's not another path
  // which reaches this block where the condition might hold a different
  // value.  Since we're adding this to the scoped hash table (like any other
  // def), it will have been popped if we encounter a future merge block.
  if (BasicBlock *Pred = BB->getSinglePredecessor())
    if (auto *BI = dyn_cast<BranchInst>(Pred->getTerminator()))
      if (BI->isConditional())
        if (auto *CondInst = dyn_cast<Instruction>(BI->getCondition()))
          if (SimpleValue::canHandle(CondInst)) {
            assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
            auto *ConditionalConstant = (BI->getSuccessor(0) == BB) ?
              ConstantInt::getTrue(BB->getContext()) :
              ConstantInt::getFalse(BB->getContext());
            AvailableValues.insert(CondInst, ConditionalConstant);
            DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
                  << CondInst->getName() << "' as " << *ConditionalConstant
                  << " in " << BB->getName() << "\n");
            // Replace all dominated uses with the known value
            replaceDominatedUsesWith(CondInst, ConditionalConstant, DT,
                                     BasicBlockEdge(Pred, BB));
          }

  /// LastStore - Keep track of the last non-volatile store that we saw... for
  /// as long as there is no instruction that reads memory.  If we see a store
  /// to the same location, we delete the dead store.  This zaps trivial dead
  /// stores which can occur in bitfield code among other things.
  Instruction *LastStore = nullptr;

  bool Changed = false;
  const DataLayout &DL = BB->getModule()->getDataLayout();

  // See if any instructions in the block can be eliminated.  If so, do it.  If
  // not, add them to AvailableValues.
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
    Instruction *Inst = &*I++;

    // Dead instructions should just be removed.
    if (isInstructionTriviallyDead(Inst, &TLI)) {
      DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
      Inst->eraseFromParent();
      Changed = true;
      ++NumSimplify;
      continue;
    }

    // Skip assume intrinsics, they don't really have side effects (although
    // they're marked as such to ensure preservation of control dependencies),
    // and this pass will not disturb any of the assumption's control
    // dependencies.
    if (match(Inst, m_Intrinsic<Intrinsic::assume>())) {
      DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n');
      continue;
    }

    // If the instruction can be simplified (e.g. X+0 = X) then replace it with
    // its simpler value.
    if (Value *V = SimplifyInstruction(Inst, DL, &TLI, &DT, &AC)) {
      DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << "  to: " << *V << '\n');
      Inst->replaceAllUsesWith(V);
      Inst->eraseFromParent();
      Changed = true;
      ++NumSimplify;
      continue;
    }

    // If this is a simple instruction that we can value number, process it.
    if (SimpleValue::canHandle(Inst)) {
      // See if the instruction has an available value.  If so, use it.
      if (Value *V = AvailableValues.lookup(Inst)) {
        DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst << "  to: " << *V << '\n');
        if (auto *I = dyn_cast<Instruction>(V))
          I->andIRFlags(Inst);
        Inst->replaceAllUsesWith(V);
        Inst->eraseFromParent();
        Changed = true;
        ++NumCSE;
        continue;
      }

      // Otherwise, just remember that this value is available.
      AvailableValues.insert(Inst, Inst);
      continue;
    }

    ParseMemoryInst MemInst(Inst, TTI);
    // If this is a load, process it.
    if (MemInst.isValid() && MemInst.isLoad()) {
      // Conservatively, we can't peek past the ordering implied by this
      // operation, but we can add this load to our set of available values.
      if (MemInst.isVolatile() || !MemInst.isUnordered()) {
        LastStore = nullptr;
        ++CurrentGeneration;
      }

      // If we have an available version of this load, and if it is the right
      // generation, replace this instruction.
      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
      if (InVal.Data != nullptr && InVal.Generation == CurrentGeneration &&
          InVal.MatchingId == MemInst.getMatchingId() &&
          // We don't yet handle removing loads with ordering of any kind.
          !MemInst.isVolatile() && MemInst.isUnordered() &&
          // We can't replace an atomic load with one which isn't also atomic.
          InVal.IsAtomic >= MemInst.isAtomic()) {
        Value *Op = getOrCreateResult(InVal.Data, Inst->getType());
        if (Op != nullptr) {
          DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
                       << "  to: " << *InVal.Data << '\n');
          if (!Inst->use_empty())
            Inst->replaceAllUsesWith(Op);
          Inst->eraseFromParent();
          Changed = true;
          ++NumCSELoad;
          continue;
        }
      }

      // Otherwise, remember that we have this instruction.
      AvailableLoads.insert(
          MemInst.getPointerOperand(),
          LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
                    MemInst.isAtomic()));
      LastStore = nullptr;
      continue;
    }

    // If this instruction may read from memory, forget LastStore.
    // Load/store intrinsics will indicate both a read and a write to
    // memory.  The target may override this (e.g. so that a store intrinsic
    // does not read from memory, and thus will be treated the same as a
    // regular store for commoning purposes).
    if (Inst->mayReadFromMemory() &&
        !(MemInst.isValid() && !MemInst.mayReadFromMemory()))
      LastStore = nullptr;

    // If this is a read-only call, process it.
    if (CallValue::canHandle(Inst)) {
      // If we have an available version of this call, and if it is the right
      // generation, replace this instruction.
      std::pair<Value *, unsigned> InVal = AvailableCalls.lookup(Inst);
      if (InVal.first != nullptr && InVal.second == CurrentGeneration) {
        DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst
                     << "  to: " << *InVal.first << '\n');
        if (!Inst->use_empty())
          Inst->replaceAllUsesWith(InVal.first);
        Inst->eraseFromParent();
        Changed = true;
        ++NumCSECall;
        continue;
      }

      // Otherwise, remember that we have this instruction.
      AvailableCalls.insert(
          Inst, std::pair<Value *, unsigned>(Inst, CurrentGeneration));
      continue;
    }

    // A release fence requires that all stores complete before it, but does
    // not prevent the reordering of following loads 'before' the fence.  As a
    // result, we don't need to consider it as writing to memory and don't need
    // to advance the generation.  We do need to prevent DSE across the fence,
    // but that's handled above.
    if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
      if (FI->getOrdering() == AtomicOrdering::Release) {
        assert(Inst->mayReadFromMemory() && "relied on to prevent DSE above");
        continue;
      }

    // write back DSE - If we write back the same value we just loaded from
    // the same location and haven't passed any intervening writes or ordering
    // operations, we can remove the write.  The primary benefit is in allowing
    // the available load table to remain valid and value forward past where
    // the store originally was.
    if (MemInst.isValid() && MemInst.isStore()) {
      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
      if (InVal.Data &&
          InVal.Data == getOrCreateResult(Inst, InVal.Data->getType()) &&
          InVal.Generation == CurrentGeneration &&
          InVal.MatchingId == MemInst.getMatchingId() &&
          // We don't yet handle removing stores with ordering of any kind.
          !MemInst.isVolatile() && MemInst.isUnordered()) {
        assert((!LastStore ||
                ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
                MemInst.getPointerOperand()) &&
               "can't have an intervening store!");
        DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << *Inst << '\n');
        Inst->eraseFromParent();
        Changed = true;
        ++NumDSE;
        // We can avoid incrementing the generation count since we were able
        // to eliminate this store.
        continue;
      }
    }

    // Okay, this isn't something we can CSE at all.  Check to see if it is
    // something that could modify memory.  If so, our available memory values
    // cannot be used so bump the generation count.
    if (Inst->mayWriteToMemory()) {
      ++CurrentGeneration;

      if (MemInst.isValid() && MemInst.isStore()) {
        // We do a trivial form of DSE if there are two stores to the same
        // location with no intervening loads.  Delete the earlier store.
        // At the moment, we don't remove ordered stores, but do remove
        // unordered atomic stores.  There's no special requirement (for
        // unordered atomics) about removing atomic stores only in favor of
        // other atomic stores since we're going to execute the non-atomic
        // one anyway and the atomic one might never have become visible.
        if (LastStore) {
          ParseMemoryInst LastStoreMemInst(LastStore, TTI);
          assert(LastStoreMemInst.isUnordered() &&
                 !LastStoreMemInst.isVolatile() &&
                 "Violated invariant");
          if (LastStoreMemInst.isMatchingMemLoc(MemInst)) {
            DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
                         << "  due to: " << *Inst << '\n');
            LastStore->eraseFromParent();
            Changed = true;
            ++NumDSE;
            LastStore = nullptr;
          }
          // fallthrough - we can exploit information about this store
        }

        // Okay, we just invalidated anything we knew about loaded values.  Try
        // to salvage *something* by remembering that the stored value is a live
        // version of the pointer.  It is safe to forward from volatile stores
        // to non-volatile loads, so we don't have to check for volatility of
        // the store.
        AvailableLoads.insert(
            MemInst.getPointerOperand(),
            LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
                      MemInst.isAtomic()));

        // Remember that this was the last unordered store we saw for DSE. We
        // don't yet handle DSE on ordered or volatile stores since we don't
        // have a good way to model the ordering requirement for following
        // passes once the store is removed.  We could insert a fence, but
        // since fences are slightly stronger than stores in their ordering,
        // it's not clear this is a profitable transform. Another option would
        // be to merge the ordering with that of the post dominating store.
        if (MemInst.isUnordered() && !MemInst.isVolatile())
          LastStore = Inst;
        else
          LastStore = nullptr;
      }
    }
  }

  return Changed;
}

bool EarlyCSE::run() {
  // Note, deque is being used here because there are significant performance
  // gains over vector when the container becomes very large due to the
  // specific access patterns. For more information see the mailing list
  // discussion on this:
  // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
  std::deque<StackNode *> nodesToProcess;

  bool Changed = false;

  // Process the root node.
  nodesToProcess.push_back(new StackNode(
      AvailableValues, AvailableLoads, AvailableCalls, CurrentGeneration,
      DT.getRootNode(), DT.getRootNode()->begin(), DT.getRootNode()->end()));

  // Save the current generation.
  unsigned LiveOutGeneration = CurrentGeneration;

  // Process the stack.
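  // Each StackNode is revisited several times: the first visit processes its
  // block and records the generation for its children; subsequent visits push
  // the remaining children one at a time.  Once a node and all of its children
  // are done, deleting the StackNode pops the hash-table scopes it opened.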
  while (!nodesToProcess.empty()) {
    // Grab the item on top of the stack. Set the current generation, remove
    // the node from the stack, and process it.
    StackNode *NodeToProcess = nodesToProcess.back();

    // Initialize class members.
    CurrentGeneration = NodeToProcess->currentGeneration();

    // Check if the node needs to be processed.
    if (!NodeToProcess->isProcessed()) {
      // Process the node.
      Changed |= processNode(NodeToProcess->node());
      NodeToProcess->childGeneration(CurrentGeneration);
      NodeToProcess->process();
    } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
      // Push the next child onto the stack.
      DomTreeNode *child = NodeToProcess->nextChild();
      nodesToProcess.push_back(
          new StackNode(AvailableValues, AvailableLoads, AvailableCalls,
                        NodeToProcess->childGeneration(), child, child->begin(),
                        child->end()));
    } else {
      // It has been processed, and there are no more children to process,
      // so delete it and pop it off the stack.
      delete NodeToProcess;
      nodesToProcess.pop_back();
    }
  } // while (!nodes...)

  // Reset the current generation.
  CurrentGeneration = LiveOutGeneration;

  return Changed;
}

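/// \brief Entry point for the new pass manager (typically reached via a pass
/// pipeline, e.g. "opt -passes=early-cse"); builds an EarlyCSE runner from the
/// function analyses and reports which analyses remain preserved.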
PreservedAnalyses EarlyCSEPass::run(Function &F,
                                    AnalysisManager<Function> &AM) {
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &AC = AM.getResult<AssumptionAnalysis>(F);

  EarlyCSE CSE(TLI, TTI, DT, AC);

  if (!CSE.run())
    return PreservedAnalyses::all();

  // CSE preserves the dominator tree because it doesn't mutate the CFG.
  // FIXME: Bundle this with other CFG-preservation.
  PreservedAnalyses PA;
  PA.preserve<DominatorTreeAnalysis>();
  return PA;
}

namespace {
/// \brief A simple and fast domtree-based CSE pass.
///
/// This pass does a simple depth-first walk over the dominator tree,
/// eliminating trivially redundant instructions and using instsimplify to
/// canonicalize things as it goes. It is intended to be fast and catch obvious
/// cases so that instcombine and other passes are more effective. It is
/// expected that a later pass of GVN will catch the interesting/hard cases.
class EarlyCSELegacyPass : public FunctionPass {
public:
  static char ID;

  EarlyCSELegacyPass() : FunctionPass(ID) {
    initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipOptnoneFunction(F))
      return false;

    auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
    auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);

    EarlyCSE CSE(TLI, TTI, DT, AC);

    return CSE.run();
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addPreserved<GlobalsAAWrapperPass>();
    AU.setPreservesCFG();
  }
};
}

char EarlyCSELegacyPass::ID = 0;

FunctionPass *llvm::createEarlyCSEPass() { return new EarlyCSELegacyPass(); }

INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false)