1 //===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass performs a simple dominator tree walk that eliminates trivially
11 // redundant instructions.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/Transforms/Scalar/EarlyCSE.h"
16 #include "llvm/ADT/Hashing.h"
17 #include "llvm/ADT/ScopedHashTable.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/Analysis/AssumptionCache.h"
20 #include "llvm/Analysis/GlobalsModRef.h"
21 #include "llvm/Analysis/InstructionSimplify.h"
22 #include "llvm/Analysis/TargetLibraryInfo.h"
23 #include "llvm/Analysis/TargetTransformInfo.h"
24 #include "llvm/IR/DataLayout.h"
25 #include "llvm/IR/Dominators.h"
26 #include "llvm/IR/Instructions.h"
27 #include "llvm/IR/IntrinsicInst.h"
28 #include "llvm/IR/PatternMatch.h"
29 #include "llvm/Pass.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/RecyclingAllocator.h"
32 #include "llvm/Support/raw_ostream.h"
33 #include "llvm/Transforms/Scalar.h"
34 #include "llvm/Transforms/Utils/Local.h"
35 #include "llvm/Transforms/Utils/MemorySSA.h"
36 #include <deque>
37 using namespace llvm;
38 using namespace llvm::PatternMatch;
39 
40 #define DEBUG_TYPE "early-cse"
41 
42 STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
43 STATISTIC(NumCSE,      "Number of instructions CSE'd");
44 STATISTIC(NumCSECVP,   "Number of compare instructions CVP'd");
45 STATISTIC(NumCSELoad,  "Number of load instructions CSE'd");
46 STATISTIC(NumCSECall,  "Number of call instructions CSE'd");
47 STATISTIC(NumDSE,      "Number of trivial dead stores removed");
48 
49 //===----------------------------------------------------------------------===//
50 // SimpleValue
51 //===----------------------------------------------------------------------===//
52 
53 namespace {
54 /// \brief Struct representing the available values in the scoped hash table.
55 struct SimpleValue {
56   Instruction *Inst;
57 
58   SimpleValue(Instruction *I) : Inst(I) {
59     assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
60   }
61 
62   bool isSentinel() const {
63     return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
64            Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
65   }
66 
67   static bool canHandle(Instruction *Inst) {
68     // This can only handle non-void readnone functions.
69     if (CallInst *CI = dyn_cast<CallInst>(Inst))
70       return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
71     return isa<CastInst>(Inst) || isa<BinaryOperator>(Inst) ||
72            isa<GetElementPtrInst>(Inst) || isa<CmpInst>(Inst) ||
73            isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
74            isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
75            isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst);
76   }
77 };
78 }
79 
80 namespace llvm {
81 template <> struct DenseMapInfo<SimpleValue> {
82   static inline SimpleValue getEmptyKey() {
83     return DenseMapInfo<Instruction *>::getEmptyKey();
84   }
85   static inline SimpleValue getTombstoneKey() {
86     return DenseMapInfo<Instruction *>::getTombstoneKey();
87   }
88   static unsigned getHashValue(SimpleValue Val);
89   static bool isEqual(SimpleValue LHS, SimpleValue RHS);
90 };
91 }
92 
93 unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
94   Instruction *Inst = Val.Inst;
95   // Hash in all of the operands as pointers.
96   if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst)) {
97     Value *LHS = BinOp->getOperand(0);
98     Value *RHS = BinOp->getOperand(1);
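    // Canonicalize the operand order of commutative operations by pointer
    // value so that, e.g., "add %a, %b" and "add %b, %a" hash to the same
    // value below.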
99     if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
100       std::swap(LHS, RHS);
101 
102     return hash_combine(BinOp->getOpcode(), LHS, RHS);
103   }
104 
105   if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
106     Value *LHS = CI->getOperand(0);
107     Value *RHS = CI->getOperand(1);
108     CmpInst::Predicate Pred = CI->getPredicate();
109     if (Inst->getOperand(0) > Inst->getOperand(1)) {
110       std::swap(LHS, RHS);
111       Pred = CI->getSwappedPredicate();
112     }
113     return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
114   }
115 
116   if (CastInst *CI = dyn_cast<CastInst>(Inst))
117     return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0));
118 
119   if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst))
120     return hash_combine(EVI->getOpcode(), EVI->getOperand(0),
121                         hash_combine_range(EVI->idx_begin(), EVI->idx_end()));
122 
123   if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst))
124     return hash_combine(IVI->getOpcode(), IVI->getOperand(0),
125                         IVI->getOperand(1),
126                         hash_combine_range(IVI->idx_begin(), IVI->idx_end()));
127 
128   assert((isa<CallInst>(Inst) || isa<BinaryOperator>(Inst) ||
129           isa<GetElementPtrInst>(Inst) || isa<SelectInst>(Inst) ||
130           isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
131           isa<ShuffleVectorInst>(Inst)) &&
132          "Invalid/unknown instruction");
133 
134   // Mix in the opcode.
135   return hash_combine(
136       Inst->getOpcode(),
137       hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
138 }
139 
140 bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
141   Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
142 
143   if (LHS.isSentinel() || RHS.isSentinel())
144     return LHSI == RHSI;
145 
146   if (LHSI->getOpcode() != RHSI->getOpcode())
147     return false;
148   if (LHSI->isIdenticalToWhenDefined(RHSI))
149     return true;
150 
151   // If we're not strictly identical, we still might be a commutable instruction
152   if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
153     if (!LHSBinOp->isCommutative())
154       return false;
155 
156     assert(isa<BinaryOperator>(RHSI) &&
157            "same opcode, but different instruction type?");
158     BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);
159 
160     // Commuted equality
161     return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
162            LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
163   }
164   if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
165     assert(isa<CmpInst>(RHSI) &&
166            "same opcode, but different instruction type?");
167     CmpInst *RHSCmp = cast<CmpInst>(RHSI);
168     // Commuted equality
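    // (e.g. "icmp sgt %a, %b" is treated as equal to "icmp slt %b, %a").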
169     return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
170            LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
171            LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
172   }
173 
174   return false;
175 }
176 
177 //===----------------------------------------------------------------------===//
178 // CallValue
179 //===----------------------------------------------------------------------===//
180 
181 namespace {
182 /// \brief Struct representing the available call values in the scoped hash
183 /// table.
184 struct CallValue {
185   Instruction *Inst;
186 
187   CallValue(Instruction *I) : Inst(I) {
188     assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
189   }
190 
191   bool isSentinel() const {
192     return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
193            Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
194   }
195 
196   static bool canHandle(Instruction *Inst) {
197     // Don't value number anything that returns void.
198     if (Inst->getType()->isVoidTy())
199       return false;
200 
201     CallInst *CI = dyn_cast<CallInst>(Inst);
202     if (!CI || !CI->onlyReadsMemory())
203       return false;
204     return true;
205   }
206 };
207 }
208 
209 namespace llvm {
210 template <> struct DenseMapInfo<CallValue> {
211   static inline CallValue getEmptyKey() {
212     return DenseMapInfo<Instruction *>::getEmptyKey();
213   }
214   static inline CallValue getTombstoneKey() {
215     return DenseMapInfo<Instruction *>::getTombstoneKey();
216   }
217   static unsigned getHashValue(CallValue Val);
218   static bool isEqual(CallValue LHS, CallValue RHS);
219 };
220 }
221 
222 unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
223   Instruction *Inst = Val.Inst;
224   // Hash all of the operands as pointers and mix in the opcode.
225   return hash_combine(
226       Inst->getOpcode(),
227       hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
228 }
229 
230 bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
231   Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
232   if (LHS.isSentinel() || RHS.isSentinel())
233     return LHSI == RHSI;
234   return LHSI->isIdenticalTo(RHSI);
235 }
236 
237 //===----------------------------------------------------------------------===//
238 // EarlyCSE implementation
239 //===----------------------------------------------------------------------===//
240 
241 namespace {
242 /// \brief A simple and fast domtree-based CSE pass.
243 ///
244 /// This pass does a simple depth-first walk over the dominator tree,
245 /// eliminating trivially redundant instructions and using instsimplify to
246 /// canonicalize things as it goes. It is intended to be fast and catch obvious
247 /// cases so that instcombine and other passes are more effective. It is
248 /// expected that a later pass of GVN will catch the interesting/hard cases.
249 class EarlyCSE {
250 public:
251   const TargetLibraryInfo &TLI;
252   const TargetTransformInfo &TTI;
253   DominatorTree &DT;
254   AssumptionCache &AC;
255   MemorySSA *MSSA;
256   typedef RecyclingAllocator<
257       BumpPtrAllocator, ScopedHashTableVal<SimpleValue, Value *>> AllocatorTy;
258   typedef ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,
259                           AllocatorTy> ScopedHTType;
260 
261   /// \brief A scoped hash table of the current values of all of our simple
262   /// scalar expressions.
263   ///
264   /// As we walk down the domtree, we look to see if instructions are in this:
265   /// if so, we replace them with what we find, otherwise we insert them so
266   /// that dominated values can succeed in their lookup.
267   ScopedHTType AvailableValues;
268 
269   /// A scoped hash table of the current values of previously encountered memory
270   /// locations.
271   ///
272   /// This allows us to get efficient access to dominating loads or stores when
273   /// we have a fully redundant load.  In addition to the most recent load, we
274   /// keep track of a generation count of the read, which is compared against
275   /// the current generation count.  The current generation count is incremented
276   /// after every operation that may write to memory, which ensures that we only
277   /// CSE loads with other loads that have no intervening store.  Ordering
278   /// events (such as fences or atomic instructions) increment the generation
279   /// count as well; essentially, we model these as writes to all possible
280   /// locations.  Note that atomic and/or volatile loads and stores can be
281   /// present in the table; it is the responsibility of the consumer to inspect
282   /// the atomicity/volatility if needed.
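  ///
  /// A minimal illustration of the generation scheme (assuming the call may
  /// write memory):
  ///   %a = load i32, i32* %p    ; recorded at the current generation
  ///   call void @f()            ; may write memory, generation is incremented
  ///   %b = load i32, i32* %p    ; newer generation, so not trivially CSE'd
  ///                             ; with %a without consulting MemorySSA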
283   struct LoadValue {
284     Instruction *DefInst;
285     unsigned Generation;
286     int MatchingId;
287     bool IsAtomic;
288     bool IsInvariant;
289     LoadValue()
290         : DefInst(nullptr), Generation(0), MatchingId(-1), IsAtomic(false),
291           IsInvariant(false) {}
292     LoadValue(Instruction *Inst, unsigned Generation, unsigned MatchingId,
293               bool IsAtomic, bool IsInvariant)
294         : DefInst(Inst), Generation(Generation), MatchingId(MatchingId),
295           IsAtomic(IsAtomic), IsInvariant(IsInvariant) {}
296   };
297   typedef RecyclingAllocator<BumpPtrAllocator,
298                              ScopedHashTableVal<Value *, LoadValue>>
299       LoadMapAllocator;
300   typedef ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
301                           LoadMapAllocator> LoadHTType;
302   LoadHTType AvailableLoads;
303 
304   /// \brief A scoped hash table of the current values of read-only call
305   /// values.
306   ///
307   /// It uses the same generation count as loads.
308   typedef ScopedHashTable<CallValue, std::pair<Instruction *, unsigned>>
309       CallHTType;
310   CallHTType AvailableCalls;
311 
312   /// \brief This is the current generation of the memory value.
313   unsigned CurrentGeneration;
314 
315   /// \brief Set up the EarlyCSE runner for a particular function.
316   EarlyCSE(const TargetLibraryInfo &TLI, const TargetTransformInfo &TTI,
317            DominatorTree &DT, AssumptionCache &AC, MemorySSA *MSSA)
318       : TLI(TLI), TTI(TTI), DT(DT), AC(AC), MSSA(MSSA), CurrentGeneration(0) {}
319 
320   bool run();
321 
322 private:
323   // Almost a POD, but needs to call the constructors for the scoped hash
324   // tables so that a new scope gets pushed on. These are RAII so that the
325   // scope gets popped when the NodeScope is destroyed.
326   class NodeScope {
327   public:
328     NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
329               CallHTType &AvailableCalls)
330         : Scope(AvailableValues), LoadScope(AvailableLoads),
331           CallScope(AvailableCalls) {}
332 
333   private:
334     NodeScope(const NodeScope &) = delete;
335     void operator=(const NodeScope &) = delete;
336 
337     ScopedHTType::ScopeTy Scope;
338     LoadHTType::ScopeTy LoadScope;
339     CallHTType::ScopeTy CallScope;
340   };
341 
342   // Contains all the needed information to create a stack for doing a depth
343   // first traversal of the tree. This includes scopes for values, loads, and
344   // calls as well as the generation. There is a child iterator so that the
345   // children do not need to be stored separately.
346   class StackNode {
347   public:
348     StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
349               CallHTType &AvailableCalls, unsigned cg, DomTreeNode *n,
350               DomTreeNode::iterator child, DomTreeNode::iterator end)
351         : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
352           EndIter(end), Scopes(AvailableValues, AvailableLoads, AvailableCalls),
353           Processed(false) {}
354 
355     // Accessors.
356     unsigned currentGeneration() { return CurrentGeneration; }
357     unsigned childGeneration() { return ChildGeneration; }
358     void childGeneration(unsigned generation) { ChildGeneration = generation; }
359     DomTreeNode *node() { return Node; }
360     DomTreeNode::iterator childIter() { return ChildIter; }
361     DomTreeNode *nextChild() {
362       DomTreeNode *child = *ChildIter;
363       ++ChildIter;
364       return child;
365     }
366     DomTreeNode::iterator end() { return EndIter; }
367     bool isProcessed() { return Processed; }
368     void process() { Processed = true; }
369 
370   private:
371     StackNode(const StackNode &) = delete;
372     void operator=(const StackNode &) = delete;
373 
374     // Members.
375     unsigned CurrentGeneration;
376     unsigned ChildGeneration;
377     DomTreeNode *Node;
378     DomTreeNode::iterator ChildIter;
379     DomTreeNode::iterator EndIter;
380     NodeScope Scopes;
381     bool Processed;
382   };
383 
384   /// \brief Wrapper class to handle memory instructions, including loads,
385   /// stores and intrinsic loads and stores defined by the target.
386   class ParseMemoryInst {
387   public:
388     ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
389       : IsTargetMemInst(false), Inst(Inst) {
390       if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
391         if (TTI.getTgtMemIntrinsic(II, Info) && Info.NumMemRefs == 1)
392           IsTargetMemInst = true;
393     }
394     bool isLoad() const {
395       if (IsTargetMemInst) return Info.ReadMem;
396       return isa<LoadInst>(Inst);
397     }
398     bool isStore() const {
399       if (IsTargetMemInst) return Info.WriteMem;
400       return isa<StoreInst>(Inst);
401     }
402     bool isAtomic() const {
403       if (IsTargetMemInst) {
404         assert(Info.IsSimple && "need to refine IsSimple in TTI");
405         return false;
406       }
407       return Inst->isAtomic();
408     }
409     bool isUnordered() const {
410       if (IsTargetMemInst) {
411         assert(Info.IsSimple && "need to refine IsSimple in TTI");
412         return true;
413       }
414       if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
415         return LI->isUnordered();
416       } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
417         return SI->isUnordered();
418       }
419       // Conservative answer
420       return !Inst->isAtomic();
421     }
422 
423     bool isVolatile() const {
424       if (IsTargetMemInst) {
425         assert(Info.IsSimple && "need to refine IsSimple in TTI");
426         return false;
427       }
428       if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
429         return LI->isVolatile();
430       } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
431         return SI->isVolatile();
432       }
433       // Conservative answer
434       return true;
435     }
436 
437     bool isInvariantLoad() const {
438       if (auto *LI = dyn_cast<LoadInst>(Inst))
439         return LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr;
440       return false;
441     }
442 
443     bool isMatchingMemLoc(const ParseMemoryInst &Inst) const {
444       return (getPointerOperand() == Inst.getPointerOperand() &&
445               getMatchingId() == Inst.getMatchingId());
446     }
447     bool isValid() const { return getPointerOperand() != nullptr; }
448 
449     // For regular (non-intrinsic) loads/stores, this is set to -1. For
450     // intrinsic loads/stores, the id is retrieved from the corresponding
451     // field in the MemIntrinsicInfo structure.  That field contains
452     // non-negative values only.
453     int getMatchingId() const {
454       if (IsTargetMemInst) return Info.MatchingId;
455       return -1;
456     }
457     Value *getPointerOperand() const {
458       if (IsTargetMemInst) return Info.PtrVal;
459       if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
460         return LI->getPointerOperand();
461       } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
462         return SI->getPointerOperand();
463       }
464       return nullptr;
465     }
466     bool mayReadFromMemory() const {
467       if (IsTargetMemInst) return Info.ReadMem;
468       return Inst->mayReadFromMemory();
469     }
470     bool mayWriteToMemory() const {
471       if (IsTargetMemInst) return Info.WriteMem;
472       return Inst->mayWriteToMemory();
473     }
474 
475   private:
476     bool IsTargetMemInst;
477     MemIntrinsicInfo Info;
478     Instruction *Inst;
479   };
480 
481   bool processNode(DomTreeNode *Node);
482 
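  // Return the value that a later memory operation can reuse from Inst: the
  // loaded value for loads, the stored value for stores, or a result
  // materialized via TTI for target memory intrinsics.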
483   Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
484     if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
485       return LI;
486     else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
487       return SI->getValueOperand();
488     assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
489     return TTI.getOrCreateResultFromMemIntrinsic(cast<IntrinsicInst>(Inst),
490                                                  ExpectedType);
491   }
492 
493   bool isSameMemGeneration(unsigned EarlierGeneration, unsigned LaterGeneration,
494                            Instruction *EarlierInst, Instruction *LaterInst);
495 
496   void removeMSSA(Instruction *Inst) {
497     if (!MSSA)
498       return;
499     // FIXME: Removing a store here can leave MemorySSA in an unoptimized state
500     // by creating MemoryPhis that have identical arguments and by creating
501     // MemoryUses whose defining access is not an actual clobber.
502     if (MemoryAccess *MA = MSSA->getMemoryAccess(Inst))
503       MSSA->removeMemoryAccess(MA);
504   }
505 };
506 }
507 
508 /// Determine if the memory referenced by LaterInst is from the same heap version
509 /// as EarlierInst.
510 /// This is currently called in two scenarios:
511 ///
512 ///   load p
513 ///   ...
514 ///   load p
515 ///
516 /// and
517 ///
518 ///   x = load p
519 ///   ...
520 ///   store x, p
521 ///
522 /// in both cases we want to verify that there are no possible writes to the
523 /// memory referenced by p between the earlier and later instruction.
524 bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
525                                    unsigned LaterGeneration,
526                                    Instruction *EarlierInst,
527                                    Instruction *LaterInst) {
528   // Check the simple memory generation tracking first.
529   if (EarlierGeneration == LaterGeneration)
530     return true;
531 
532   if (!MSSA)
533     return false;
534 
535   // Since we know LaterDef dominates LaterInst and EarlierInst dominates
536   // LaterInst, if LaterDef dominates EarlierInst then it can't occur between
537   // EarlierInst and LaterInst and neither can any other write that potentially
538   // clobbers LaterInst.
539   // FIXME: This is currently fairly expensive since it does an AA check even
540   // for MemoryUses that were already optimized by MemorySSA construction.
541   // Re-visit once MemorySSA optimized use tracking change has been committed.
542   MemoryAccess *LaterDef =
543       MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
544   return MSSA->dominates(LaterDef, MSSA->getMemoryAccess(EarlierInst));
545 }
546 
547 bool EarlyCSE::processNode(DomTreeNode *Node) {
548   bool Changed = false;
549   BasicBlock *BB = Node->getBlock();
550 
551   // If this block has a single predecessor, then the predecessor is the parent
552   // of the domtree node and all of the live-out memory values are still current
553   // in this block.  If this block has multiple predecessors, then they could
554   // have invalidated the live-out memory values of our parent.  For now,
555   // just be conservative and invalidate memory if this block has multiple
556   // predecessors.
557   if (!BB->getSinglePredecessor())
558     ++CurrentGeneration;
559 
560   // If this node has a single predecessor which ends in a conditional branch,
561   // we can infer the value of the branch condition given that we took this
562   // path.  We need the single predecessor to ensure there's not another path
563   // which reaches this block where the condition might hold a different
564   // value.  Since we're adding this to the scoped hash table (like any other
565   // def), it will have been popped if we encounter a future merge block.
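  //
  // For example, if the single predecessor ends in "br i1 %cmp, label %bb,
  // label %other", then within %bb the condition %cmp is known to be true (or
  // false, if %bb is the false successor), and dominated uses of %cmp are
  // rewritten below to that constant.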
566   if (BasicBlock *Pred = BB->getSinglePredecessor())
567     if (auto *BI = dyn_cast<BranchInst>(Pred->getTerminator()))
568       if (BI->isConditional())
569         if (auto *CondInst = dyn_cast<Instruction>(BI->getCondition()))
570           if (SimpleValue::canHandle(CondInst)) {
571             assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
572             auto *ConditionalConstant = (BI->getSuccessor(0) == BB) ?
573               ConstantInt::getTrue(BB->getContext()) :
574               ConstantInt::getFalse(BB->getContext());
575             AvailableValues.insert(CondInst, ConditionalConstant);
576             DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
577                   << CondInst->getName() << "' as " << *ConditionalConstant
578                   << " in " << BB->getName() << "\n");
579             // Replace all dominated uses with the known value.
580             if (unsigned Count =
581                     replaceDominatedUsesWith(CondInst, ConditionalConstant, DT,
582                                              BasicBlockEdge(Pred, BB))) {
583               Changed = true;
584               NumCSECVP = NumCSECVP + Count;
585             }
586           }
587 
588   /// LastStore - Keep track of the last non-volatile store that we saw... for
589   /// as long as there is no instruction that reads memory.  If we see a store
590   /// to the same location, we delete the dead store.  This zaps trivial dead
591   /// stores which can occur in bitfield code among other things.
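  ///
  /// For example, the first of these two stores is trivially dead:
  ///   store i32 %a, i32* %p
  ///   store i32 %b, i32* %p   ; no intervening read of %p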
592   Instruction *LastStore = nullptr;
593 
594   const DataLayout &DL = BB->getModule()->getDataLayout();
595 
596   // See if any instructions in the block can be eliminated.  If so, do it.  If
597   // not, add them to AvailableValues.
598   for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
599     Instruction *Inst = &*I++;
600 
601     // Dead instructions should just be removed.
602     if (isInstructionTriviallyDead(Inst, &TLI)) {
603       DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
604       removeMSSA(Inst);
605       Inst->eraseFromParent();
606       Changed = true;
607       ++NumSimplify;
608       continue;
609     }
610 
611     // Skip assume intrinsics, they don't really have side effects (although
612     // they're marked as such to ensure preservation of control dependencies),
613     // and this pass will not disturb any of the assumption's control
614     // dependencies.
615     if (match(Inst, m_Intrinsic<Intrinsic::assume>())) {
616       DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n');
617       continue;
618     }
619 
620     // Skip invariant.start intrinsics since they only read memory, and we can
621     // forward values across it. Also, we don't need to consume the last store
622     // since the semantics of invariant.start allow us to perform DSE of the
623     // last store, if there was a store following invariant.start. Consider:
624     //
625     // store 30, i8* p
626     // invariant.start(p)
627     // store 40, i8* p
628     // We can DSE the store of 30, since the store of 40 to the invariant
629     // location p causes undefined behaviour.
630     if (match(Inst, m_Intrinsic<Intrinsic::invariant_start>()))
631       continue;
632 
633     if (match(Inst, m_Intrinsic<Intrinsic::experimental_guard>())) {
634       if (auto *CondI =
635               dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0))) {
636         // The condition we're guarding on here is true for all dominated
637         // locations.
638         if (SimpleValue::canHandle(CondI))
639           AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
640       }
641 
642       // Guard intrinsics read all memory, but don't write any memory.
643       // Accordingly, don't update the generation but consume the last store (to
644       // avoid an incorrect DSE).
645       LastStore = nullptr;
646       continue;
647     }
648 
649     // If the instruction can be simplified (e.g. X+0 = X) then replace it with
650     // its simpler value.
651     if (Value *V = SimplifyInstruction(Inst, DL, &TLI, &DT, &AC)) {
652       DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << "  to: " << *V << '\n');
653       bool Killed = false;
654       if (!Inst->use_empty()) {
655         Inst->replaceAllUsesWith(V);
656         Changed = true;
657       }
658       if (isInstructionTriviallyDead(Inst, &TLI)) {
659         removeMSSA(Inst);
660         Inst->eraseFromParent();
661         Changed = true;
662         Killed = true;
663       }
664       if (Changed)
665         ++NumSimplify;
666       if (Killed)
667         continue;
668     }
669 
670     // If this is a simple instruction that we can value number, process it.
671     if (SimpleValue::canHandle(Inst)) {
672       // See if the instruction has an available value.  If so, use it.
673       if (Value *V = AvailableValues.lookup(Inst)) {
674         DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst << "  to: " << *V << '\n');
675         if (auto *I = dyn_cast<Instruction>(V))
676           I->andIRFlags(Inst);
677         Inst->replaceAllUsesWith(V);
678         removeMSSA(Inst);
679         Inst->eraseFromParent();
680         Changed = true;
681         ++NumCSE;
682         continue;
683       }
684 
685       // Otherwise, just remember that this value is available.
686       AvailableValues.insert(Inst, Inst);
687       continue;
688     }
689 
690     ParseMemoryInst MemInst(Inst, TTI);
691     // If this is a non-volatile load, process it.
692     if (MemInst.isValid() && MemInst.isLoad()) {
693       // (conservatively) we can't peek past the ordering implied by this
694       // operation, but we can add this load to our set of available values
695       if (MemInst.isVolatile() || !MemInst.isUnordered()) {
696         LastStore = nullptr;
697         ++CurrentGeneration;
698       }
699 
700       // If we have an available version of this load, and if it is the right
701       // generation or the load is known to be from an invariant location,
702       // replace this instruction.
703       //
704       // If either the dominating load or the current load are invariant, then
705       // we can assume the current load loads the same value as the dominating
706       // load.
707       LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
708       if (InVal.DefInst != nullptr &&
709           InVal.MatchingId == MemInst.getMatchingId() &&
710           // We don't yet handle removing loads with ordering of any kind.
711           !MemInst.isVolatile() && MemInst.isUnordered() &&
712           // We can't replace an atomic load with one which isn't also atomic.
713           InVal.IsAtomic >= MemInst.isAtomic() &&
714           (InVal.IsInvariant || MemInst.isInvariantLoad() ||
715            isSameMemGeneration(InVal.Generation, CurrentGeneration,
716                                InVal.DefInst, Inst))) {
717         Value *Op = getOrCreateResult(InVal.DefInst, Inst->getType());
718         if (Op != nullptr) {
719           DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
720                        << "  to: " << *InVal.DefInst << '\n');
721           if (!Inst->use_empty())
722             Inst->replaceAllUsesWith(Op);
723           removeMSSA(Inst);
724           Inst->eraseFromParent();
725           Changed = true;
726           ++NumCSELoad;
727           continue;
728         }
729       }
730 
731       // Otherwise, remember that we have this instruction.
732       AvailableLoads.insert(
733           MemInst.getPointerOperand(),
734           LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
735                     MemInst.isAtomic(), MemInst.isInvariantLoad()));
736       LastStore = nullptr;
737       continue;
738     }
739 
740     // If this instruction may read from memory, forget LastStore.
741     // Load/store intrinsics will indicate both a read and a write to
742     // memory.  The target may override this (e.g. so that a store intrinsic
743     // does not read from memory, and thus will be treated the same as a
744     // regular store for commoning purposes).
745     if (Inst->mayReadFromMemory() &&
746         !(MemInst.isValid() && !MemInst.mayReadFromMemory()))
747       LastStore = nullptr;
748 
749     // If this is a read-only call, process it.
750     if (CallValue::canHandle(Inst)) {
751       // If we have an available version of this call, and if it is the right
752       // generation, replace this instruction.
753       std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(Inst);
754       if (InVal.first != nullptr &&
755           isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first,
756                               Inst)) {
757         DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst
758                      << "  to: " << *InVal.first << '\n');
759         if (!Inst->use_empty())
760           Inst->replaceAllUsesWith(InVal.first);
761         removeMSSA(Inst);
762         Inst->eraseFromParent();
763         Changed = true;
764         ++NumCSECall;
765         continue;
766       }
767 
768       // Otherwise, remember that we have this instruction.
769       AvailableCalls.insert(
770           Inst, std::pair<Instruction *, unsigned>(Inst, CurrentGeneration));
771       continue;
772     }
773 
774     // A release fence requires that all stores complete before it, but does
775     // not prevent the reordering of following loads 'before' the fence.  As a
776     // result, we don't need to consider it as writing to memory and don't need
777     // to advance the generation.  We do need to prevent DSE across the fence,
778     // but that's handled above.
779     if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
780       if (FI->getOrdering() == AtomicOrdering::Release) {
781         assert(Inst->mayReadFromMemory() && "relied on to prevent DSE above");
782         continue;
783       }
784 
785     // Write-back DSE: If we write back the same value we just loaded from
786     // the same location and haven't passed any intervening writes or ordering
787     // operations, we can remove the write.  The primary benefit is in allowing
788     // the available load table to remain valid and value forward past where
789     // the store originally was.
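    //
    // For example, the store below can be removed:
    //   %v = load i32, i32* %p
    //   ...                     ; no intervening writes or ordering operations
    //   store i32 %v, i32* %p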
790     if (MemInst.isValid() && MemInst.isStore()) {
791       LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
792       if (InVal.DefInst &&
793           InVal.DefInst == getOrCreateResult(Inst, InVal.DefInst->getType()) &&
794           InVal.MatchingId == MemInst.getMatchingId() &&
795           // We don't yet handle removing stores with ordering of any kind.
796           !MemInst.isVolatile() && MemInst.isUnordered() &&
797           isSameMemGeneration(InVal.Generation, CurrentGeneration,
798                               InVal.DefInst, Inst)) {
799         // It is okay to have a LastStore to a different pointer here if MemorySSA
800         // tells us that the load and store are from the same memory generation.
801         // In that case, LastStore should keep its present value since we're
802         // removing the current store.
803         assert((!LastStore ||
804                 ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
805                     MemInst.getPointerOperand() ||
806                 MSSA) &&
807                "can't have an intervening store if not using MemorySSA!");
808         DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << *Inst << '\n');
809         removeMSSA(Inst);
810         Inst->eraseFromParent();
811         Changed = true;
812         ++NumDSE;
813         // We can avoid incrementing the generation count since we were able
814         // to eliminate this store.
815         continue;
816       }
817     }
818 
819     // Okay, this isn't something we can CSE at all.  Check to see if it is
820     // something that could modify memory.  If so, our available memory values
821     // cannot be used so bump the generation count.
822     if (Inst->mayWriteToMemory()) {
823       ++CurrentGeneration;
824 
825       if (MemInst.isValid() && MemInst.isStore()) {
826         // We do a trivial form of DSE if there are two stores to the same
827         // location with no intervening loads.  Delete the earlier store.
828         // At the moment, we don't remove ordered stores, but do remove
829         // unordered atomic stores.  There's no special requirement (for
830         // unordered atomics) about removing atomic stores only in favor of
831         // other atomic stores since we're going to execute the non-atomic
832         // one anyway and the atomic one might never have become visible.
833         if (LastStore) {
834           ParseMemoryInst LastStoreMemInst(LastStore, TTI);
835           assert(LastStoreMemInst.isUnordered() &&
836                  !LastStoreMemInst.isVolatile() &&
837                  "Violated invariant");
838           if (LastStoreMemInst.isMatchingMemLoc(MemInst)) {
839             DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
840                          << "  due to: " << *Inst << '\n');
841             removeMSSA(LastStore);
842             LastStore->eraseFromParent();
843             Changed = true;
844             ++NumDSE;
845             LastStore = nullptr;
846           }
847           // fallthrough - we can exploit information about this store
848         }
849 
850         // Okay, we just invalidated anything we knew about loaded values.  Try
851         // to salvage *something* by remembering that the stored value is a live
852         // version of the pointer.  It is safe to forward from volatile stores
853         // to non-volatile loads, so we don't have to check for volatility of
854         // the store.
855         AvailableLoads.insert(
856             MemInst.getPointerOperand(),
857             LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
858                       MemInst.isAtomic(), /*IsInvariant=*/false));
859 
860         // Remember that this was the last unordered store we saw for DSE. We
861         // don't yet handle DSE on ordered or volatile stores since we don't
862         // have a good way to model the ordering requirement for following
863         // passes once the store is removed.  We could insert a fence, but
864         // since fences are slightly stronger than stores in their ordering,
865         // it's not clear this is a profitable transform. Another option would
866         // be to merge the ordering with that of the post dominating store.
867         if (MemInst.isUnordered() && !MemInst.isVolatile())
868           LastStore = Inst;
869         else
870           LastStore = nullptr;
871       }
872     }
873   }
874 
875   return Changed;
876 }
877 
878 bool EarlyCSE::run() {
879   // Note, deque is being used here because there are significant performance
880   // gains over vector when the container becomes very large due to the
881   // specific access patterns. For more information see the mailing list
882   // discussion on this:
883   // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
884   std::deque<StackNode *> nodesToProcess;
885 
886   bool Changed = false;
887 
888   // Process the root node.
889   nodesToProcess.push_back(new StackNode(
890       AvailableValues, AvailableLoads, AvailableCalls, CurrentGeneration,
891       DT.getRootNode(), DT.getRootNode()->begin(), DT.getRootNode()->end()));
892 
893   // Save the current generation.
894   unsigned LiveOutGeneration = CurrentGeneration;
895 
896   // Process the stack.
897   while (!nodesToProcess.empty()) {
898     // Grab the first item off the stack. Set the current generation, remove
899     // the node from the stack, and process it.
900     StackNode *NodeToProcess = nodesToProcess.back();
901 
902     // Initialize class members.
903     CurrentGeneration = NodeToProcess->currentGeneration();
904 
905     // Check if the node needs to be processed.
906     if (!NodeToProcess->isProcessed()) {
907       // Process the node.
908       Changed |= processNode(NodeToProcess->node());
909       NodeToProcess->childGeneration(CurrentGeneration);
910       NodeToProcess->process();
911     } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
912       // Push the next child onto the stack.
913       DomTreeNode *child = NodeToProcess->nextChild();
914       nodesToProcess.push_back(
915           new StackNode(AvailableValues, AvailableLoads, AvailableCalls,
916                         NodeToProcess->childGeneration(), child, child->begin(),
917                         child->end()));
918     } else {
919       // It has been processed, and there are no more children to process,
920       // so delete it and pop it off the stack.
921       delete NodeToProcess;
922       nodesToProcess.pop_back();
923     }
924   } // while (!nodes...)
925 
926   // Reset the current generation.
927   CurrentGeneration = LiveOutGeneration;
928 
929   return Changed;
930 }
931 
932 PreservedAnalyses EarlyCSEPass::run(Function &F,
933                                     FunctionAnalysisManager &AM) {
934   auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
935   auto &TTI = AM.getResult<TargetIRAnalysis>(F);
936   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
937   auto &AC = AM.getResult<AssumptionAnalysis>(F);
938   auto *MSSA =
939       UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr;
940 
941   EarlyCSE CSE(TLI, TTI, DT, AC, MSSA);
942 
943   if (!CSE.run())
944     return PreservedAnalyses::all();
945 
946   // CSE preserves the dominator tree because it doesn't mutate the CFG.
947   // FIXME: Bundle this with other CFG-preservation.
948   PreservedAnalyses PA;
949   PA.preserve<DominatorTreeAnalysis>();
950   PA.preserve<GlobalsAA>();
951   if (UseMemorySSA)
952     PA.preserve<MemorySSAAnalysis>();
953   return PA;
954 }
955 
956 namespace {
957 /// \brief A simple and fast domtree-based CSE pass.
958 ///
959 /// This pass does a simple depth-first walk over the dominator tree,
960 /// eliminating trivially redundant instructions and using instsimplify to
961 /// canonicalize things as it goes. It is intended to be fast and catch obvious
962 /// cases so that instcombine and other passes are more effective. It is
963 /// expected that a later pass of GVN will catch the interesting/hard cases.
964 template<bool UseMemorySSA>
965 class EarlyCSELegacyCommonPass : public FunctionPass {
966 public:
967   static char ID;
968 
969   EarlyCSELegacyCommonPass() : FunctionPass(ID) {
970     if (UseMemorySSA)
971       initializeEarlyCSEMemSSALegacyPassPass(*PassRegistry::getPassRegistry());
972     else
973       initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
974   }
975 
976   bool runOnFunction(Function &F) override {
977     if (skipFunction(F))
978       return false;
979 
980     auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
981     auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
982     auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
983     auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
984     auto *MSSA =
985         UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr;
986 
987     EarlyCSE CSE(TLI, TTI, DT, AC, MSSA);
988 
989     return CSE.run();
990   }
991 
992   void getAnalysisUsage(AnalysisUsage &AU) const override {
993     AU.addRequired<AssumptionCacheTracker>();
994     AU.addRequired<DominatorTreeWrapperPass>();
995     AU.addRequired<TargetLibraryInfoWrapperPass>();
996     AU.addRequired<TargetTransformInfoWrapperPass>();
997     if (UseMemorySSA) {
998       AU.addRequired<MemorySSAWrapperPass>();
999       AU.addPreserved<MemorySSAWrapperPass>();
1000     }
1001     AU.addPreserved<GlobalsAAWrapperPass>();
1002     AU.setPreservesCFG();
1003   }
1004 };
1005 }
1006 
1007 using EarlyCSELegacyPass = EarlyCSELegacyCommonPass</*UseMemorySSA=*/false>;
1008 
1009 template<>
1010 char EarlyCSELegacyPass::ID = 0;
1011 
1012 INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false,
1013                       false)
1014 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
1015 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1016 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1017 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1018 INITIALIZE_PASS_END(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false)
1019 
1020 using EarlyCSEMemSSALegacyPass =
1021     EarlyCSELegacyCommonPass</*UseMemorySSA=*/true>;
1022 
1023 template<>
1024 char EarlyCSEMemSSALegacyPass::ID = 0;
1025 
1026 FunctionPass *llvm::createEarlyCSEPass(bool UseMemorySSA) {
1027   if (UseMemorySSA)
1028     return new EarlyCSEMemSSALegacyPass();
1029   else
1030     return new EarlyCSELegacyPass();
1031 }
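// Typical legacy pass-manager usage (a hypothetical driver snippet, not part
// of this file's API):
//   legacy::FunctionPassManager FPM(&M);
//   FPM.add(createEarlyCSEPass(/*UseMemorySSA=*/true));
//   FPM.run(F);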
1032 
1033 INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
1034                       "Early CSE w/ MemorySSA", false, false)
1035 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
1036 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1037 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1038 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1039 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
1040 INITIALIZE_PASS_END(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
1041                     "Early CSE w/ MemorySSA", false, false)
1042