xref: /llvm-project/llvm/lib/Transforms/Scalar/EarlyCSE.cpp (revision 383ccfb360fa1ee62061c1662c2d1c1747c80df5)
1 //===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs a simple dominator tree walk that eliminates trivially
10 // redundant instructions.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Scalar/EarlyCSE.h"
15 #include "llvm/ADT/DenseMapInfo.h"
16 #include "llvm/ADT/Hashing.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/ScopedHashTable.h"
19 #include "llvm/ADT/SetVector.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/Analysis/AssumptionCache.h"
23 #include "llvm/Analysis/GlobalsModRef.h"
24 #include "llvm/Analysis/GuardUtils.h"
25 #include "llvm/Analysis/InstructionSimplify.h"
26 #include "llvm/Analysis/MemorySSA.h"
27 #include "llvm/Analysis/MemorySSAUpdater.h"
28 #include "llvm/Analysis/TargetLibraryInfo.h"
29 #include "llvm/Analysis/TargetTransformInfo.h"
30 #include "llvm/Transforms/Utils/Local.h"
31 #include "llvm/Analysis/ValueTracking.h"
32 #include "llvm/IR/BasicBlock.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DataLayout.h"
35 #include "llvm/IR/Dominators.h"
36 #include "llvm/IR/Function.h"
37 #include "llvm/IR/InstrTypes.h"
38 #include "llvm/IR/Instruction.h"
39 #include "llvm/IR/Instructions.h"
40 #include "llvm/IR/IntrinsicInst.h"
41 #include "llvm/IR/Intrinsics.h"
42 #include "llvm/IR/LLVMContext.h"
43 #include "llvm/IR/PassManager.h"
44 #include "llvm/IR/PatternMatch.h"
45 #include "llvm/IR/Type.h"
46 #include "llvm/IR/Use.h"
47 #include "llvm/IR/Value.h"
48 #include "llvm/Pass.h"
49 #include "llvm/Support/Allocator.h"
50 #include "llvm/Support/AtomicOrdering.h"
51 #include "llvm/Support/Casting.h"
52 #include "llvm/Support/Debug.h"
53 #include "llvm/Support/DebugCounter.h"
54 #include "llvm/Support/RecyclingAllocator.h"
55 #include "llvm/Support/raw_ostream.h"
56 #include "llvm/Transforms/Scalar.h"
57 #include "llvm/Transforms/Utils/GuardUtils.h"
58 #include <cassert>
59 #include <deque>
60 #include <memory>
61 #include <utility>
62 
63 using namespace llvm;
64 using namespace llvm::PatternMatch;
65 
66 #define DEBUG_TYPE "early-cse"
67 
68 STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
69 STATISTIC(NumCSE,      "Number of instructions CSE'd");
70 STATISTIC(NumCSECVP,   "Number of compare instructions CVP'd");
71 STATISTIC(NumCSELoad,  "Number of load instructions CSE'd");
72 STATISTIC(NumCSECall,  "Number of call instructions CSE'd");
73 STATISTIC(NumDSE,      "Number of trivial dead stores removed");
74 
75 DEBUG_COUNTER(CSECounter, "early-cse",
76               "Controls which instructions are removed");
77 
78 static cl::opt<unsigned> EarlyCSEMssaOptCap(
79     "earlycse-mssa-optimization-cap", cl::init(500), cl::Hidden,
80     cl::desc("Enable imprecision in EarlyCSE in pathological cases, in exchange "
81              "for faster compile. Caps the MemorySSA clobbering calls."));
82 
83 //===----------------------------------------------------------------------===//
84 // SimpleValue
85 //===----------------------------------------------------------------------===//
86 
87 namespace {
88 
89 /// Struct representing the available values in the scoped hash table.
90 struct SimpleValue {
91   Instruction *Inst;
92 
93   SimpleValue(Instruction *I) : Inst(I) {
94     assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
95   }
96 
97   bool isSentinel() const {
98     return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
99            Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
100   }
101 
102   static bool canHandle(Instruction *Inst) {
103     // This can only handle non-void readnone functions.
104     if (CallInst *CI = dyn_cast<CallInst>(Inst))
105       return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
106     return isa<CastInst>(Inst) || isa<BinaryOperator>(Inst) ||
107            isa<GetElementPtrInst>(Inst) || isa<CmpInst>(Inst) ||
108            isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
109            isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
110            isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst);
111   }
112 };
113 
114 } // end anonymous namespace
115 
116 namespace llvm {
117 
118 template <> struct DenseMapInfo<SimpleValue> {
119   static inline SimpleValue getEmptyKey() {
120     return DenseMapInfo<Instruction *>::getEmptyKey();
121   }
122 
123   static inline SimpleValue getTombstoneKey() {
124     return DenseMapInfo<Instruction *>::getTombstoneKey();
125   }
126 
127   static unsigned getHashValue(SimpleValue Val);
128   static bool isEqual(SimpleValue LHS, SimpleValue RHS);
129 };
130 
131 } // end namespace llvm
132 
133 unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
134   Instruction *Inst = Val.Inst;
135   // Hash in all of the operands as pointers.
136   if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst)) {
137     Value *LHS = BinOp->getOperand(0);
138     Value *RHS = BinOp->getOperand(1);
139     if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
140       std::swap(LHS, RHS);
141 
142     return hash_combine(BinOp->getOpcode(), LHS, RHS);
143   }
144 
145   if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
146     Value *LHS = CI->getOperand(0);
147     Value *RHS = CI->getOperand(1);
148     CmpInst::Predicate Pred = CI->getPredicate();
149     if (Inst->getOperand(0) > Inst->getOperand(1)) {
150       std::swap(LHS, RHS);
151       Pred = CI->getSwappedPredicate();
152     }
153     return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
154   }
155 
156   // Hash min/max/abs (cmp + select) to allow for commuted operands.
157   // Min/max may also have non-canonical compare predicate (eg, the compare for
158   // smin may use 'sgt' rather than 'slt'), and non-canonical operands in the
159   // compare.
160   Value *A, *B;
161   SelectPatternFlavor SPF = matchSelectPattern(Inst, A, B).Flavor;
162   // TODO: We should also detect FP min/max.
163   if (SPF == SPF_SMIN || SPF == SPF_SMAX ||
164       SPF == SPF_UMIN || SPF == SPF_UMAX) {
165     if (A > B)
166       std::swap(A, B);
167     return hash_combine(Inst->getOpcode(), SPF, A, B);
168   }
169   if (SPF == SPF_ABS || SPF == SPF_NABS) {
170     // ABS/NABS always puts the input in A and its negation in B.
171     return hash_combine(Inst->getOpcode(), SPF, A, B);
172   }
173 
174   if (CastInst *CI = dyn_cast<CastInst>(Inst))
175     return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0));
176 
177   if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst))
178     return hash_combine(EVI->getOpcode(), EVI->getOperand(0),
179                         hash_combine_range(EVI->idx_begin(), EVI->idx_end()));
180 
181   if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst))
182     return hash_combine(IVI->getOpcode(), IVI->getOperand(0),
183                         IVI->getOperand(1),
184                         hash_combine_range(IVI->idx_begin(), IVI->idx_end()));
185 
186   assert((isa<CallInst>(Inst) || isa<BinaryOperator>(Inst) ||
187           isa<GetElementPtrInst>(Inst) || isa<SelectInst>(Inst) ||
188           isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
189           isa<ShuffleVectorInst>(Inst)) &&
190          "Invalid/unknown instruction");
191 
192   // Mix in the opcode.
193   return hash_combine(
194       Inst->getOpcode(),
195       hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
196 }
197 
198 bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
199   Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
200 
201   if (LHS.isSentinel() || RHS.isSentinel())
202     return LHSI == RHSI;
203 
204   if (LHSI->getOpcode() != RHSI->getOpcode())
205     return false;
206   if (LHSI->isIdenticalToWhenDefined(RHSI))
207     return true;
208 
209   // If we're not strictly identical, we still might be a commutable instruction
210   if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
211     if (!LHSBinOp->isCommutative())
212       return false;
213 
214     assert(isa<BinaryOperator>(RHSI) &&
215            "same opcode, but different instruction type?");
216     BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);
217 
218     // Commuted equality
219     return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
220            LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
221   }
222   if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
223     assert(isa<CmpInst>(RHSI) &&
224            "same opcode, but different instruction type?");
225     CmpInst *RHSCmp = cast<CmpInst>(RHSI);
226     // Commuted equality
227     return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
228            LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
229            LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
230   }
231 
232   // Min/max/abs can occur with commuted operands, non-canonical predicates,
233   // and/or non-canonical operands.
234   Value *LHSA, *LHSB;
235   SelectPatternFlavor LSPF = matchSelectPattern(LHSI, LHSA, LHSB).Flavor;
236   // TODO: We should also detect FP min/max.
237   if (LSPF == SPF_SMIN || LSPF == SPF_SMAX ||
238       LSPF == SPF_UMIN || LSPF == SPF_UMAX ||
239       LSPF == SPF_ABS || LSPF == SPF_NABS) {
240     Value *RHSA, *RHSB;
241     SelectPatternFlavor RSPF = matchSelectPattern(RHSI, RHSA, RHSB).Flavor;
242     if (LSPF == RSPF) {
243       // Abs results are placed in a defined order by matchSelectPattern.
244       if (LSPF == SPF_ABS || LSPF == SPF_NABS)
245         return LHSA == RHSA && LHSB == RHSB;
246       return ((LHSA == RHSA && LHSB == RHSB) ||
247               (LHSA == RHSB && LHSB == RHSA));
248     }
249   }
250 
251   return false;
252 }
253 
254 //===----------------------------------------------------------------------===//
255 // CallValue
256 //===----------------------------------------------------------------------===//
257 
258 namespace {
259 
260 /// Struct representing the available call values in the scoped hash
261 /// table.
262 struct CallValue {
263   Instruction *Inst;
264 
265   CallValue(Instruction *I) : Inst(I) {
266     assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
267   }
268 
269   bool isSentinel() const {
270     return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
271            Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
272   }
273 
274   static bool canHandle(Instruction *Inst) {
275     // Don't value number anything that returns void.
276     if (Inst->getType()->isVoidTy())
277       return false;
278 
279     CallInst *CI = dyn_cast<CallInst>(Inst);
280     if (!CI || !CI->onlyReadsMemory())
281       return false;
282     return true;
283   }
284 };
285 
286 } // end anonymous namespace
287 
288 namespace llvm {
289 
290 template <> struct DenseMapInfo<CallValue> {
291   static inline CallValue getEmptyKey() {
292     return DenseMapInfo<Instruction *>::getEmptyKey();
293   }
294 
295   static inline CallValue getTombstoneKey() {
296     return DenseMapInfo<Instruction *>::getTombstoneKey();
297   }
298 
299   static unsigned getHashValue(CallValue Val);
300   static bool isEqual(CallValue LHS, CallValue RHS);
301 };
302 
303 } // end namespace llvm
304 
305 unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
306   Instruction *Inst = Val.Inst;
307   // Hash all of the operands as pointers and mix in the opcode.
308   return hash_combine(
309       Inst->getOpcode(),
310       hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
311 }
312 
313 bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
314   Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
315   if (LHS.isSentinel() || RHS.isSentinel())
316     return LHSI == RHSI;
317   return LHSI->isIdenticalTo(RHSI);
318 }
319 
320 //===----------------------------------------------------------------------===//
321 // EarlyCSE implementation
322 //===----------------------------------------------------------------------===//
323 
324 namespace {
325 
326 /// A simple and fast domtree-based CSE pass.
327 ///
328 /// This pass does a simple depth-first walk over the dominator tree,
329 /// eliminating trivially redundant instructions and using instsimplify to
330 /// canonicalize things as it goes. It is intended to be fast and catch obvious
331 /// cases so that instcombine and other passes are more effective. It is
332 /// expected that a later pass of GVN will catch the interesting/hard cases.
333 class EarlyCSE {
334 public:
335   const TargetLibraryInfo &TLI;
336   const TargetTransformInfo &TTI;
337   DominatorTree &DT;
338   AssumptionCache &AC;
339   const SimplifyQuery SQ;
340   MemorySSA *MSSA;
341   std::unique_ptr<MemorySSAUpdater> MSSAUpdater;
342 
343   using AllocatorTy =
344       RecyclingAllocator<BumpPtrAllocator,
345                          ScopedHashTableVal<SimpleValue, Value *>>;
346   using ScopedHTType =
347       ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,
348                       AllocatorTy>;
349 
350   /// A scoped hash table of the current values of all of our simple
351   /// scalar expressions.
352   ///
353   /// As we walk down the domtree, we look to see if instructions are in this:
354   /// if so, we replace them with what we find, otherwise we insert them so
355   /// that dominated values can succeed in their lookup.
356   ScopedHTType AvailableValues;
357 
358   /// A scoped hash table of the current values of previously encountered
359   /// memory locations.
360   ///
361   /// This allows us to get efficient access to dominating loads or stores when
362   /// we have a fully redundant load.  In addition to the most recent load, we
363   /// keep track of a generation count of the read, which is compared against
364   /// the current generation count.  The current generation count is incremented
365   /// after every possibly writing memory operation, which ensures that we only
366   /// CSE loads with other loads that have no intervening store.  Ordering
367   /// events (such as fences or atomic instructions) increment the generation
368   /// count as well; essentially, we model these as writes to all possible
369   /// locations.  Note that atomic and/or volatile loads and stores can be
370   /// present the table; it is the responsibility of the consumer to inspect
371   /// the atomicity/volatility if needed.
372   struct LoadValue {
373     Instruction *DefInst = nullptr;
374     unsigned Generation = 0;
375     int MatchingId = -1;
376     bool IsAtomic = false;
377 
378     LoadValue() = default;
379     LoadValue(Instruction *Inst, unsigned Generation, unsigned MatchingId,
380               bool IsAtomic)
381         : DefInst(Inst), Generation(Generation), MatchingId(MatchingId),
382           IsAtomic(IsAtomic) {}
383   };
384 
385   using LoadMapAllocator =
386       RecyclingAllocator<BumpPtrAllocator,
387                          ScopedHashTableVal<Value *, LoadValue>>;
388   using LoadHTType =
389       ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
390                       LoadMapAllocator>;
391 
392   LoadHTType AvailableLoads;
393 
394   // A scoped hash table mapping memory locations (represented as typed
395   // addresses) to generation numbers at which that memory location became
396   // (henceforth indefinitely) invariant.
397   using InvariantMapAllocator =
398       RecyclingAllocator<BumpPtrAllocator,
399                          ScopedHashTableVal<MemoryLocation, unsigned>>;
400   using InvariantHTType =
401       ScopedHashTable<MemoryLocation, unsigned, DenseMapInfo<MemoryLocation>,
402                       InvariantMapAllocator>;
403   InvariantHTType AvailableInvariants;
404 
405   /// A scoped hash table of the current values of read-only call
406   /// values.
407   ///
408   /// It uses the same generation count as loads.
409   using CallHTType =
410       ScopedHashTable<CallValue, std::pair<Instruction *, unsigned>>;
411   CallHTType AvailableCalls;
412 
413   /// This is the current generation of the memory value.
414   unsigned CurrentGeneration = 0;
415 
416   /// Set up the EarlyCSE runner for a particular function.
417   EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI,
418            const TargetTransformInfo &TTI, DominatorTree &DT,
419            AssumptionCache &AC, MemorySSA *MSSA)
420       : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA),
421         MSSAUpdater(llvm::make_unique<MemorySSAUpdater>(MSSA)) {}
422 
423   bool run();
424 
425 private:
426   unsigned ClobberCounter = 0;
427   // Almost a POD, but needs to call the constructors for the scoped hash
428   // tables so that a new scope gets pushed on. These are RAII so that the
429   // scope gets popped when the NodeScope is destroyed.
430   class NodeScope {
431   public:
432     NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
433               InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls)
434       : Scope(AvailableValues), LoadScope(AvailableLoads),
435         InvariantScope(AvailableInvariants), CallScope(AvailableCalls) {}
436     NodeScope(const NodeScope &) = delete;
437     NodeScope &operator=(const NodeScope &) = delete;
438 
439   private:
440     ScopedHTType::ScopeTy Scope;
441     LoadHTType::ScopeTy LoadScope;
442     InvariantHTType::ScopeTy InvariantScope;
443     CallHTType::ScopeTy CallScope;
444   };
445 
446   // Contains all the needed information to create a stack for doing a depth
447   // first traversal of the tree. This includes scopes for values, loads, and
448   // calls as well as the generation. There is a child iterator so that the
449   // children do not need to be store separately.
450   class StackNode {
451   public:
452     StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
453               InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
454               unsigned cg, DomTreeNode *n, DomTreeNode::iterator child,
455               DomTreeNode::iterator end)
456         : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
457           EndIter(end),
458           Scopes(AvailableValues, AvailableLoads, AvailableInvariants,
459                  AvailableCalls)
460           {}
461     StackNode(const StackNode &) = delete;
462     StackNode &operator=(const StackNode &) = delete;
463 
464     // Accessors.
465     unsigned currentGeneration() { return CurrentGeneration; }
466     unsigned childGeneration() { return ChildGeneration; }
467     void childGeneration(unsigned generation) { ChildGeneration = generation; }
468     DomTreeNode *node() { return Node; }
469     DomTreeNode::iterator childIter() { return ChildIter; }
470 
471     DomTreeNode *nextChild() {
472       DomTreeNode *child = *ChildIter;
473       ++ChildIter;
474       return child;
475     }
476 
477     DomTreeNode::iterator end() { return EndIter; }
478     bool isProcessed() { return Processed; }
479     void process() { Processed = true; }
480 
481   private:
482     unsigned CurrentGeneration;
483     unsigned ChildGeneration;
484     DomTreeNode *Node;
485     DomTreeNode::iterator ChildIter;
486     DomTreeNode::iterator EndIter;
487     NodeScope Scopes;
488     bool Processed = false;
489   };
490 
491   /// Wrapper class to handle memory instructions, including loads,
492   /// stores and intrinsic loads and stores defined by the target.
493   class ParseMemoryInst {
494   public:
495     ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
496       : Inst(Inst) {
497       if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
498         if (TTI.getTgtMemIntrinsic(II, Info))
499           IsTargetMemInst = true;
500     }
501 
502     bool isLoad() const {
503       if (IsTargetMemInst) return Info.ReadMem;
504       return isa<LoadInst>(Inst);
505     }
506 
507     bool isStore() const {
508       if (IsTargetMemInst) return Info.WriteMem;
509       return isa<StoreInst>(Inst);
510     }
511 
512     bool isAtomic() const {
513       if (IsTargetMemInst)
514         return Info.Ordering != AtomicOrdering::NotAtomic;
515       return Inst->isAtomic();
516     }
517 
518     bool isUnordered() const {
519       if (IsTargetMemInst)
520         return Info.isUnordered();
521 
522       if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
523         return LI->isUnordered();
524       } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
525         return SI->isUnordered();
526       }
527       // Conservative answer
528       return !Inst->isAtomic();
529     }
530 
531     bool isVolatile() const {
532       if (IsTargetMemInst)
533         return Info.IsVolatile;
534 
535       if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
536         return LI->isVolatile();
537       } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
538         return SI->isVolatile();
539       }
540       // Conservative answer
541       return true;
542     }
543 
544     bool isInvariantLoad() const {
545       if (auto *LI = dyn_cast<LoadInst>(Inst))
546         return LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr;
547       return false;
548     }
549 
550     bool isMatchingMemLoc(const ParseMemoryInst &Inst) const {
551       return (getPointerOperand() == Inst.getPointerOperand() &&
552               getMatchingId() == Inst.getMatchingId());
553     }
554 
555     bool isValid() const { return getPointerOperand() != nullptr; }
556 
557     // For regular (non-intrinsic) loads/stores, this is set to -1. For
558     // intrinsic loads/stores, the id is retrieved from the corresponding
559     // field in the MemIntrinsicInfo structure.  That field contains
560     // non-negative values only.
561     int getMatchingId() const {
562       if (IsTargetMemInst) return Info.MatchingId;
563       return -1;
564     }
565 
566     Value *getPointerOperand() const {
567       if (IsTargetMemInst) return Info.PtrVal;
568       return getLoadStorePointerOperand(Inst);
569     }
570 
571     bool mayReadFromMemory() const {
572       if (IsTargetMemInst) return Info.ReadMem;
573       return Inst->mayReadFromMemory();
574     }
575 
576     bool mayWriteToMemory() const {
577       if (IsTargetMemInst) return Info.WriteMem;
578       return Inst->mayWriteToMemory();
579     }
580 
581   private:
582     bool IsTargetMemInst = false;
583     MemIntrinsicInfo Info;
584     Instruction *Inst;
585   };
586 
587   bool processNode(DomTreeNode *Node);
588 
589   bool handleBranchCondition(Instruction *CondInst, const BranchInst *BI,
590                              const BasicBlock *BB, const BasicBlock *Pred);
591 
592   Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
593     if (auto *LI = dyn_cast<LoadInst>(Inst))
594       return LI;
595     if (auto *SI = dyn_cast<StoreInst>(Inst))
596       return SI->getValueOperand();
597     assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
598     return TTI.getOrCreateResultFromMemIntrinsic(cast<IntrinsicInst>(Inst),
599                                                  ExpectedType);
600   }
601 
602   /// Return true if the instruction is known to only operate on memory
603   /// provably invariant in the given "generation".
604   bool isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt);
605 
606   bool isSameMemGeneration(unsigned EarlierGeneration, unsigned LaterGeneration,
607                            Instruction *EarlierInst, Instruction *LaterInst);
608 
609   void removeMSSA(Instruction *Inst) {
610     if (!MSSA)
611       return;
612     if (VerifyMemorySSA)
613       MSSA->verifyMemorySSA();
614     // Removing a store here can leave MemorySSA in an unoptimized state by
615     // creating MemoryPhis that have identical arguments and by creating
616     // MemoryUses whose defining access is not an actual clobber. The phi case
617     // is handled by MemorySSA when passing OptimizePhis = true to
618     // removeMemoryAccess.  The non-optimized MemoryUse case is lazily updated
619     // by MemorySSA's getClobberingMemoryAccess.
620     MSSAUpdater->removeMemoryAccess(Inst, true);
621   }
622 };
623 
624 } // end anonymous namespace
625 
626 /// Determine if the memory referenced by LaterInst is from the same heap
627 /// version as EarlierInst.
628 /// This is currently called in two scenarios:
629 ///
630 ///   load p
631 ///   ...
632 ///   load p
633 ///
634 /// and
635 ///
636 ///   x = load p
637 ///   ...
638 ///   store x, p
639 ///
640 /// in both cases we want to verify that there are no possible writes to the
641 /// memory referenced by p between the earlier and later instruction.
642 bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
643                                    unsigned LaterGeneration,
644                                    Instruction *EarlierInst,
645                                    Instruction *LaterInst) {
646   // Check the simple memory generation tracking first.
647   if (EarlierGeneration == LaterGeneration)
648     return true;
649 
650   if (!MSSA)
651     return false;
652 
653   // If MemorySSA has determined that one of EarlierInst or LaterInst does not
654   // read/write memory, then we can safely return true here.
655   // FIXME: We could be more aggressive when checking doesNotAccessMemory(),
656   // onlyReadsMemory(), mayReadFromMemory(), and mayWriteToMemory() in this pass
657   // by also checking the MemorySSA MemoryAccess on the instruction.  Initial
658   // experiments suggest this isn't worthwhile, at least for C/C++ code compiled
659   // with the default optimization pipeline.
660   auto *EarlierMA = MSSA->getMemoryAccess(EarlierInst);
661   if (!EarlierMA)
662     return true;
663   auto *LaterMA = MSSA->getMemoryAccess(LaterInst);
664   if (!LaterMA)
665     return true;
666 
667   // Since we know LaterDef dominates LaterInst and EarlierInst dominates
668   // LaterInst, if LaterDef dominates EarlierInst then it can't occur between
669   // EarlierInst and LaterInst and neither can any other write that potentially
670   // clobbers LaterInst.
671   MemoryAccess *LaterDef;
672   if (ClobberCounter < EarlyCSEMssaOptCap) {
673     LaterDef = MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
674     ClobberCounter++;
675   } else
676     LaterDef = LaterMA->getDefiningAccess();
677 
678   return MSSA->dominates(LaterDef, EarlierMA);
679 }
680 
681 bool EarlyCSE::isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt) {
682   // A location loaded from with an invariant_load is assumed to *never* change
683   // within the visible scope of the compilation.
684   if (auto *LI = dyn_cast<LoadInst>(I))
685     if (LI->getMetadata(LLVMContext::MD_invariant_load))
686       return true;
687 
688   auto MemLocOpt = MemoryLocation::getOrNone(I);
689   if (!MemLocOpt)
690     // "target" intrinsic forms of loads aren't currently known to
691     // MemoryLocation::get.  TODO
692     return false;
693   MemoryLocation MemLoc = *MemLocOpt;
694   if (!AvailableInvariants.count(MemLoc))
695     return false;
696 
697   // Is the generation at which this became invariant older than the
698   // current one?
699   return AvailableInvariants.lookup(MemLoc) <= GenAt;
700 }
701 
702 bool EarlyCSE::handleBranchCondition(Instruction *CondInst,
703                                      const BranchInst *BI, const BasicBlock *BB,
704                                      const BasicBlock *Pred) {
705   assert(BI->isConditional() && "Should be a conditional branch!");
706   assert(BI->getCondition() == CondInst && "Wrong condition?");
707   assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
708   auto *TorF = (BI->getSuccessor(0) == BB)
709                    ? ConstantInt::getTrue(BB->getContext())
710                    : ConstantInt::getFalse(BB->getContext());
711   auto MatchBinOp = [](Instruction *I, unsigned Opcode) {
712     if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(I))
713       return BOp->getOpcode() == Opcode;
714     return false;
715   };
716   // If the condition is AND operation, we can propagate its operands into the
717   // true branch. If it is OR operation, we can propagate them into the false
718   // branch.
719   unsigned PropagateOpcode =
720       (BI->getSuccessor(0) == BB) ? Instruction::And : Instruction::Or;
721 
722   bool MadeChanges = false;
723   SmallVector<Instruction *, 4> WorkList;
724   SmallPtrSet<Instruction *, 4> Visited;
725   WorkList.push_back(CondInst);
726   while (!WorkList.empty()) {
727     Instruction *Curr = WorkList.pop_back_val();
728 
729     AvailableValues.insert(Curr, TorF);
730     LLVM_DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
731                       << Curr->getName() << "' as " << *TorF << " in "
732                       << BB->getName() << "\n");
733     if (!DebugCounter::shouldExecute(CSECounter)) {
734       LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
735     } else {
736       // Replace all dominated uses with the known value.
737       if (unsigned Count = replaceDominatedUsesWith(Curr, TorF, DT,
738                                                     BasicBlockEdge(Pred, BB))) {
739         NumCSECVP += Count;
740         MadeChanges = true;
741       }
742     }
743 
744     if (MatchBinOp(Curr, PropagateOpcode))
745       for (auto &Op : cast<BinaryOperator>(Curr)->operands())
746         if (Instruction *OPI = dyn_cast<Instruction>(Op))
747           if (SimpleValue::canHandle(OPI) && Visited.insert(OPI).second)
748             WorkList.push_back(OPI);
749   }
750 
751   return MadeChanges;
752 }
753 
754 bool EarlyCSE::processNode(DomTreeNode *Node) {
755   bool Changed = false;
756   BasicBlock *BB = Node->getBlock();
757 
758   // If this block has a single predecessor, then the predecessor is the parent
759   // of the domtree node and all of the live out memory values are still current
760   // in this block.  If this block has multiple predecessors, then they could
761   // have invalidated the live-out memory values of our parent value.  For now,
762   // just be conservative and invalidate memory if this block has multiple
763   // predecessors.
764   if (!BB->getSinglePredecessor())
765     ++CurrentGeneration;
766 
767   // If this node has a single predecessor which ends in a conditional branch,
768   // we can infer the value of the branch condition given that we took this
769   // path.  We need the single predecessor to ensure there's not another path
770   // which reaches this block where the condition might hold a different
771   // value.  Since we're adding this to the scoped hash table (like any other
772   // def), it will have been popped if we encounter a future merge block.
773   if (BasicBlock *Pred = BB->getSinglePredecessor()) {
774     auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
775     if (BI && BI->isConditional()) {
776       auto *CondInst = dyn_cast<Instruction>(BI->getCondition());
777       if (CondInst && SimpleValue::canHandle(CondInst))
778         Changed |= handleBranchCondition(CondInst, BI, BB, Pred);
779     }
780   }
781 
782   /// LastStore - Keep track of the last non-volatile store that we saw... for
783   /// as long as there in no instruction that reads memory.  If we see a store
784   /// to the same location, we delete the dead store.  This zaps trivial dead
785   /// stores which can occur in bitfield code among other things.
786   Instruction *LastStore = nullptr;
787 
788   // See if any instructions in the block can be eliminated.  If so, do it.  If
789   // not, add them to AvailableValues.
790   for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
791     Instruction *Inst = &*I++;
792 
793     // Dead instructions should just be removed.
794     if (isInstructionTriviallyDead(Inst, &TLI)) {
795       LLVM_DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
796       if (!DebugCounter::shouldExecute(CSECounter)) {
797         LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
798         continue;
799       }
800       if (!salvageDebugInfo(*Inst))
801         replaceDbgUsesWithUndef(Inst);
802       removeMSSA(Inst);
803       Inst->eraseFromParent();
804       Changed = true;
805       ++NumSimplify;
806       continue;
807     }
808 
809     // Skip assume intrinsics, they don't really have side effects (although
810     // they're marked as such to ensure preservation of control dependencies),
811     // and this pass will not bother with its removal. However, we should mark
812     // its condition as true for all dominated blocks.
813     if (match(Inst, m_Intrinsic<Intrinsic::assume>())) {
814       auto *CondI =
815           dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0));
816       if (CondI && SimpleValue::canHandle(CondI)) {
817         LLVM_DEBUG(dbgs() << "EarlyCSE considering assumption: " << *Inst
818                           << '\n');
819         AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
820       } else
821         LLVM_DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n');
822       continue;
823     }
824 
825     // Skip sideeffect intrinsics, for the same reason as assume intrinsics.
826     if (match(Inst, m_Intrinsic<Intrinsic::sideeffect>())) {
827       LLVM_DEBUG(dbgs() << "EarlyCSE skipping sideeffect: " << *Inst << '\n');
828       continue;
829     }
830 
831     // We can skip all invariant.start intrinsics since they only read memory,
832     // and we can forward values across it. For invariant starts without
833     // invariant ends, we can use the fact that the invariantness never ends to
834     // start a scope in the current generaton which is true for all future
835     // generations.  Also, we dont need to consume the last store since the
836     // semantics of invariant.start allow us to perform   DSE of the last
837     // store, if there was a store following invariant.start. Consider:
838     //
839     // store 30, i8* p
840     // invariant.start(p)
841     // store 40, i8* p
842     // We can DSE the store to 30, since the store 40 to invariant location p
843     // causes undefined behaviour.
844     if (match(Inst, m_Intrinsic<Intrinsic::invariant_start>())) {
845       // If there are any uses, the scope might end.
846       if (!Inst->use_empty())
847         continue;
848       auto *CI = cast<CallInst>(Inst);
849       MemoryLocation MemLoc = MemoryLocation::getForArgument(CI, 1, TLI);
850       // Don't start a scope if we already have a better one pushed
851       if (!AvailableInvariants.count(MemLoc))
852         AvailableInvariants.insert(MemLoc, CurrentGeneration);
853       continue;
854     }
855 
856     if (isGuard(Inst)) {
857       if (auto *CondI =
858               dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0))) {
859         if (SimpleValue::canHandle(CondI)) {
860           // Do we already know the actual value of this condition?
861           if (auto *KnownCond = AvailableValues.lookup(CondI)) {
862             // Is the condition known to be true?
863             if (isa<ConstantInt>(KnownCond) &&
864                 cast<ConstantInt>(KnownCond)->isOne()) {
865               LLVM_DEBUG(dbgs()
866                          << "EarlyCSE removing guard: " << *Inst << '\n');
867               removeMSSA(Inst);
868               Inst->eraseFromParent();
869               Changed = true;
870               continue;
871             } else
872               // Use the known value if it wasn't true.
873               cast<CallInst>(Inst)->setArgOperand(0, KnownCond);
874           }
875           // The condition we're on guarding here is true for all dominated
876           // locations.
877           AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
878         }
879       }
880 
881       // Guard intrinsics read all memory, but don't write any memory.
882       // Accordingly, don't update the generation but consume the last store (to
883       // avoid an incorrect DSE).
884       LastStore = nullptr;
885       continue;
886     }
887 
888     // If the instruction can be simplified (e.g. X+0 = X) then replace it with
889     // its simpler value.
890     if (Value *V = SimplifyInstruction(Inst, SQ)) {
891       LLVM_DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << "  to: " << *V
892                         << '\n');
893       if (!DebugCounter::shouldExecute(CSECounter)) {
894         LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
895       } else {
896         bool Killed = false;
897         if (!Inst->use_empty()) {
898           Inst->replaceAllUsesWith(V);
899           Changed = true;
900         }
901         if (isInstructionTriviallyDead(Inst, &TLI)) {
902           removeMSSA(Inst);
903           Inst->eraseFromParent();
904           Changed = true;
905           Killed = true;
906         }
907         if (Changed)
908           ++NumSimplify;
909         if (Killed)
910           continue;
911       }
912     }
913 
914     // If this is a simple instruction that we can value number, process it.
915     if (SimpleValue::canHandle(Inst)) {
916       // See if the instruction has an available value.  If so, use it.
917       if (Value *V = AvailableValues.lookup(Inst)) {
918         LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst << "  to: " << *V
919                           << '\n');
920         if (!DebugCounter::shouldExecute(CSECounter)) {
921           LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
922           continue;
923         }
924         if (auto *I = dyn_cast<Instruction>(V))
925           I->andIRFlags(Inst);
926         Inst->replaceAllUsesWith(V);
927         removeMSSA(Inst);
928         Inst->eraseFromParent();
929         Changed = true;
930         ++NumCSE;
931         continue;
932       }
933 
934       // Otherwise, just remember that this value is available.
935       AvailableValues.insert(Inst, Inst);
936       continue;
937     }
938 
939     ParseMemoryInst MemInst(Inst, TTI);
940     // If this is a non-volatile load, process it.
941     if (MemInst.isValid() && MemInst.isLoad()) {
942       // (conservatively) we can't peak past the ordering implied by this
943       // operation, but we can add this load to our set of available values
944       if (MemInst.isVolatile() || !MemInst.isUnordered()) {
945         LastStore = nullptr;
946         ++CurrentGeneration;
947       }
948 
949       if (MemInst.isInvariantLoad()) {
950         // If we pass an invariant load, we know that memory location is
951         // indefinitely constant from the moment of first dereferenceability.
952         // We conservatively treat the invariant_load as that moment.  If we
953         // pass a invariant load after already establishing a scope, don't
954         // restart it since we want to preserve the earliest point seen.
955         auto MemLoc = MemoryLocation::get(Inst);
956         if (!AvailableInvariants.count(MemLoc))
957           AvailableInvariants.insert(MemLoc, CurrentGeneration);
958       }
959 
960       // If we have an available version of this load, and if it is the right
961       // generation or the load is known to be from an invariant location,
962       // replace this instruction.
963       //
964       // If either the dominating load or the current load are invariant, then
965       // we can assume the current load loads the same value as the dominating
966       // load.
967       LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
968       if (InVal.DefInst != nullptr &&
969           InVal.MatchingId == MemInst.getMatchingId() &&
970           // We don't yet handle removing loads with ordering of any kind.
971           !MemInst.isVolatile() && MemInst.isUnordered() &&
972           // We can't replace an atomic load with one which isn't also atomic.
973           InVal.IsAtomic >= MemInst.isAtomic() &&
974           (isOperatingOnInvariantMemAt(Inst, InVal.Generation) ||
975            isSameMemGeneration(InVal.Generation, CurrentGeneration,
976                                InVal.DefInst, Inst))) {
977         Value *Op = getOrCreateResult(InVal.DefInst, Inst->getType());
978         if (Op != nullptr) {
979           LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
980                             << "  to: " << *InVal.DefInst << '\n');
981           if (!DebugCounter::shouldExecute(CSECounter)) {
982             LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
983             continue;
984           }
985           if (!Inst->use_empty())
986             Inst->replaceAllUsesWith(Op);
987           removeMSSA(Inst);
988           Inst->eraseFromParent();
989           Changed = true;
990           ++NumCSELoad;
991           continue;
992         }
993       }
994 
995       // Otherwise, remember that we have this instruction.
996       AvailableLoads.insert(
997           MemInst.getPointerOperand(),
998           LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
999                     MemInst.isAtomic()));
1000       LastStore = nullptr;
1001       continue;
1002     }
1003 
1004     // If this instruction may read from memory or throw (and potentially read
1005     // from memory in the exception handler), forget LastStore.  Load/store
1006     // intrinsics will indicate both a read and a write to memory.  The target
1007     // may override this (e.g. so that a store intrinsic does not read from
1008     // memory, and thus will be treated the same as a regular store for
1009     // commoning purposes).
1010     if ((Inst->mayReadFromMemory() || Inst->mayThrow()) &&
1011         !(MemInst.isValid() && !MemInst.mayReadFromMemory()))
1012       LastStore = nullptr;
1013 
1014     // If this is a read-only call, process it.
1015     if (CallValue::canHandle(Inst)) {
1016       // If we have an available version of this call, and if it is the right
1017       // generation, replace this instruction.
1018       std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(Inst);
1019       if (InVal.first != nullptr &&
1020           isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first,
1021                               Inst)) {
1022         LLVM_DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst
1023                           << "  to: " << *InVal.first << '\n');
1024         if (!DebugCounter::shouldExecute(CSECounter)) {
1025           LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1026           continue;
1027         }
1028         if (!Inst->use_empty())
1029           Inst->replaceAllUsesWith(InVal.first);
1030         removeMSSA(Inst);
1031         Inst->eraseFromParent();
1032         Changed = true;
1033         ++NumCSECall;
1034         continue;
1035       }
1036 
1037       // Otherwise, remember that we have this instruction.
1038       AvailableCalls.insert(
1039           Inst, std::pair<Instruction *, unsigned>(Inst, CurrentGeneration));
1040       continue;
1041     }
1042 
1043     // A release fence requires that all stores complete before it, but does
1044     // not prevent the reordering of following loads 'before' the fence.  As a
1045     // result, we don't need to consider it as writing to memory and don't need
1046     // to advance the generation.  We do need to prevent DSE across the fence,
1047     // but that's handled above.
1048     if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
1049       if (FI->getOrdering() == AtomicOrdering::Release) {
1050         assert(Inst->mayReadFromMemory() && "relied on to prevent DSE above");
1051         continue;
1052       }
1053 
1054     // write back DSE - If we write back the same value we just loaded from
1055     // the same location and haven't passed any intervening writes or ordering
1056     // operations, we can remove the write.  The primary benefit is in allowing
1057     // the available load table to remain valid and value forward past where
1058     // the store originally was.
1059     if (MemInst.isValid() && MemInst.isStore()) {
1060       LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
1061       if (InVal.DefInst &&
1062           InVal.DefInst == getOrCreateResult(Inst, InVal.DefInst->getType()) &&
1063           InVal.MatchingId == MemInst.getMatchingId() &&
1064           // We don't yet handle removing stores with ordering of any kind.
1065           !MemInst.isVolatile() && MemInst.isUnordered() &&
1066           (isOperatingOnInvariantMemAt(Inst, InVal.Generation) ||
1067            isSameMemGeneration(InVal.Generation, CurrentGeneration,
1068                                InVal.DefInst, Inst))) {
1069         // It is okay to have a LastStore to a different pointer here if MemorySSA
1070         // tells us that the load and store are from the same memory generation.
1071         // In that case, LastStore should keep its present value since we're
1072         // removing the current store.
1073         assert((!LastStore ||
1074                 ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
1075                     MemInst.getPointerOperand() ||
1076                 MSSA) &&
1077                "can't have an intervening store if not using MemorySSA!");
1078         LLVM_DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << *Inst << '\n');
1079         if (!DebugCounter::shouldExecute(CSECounter)) {
1080           LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1081           continue;
1082         }
1083         removeMSSA(Inst);
1084         Inst->eraseFromParent();
1085         Changed = true;
1086         ++NumDSE;
1087         // We can avoid incrementing the generation count since we were able
1088         // to eliminate this store.
1089         continue;
1090       }
1091     }
1092 
1093     // Okay, this isn't something we can CSE at all.  Check to see if it is
1094     // something that could modify memory.  If so, our available memory values
1095     // cannot be used so bump the generation count.
1096     if (Inst->mayWriteToMemory()) {
1097       ++CurrentGeneration;
1098 
1099       if (MemInst.isValid() && MemInst.isStore()) {
1100         // We do a trivial form of DSE if there are two stores to the same
1101         // location with no intervening loads.  Delete the earlier store.
1102         // At the moment, we don't remove ordered stores, but do remove
1103         // unordered atomic stores.  There's no special requirement (for
1104         // unordered atomics) about removing atomic stores only in favor of
1105         // other atomic stores since we we're going to execute the non-atomic
1106         // one anyway and the atomic one might never have become visible.
1107         if (LastStore) {
1108           ParseMemoryInst LastStoreMemInst(LastStore, TTI);
1109           assert(LastStoreMemInst.isUnordered() &&
1110                  !LastStoreMemInst.isVolatile() &&
1111                  "Violated invariant");
1112           if (LastStoreMemInst.isMatchingMemLoc(MemInst)) {
1113             LLVM_DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
1114                               << "  due to: " << *Inst << '\n');
1115             if (!DebugCounter::shouldExecute(CSECounter)) {
1116               LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1117             } else {
1118               removeMSSA(LastStore);
1119               LastStore->eraseFromParent();
1120               Changed = true;
1121               ++NumDSE;
1122               LastStore = nullptr;
1123             }
1124           }
1125           // fallthrough - we can exploit information about this store
1126         }
1127 
1128         // Okay, we just invalidated anything we knew about loaded values.  Try
1129         // to salvage *something* by remembering that the stored value is a live
1130         // version of the pointer.  It is safe to forward from volatile stores
1131         // to non-volatile loads, so we don't have to check for volatility of
1132         // the store.
1133         AvailableLoads.insert(
1134             MemInst.getPointerOperand(),
1135             LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
1136                       MemInst.isAtomic()));
1137 
1138         // Remember that this was the last unordered store we saw for DSE. We
1139         // don't yet handle DSE on ordered or volatile stores since we don't
1140         // have a good way to model the ordering requirement for following
1141         // passes  once the store is removed.  We could insert a fence, but
1142         // since fences are slightly stronger than stores in their ordering,
1143         // it's not clear this is a profitable transform. Another option would
1144         // be to merge the ordering with that of the post dominating store.
1145         if (MemInst.isUnordered() && !MemInst.isVolatile())
1146           LastStore = Inst;
1147         else
1148           LastStore = nullptr;
1149       }
1150     }
1151   }
1152 
1153   return Changed;
1154 }
1155 
1156 bool EarlyCSE::run() {
1157   // Note, deque is being used here because there is significant performance
1158   // gains over vector when the container becomes very large due to the
1159   // specific access patterns. For more information see the mailing list
1160   // discussion on this:
1161   // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
1162   std::deque<StackNode *> nodesToProcess;
1163 
1164   bool Changed = false;
1165 
1166   // Process the root node.
1167   nodesToProcess.push_back(new StackNode(
1168       AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
1169       CurrentGeneration, DT.getRootNode(),
1170       DT.getRootNode()->begin(), DT.getRootNode()->end()));
1171 
1172   // Save the current generation.
1173   unsigned LiveOutGeneration = CurrentGeneration;
1174 
1175   // Process the stack.
1176   while (!nodesToProcess.empty()) {
1177     // Grab the first item off the stack. Set the current generation, remove
1178     // the node from the stack, and process it.
1179     StackNode *NodeToProcess = nodesToProcess.back();
1180 
1181     // Initialize class members.
1182     CurrentGeneration = NodeToProcess->currentGeneration();
1183 
1184     // Check if the node needs to be processed.
1185     if (!NodeToProcess->isProcessed()) {
1186       // Process the node.
1187       Changed |= processNode(NodeToProcess->node());
1188       NodeToProcess->childGeneration(CurrentGeneration);
1189       NodeToProcess->process();
1190     } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
1191       // Push the next child onto the stack.
1192       DomTreeNode *child = NodeToProcess->nextChild();
1193       nodesToProcess.push_back(
1194           new StackNode(AvailableValues, AvailableLoads, AvailableInvariants,
1195                         AvailableCalls, NodeToProcess->childGeneration(),
1196                         child, child->begin(), child->end()));
1197     } else {
1198       // It has been processed, and there are no more children to process,
1199       // so delete it and pop it off the stack.
1200       delete NodeToProcess;
1201       nodesToProcess.pop_back();
1202     }
1203   } // while (!nodes...)
1204 
1205   // Reset the current generation.
1206   CurrentGeneration = LiveOutGeneration;
1207 
1208   return Changed;
1209 }
1210 
1211 PreservedAnalyses EarlyCSEPass::run(Function &F,
1212                                     FunctionAnalysisManager &AM) {
1213   auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1214   auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1215   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1216   auto &AC = AM.getResult<AssumptionAnalysis>(F);
1217   auto *MSSA =
1218       UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr;
1219 
1220   EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);
1221 
1222   if (!CSE.run())
1223     return PreservedAnalyses::all();
1224 
1225   PreservedAnalyses PA;
1226   PA.preserveSet<CFGAnalyses>();
1227   PA.preserve<GlobalsAA>();
1228   if (UseMemorySSA)
1229     PA.preserve<MemorySSAAnalysis>();
1230   return PA;
1231 }
1232 
1233 namespace {
1234 
1235 /// A simple and fast domtree-based CSE pass.
1236 ///
1237 /// This pass does a simple depth-first walk over the dominator tree,
1238 /// eliminating trivially redundant instructions and using instsimplify to
1239 /// canonicalize things as it goes. It is intended to be fast and catch obvious
1240 /// cases so that instcombine and other passes are more effective. It is
1241 /// expected that a later pass of GVN will catch the interesting/hard cases.
1242 template<bool UseMemorySSA>
1243 class EarlyCSELegacyCommonPass : public FunctionPass {
1244 public:
1245   static char ID;
1246 
1247   EarlyCSELegacyCommonPass() : FunctionPass(ID) {
1248     if (UseMemorySSA)
1249       initializeEarlyCSEMemSSALegacyPassPass(*PassRegistry::getPassRegistry());
1250     else
1251       initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
1252   }
1253 
1254   bool runOnFunction(Function &F) override {
1255     if (skipFunction(F))
1256       return false;
1257 
1258     auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
1259     auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1260     auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1261     auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1262     auto *MSSA =
1263         UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr;
1264 
1265     EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);
1266 
1267     return CSE.run();
1268   }
1269 
1270   void getAnalysisUsage(AnalysisUsage &AU) const override {
1271     AU.addRequired<AssumptionCacheTracker>();
1272     AU.addRequired<DominatorTreeWrapperPass>();
1273     AU.addRequired<TargetLibraryInfoWrapperPass>();
1274     AU.addRequired<TargetTransformInfoWrapperPass>();
1275     if (UseMemorySSA) {
1276       AU.addRequired<MemorySSAWrapperPass>();
1277       AU.addPreserved<MemorySSAWrapperPass>();
1278     }
1279     AU.addPreserved<GlobalsAAWrapperPass>();
1280     AU.setPreservesCFG();
1281   }
1282 };
1283 
1284 } // end anonymous namespace
1285 
1286 using EarlyCSELegacyPass = EarlyCSELegacyCommonPass</*UseMemorySSA=*/false>;
1287 
1288 template<>
1289 char EarlyCSELegacyPass::ID = 0;
1290 
1291 INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false,
1292                       false)
1293 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
1294 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1295 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1296 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1297 INITIALIZE_PASS_END(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false)
1298 
1299 using EarlyCSEMemSSALegacyPass =
1300     EarlyCSELegacyCommonPass</*UseMemorySSA=*/true>;
1301 
1302 template<>
1303 char EarlyCSEMemSSALegacyPass::ID = 0;
1304 
1305 FunctionPass *llvm::createEarlyCSEPass(bool UseMemorySSA) {
1306   if (UseMemorySSA)
1307     return new EarlyCSEMemSSALegacyPass();
1308   else
1309     return new EarlyCSELegacyPass();
1310 }
1311 
1312 INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
1313                       "Early CSE w/ MemorySSA", false, false)
1314 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
1315 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1316 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1317 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1318 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
1319 INITIALIZE_PASS_END(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
1320                     "Early CSE w/ MemorySSA", false, false)
1321