xref: /llvm-project/llvm/lib/Transforms/Scalar/EarlyCSE.cpp (revision cc28a754679a2c85625e719944154fe82491f008)
1 //===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs a simple dominator tree walk that eliminates trivially
10 // redundant instructions.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Scalar/EarlyCSE.h"
15 #include "llvm/ADT/DenseMapInfo.h"
16 #include "llvm/ADT/Hashing.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/ScopedHashTable.h"
19 #include "llvm/ADT/SetVector.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/Analysis/AssumptionCache.h"
23 #include "llvm/Analysis/GlobalsModRef.h"
24 #include "llvm/Analysis/GuardUtils.h"
25 #include "llvm/Analysis/InstructionSimplify.h"
26 #include "llvm/Analysis/MemorySSA.h"
27 #include "llvm/Analysis/MemorySSAUpdater.h"
28 #include "llvm/Analysis/TargetLibraryInfo.h"
29 #include "llvm/Analysis/TargetTransformInfo.h"
30 #include "llvm/Analysis/ValueTracking.h"
31 #include "llvm/IR/BasicBlock.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/DataLayout.h"
34 #include "llvm/IR/Dominators.h"
35 #include "llvm/IR/Function.h"
36 #include "llvm/IR/InstrTypes.h"
37 #include "llvm/IR/Instruction.h"
38 #include "llvm/IR/Instructions.h"
39 #include "llvm/IR/IntrinsicInst.h"
40 #include "llvm/IR/Intrinsics.h"
41 #include "llvm/IR/LLVMContext.h"
42 #include "llvm/IR/PassManager.h"
43 #include "llvm/IR/PatternMatch.h"
44 #include "llvm/IR/Type.h"
45 #include "llvm/IR/Use.h"
46 #include "llvm/IR/Value.h"
47 #include "llvm/InitializePasses.h"
48 #include "llvm/Pass.h"
49 #include "llvm/Support/Allocator.h"
50 #include "llvm/Support/AtomicOrdering.h"
51 #include "llvm/Support/Casting.h"
52 #include "llvm/Support/Debug.h"
53 #include "llvm/Support/DebugCounter.h"
54 #include "llvm/Support/RecyclingAllocator.h"
55 #include "llvm/Support/raw_ostream.h"
56 #include "llvm/Transforms/Scalar.h"
57 #include "llvm/Transforms/Utils/GuardUtils.h"
58 #include "llvm/Transforms/Utils/Local.h"
59 #include <cassert>
60 #include <deque>
61 #include <memory>
62 #include <utility>
63 
64 using namespace llvm;
65 using namespace llvm::PatternMatch;
66 
67 #define DEBUG_TYPE "early-cse"
68 
69 STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
70 STATISTIC(NumCSE,      "Number of instructions CSE'd");
71 STATISTIC(NumCSECVP,   "Number of compare instructions CVP'd");
72 STATISTIC(NumCSELoad,  "Number of load instructions CSE'd");
73 STATISTIC(NumCSECall,  "Number of call instructions CSE'd");
74 STATISTIC(NumDSE,      "Number of trivial dead stores removed");
75 
76 DEBUG_COUNTER(CSECounter, "early-cse",
77               "Controls which instructions are removed");
78 
79 static cl::opt<unsigned> EarlyCSEMssaOptCap(
80     "earlycse-mssa-optimization-cap", cl::init(500), cl::Hidden,
81     cl::desc("Enable imprecision in EarlyCSE in pathological cases, in exchange "
82              "for faster compile time. Caps the number of MemorySSA clobbering calls."));
83 
84 static cl::opt<bool> EarlyCSEDebugHash(
85     "earlycse-debug-hash", cl::init(false), cl::Hidden,
86     cl::desc("Perform extra assertion checking to verify that SimpleValue's hash "
87              "function is well-behaved w.r.t. its isEqual predicate"));
88 
89 //===----------------------------------------------------------------------===//
90 // SimpleValue
91 //===----------------------------------------------------------------------===//
92 
93 namespace {
94 
95 /// Struct representing the available values in the scoped hash table.
96 struct SimpleValue {
97   Instruction *Inst;
98 
99   SimpleValue(Instruction *I) : Inst(I) {
100     assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
101   }
102 
103   bool isSentinel() const {
104     return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
105            Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
106   }
107 
108   static bool canHandle(Instruction *Inst) {
109     // This can only handle non-void readnone functions.
110     if (CallInst *CI = dyn_cast<CallInst>(Inst))
111       return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
112     return isa<CastInst>(Inst) || isa<UnaryOperator>(Inst) ||
113            isa<BinaryOperator>(Inst) || isa<GetElementPtrInst>(Inst) ||
114            isa<CmpInst>(Inst) || isa<SelectInst>(Inst) ||
115            isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
116            isa<ShuffleVectorInst>(Inst) || isa<ExtractValueInst>(Inst) ||
117            isa<InsertValueInst>(Inst) || isa<FreezeInst>(Inst);
118   }
119 };
120 
121 } // end anonymous namespace
122 
123 namespace llvm {
124 
125 template <> struct DenseMapInfo<SimpleValue> {
126   static inline SimpleValue getEmptyKey() {
127     return DenseMapInfo<Instruction *>::getEmptyKey();
128   }
129 
130   static inline SimpleValue getTombstoneKey() {
131     return DenseMapInfo<Instruction *>::getTombstoneKey();
132   }
133 
134   static unsigned getHashValue(SimpleValue Val);
135   static bool isEqual(SimpleValue LHS, SimpleValue RHS);
136 };
137 
138 } // end namespace llvm
139 
140 /// Match a 'select' including an optional 'not' of the condition.
141 static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A,
142                                            Value *&B,
143                                            SelectPatternFlavor &Flavor) {
144   // Return false if V is not even a select.
145   if (!match(V, m_Select(m_Value(Cond), m_Value(A), m_Value(B))))
146     return false;
147 
148   // Look through a 'not' of the condition operand by swapping A/B.
149   Value *CondNot;
150   if (match(Cond, m_Not(m_Value(CondNot)))) {
151     Cond = CondNot;
152     std::swap(A, B);
153   }
154 
155   // Match canonical forms of abs/nabs/min/max. We are not using ValueTracking's
156   // more powerful matchSelectPattern() because it may rely on instruction flags
157   // such as "nsw". That would be incompatible with the current hashing
158   // mechanism that may remove flags to increase the likelihood of CSE.
159 
160   // These are the canonical forms of abs(X) and nabs(X) created by instcombine:
161   // %N = sub i32 0, %X
162   // %C = icmp slt i32 %X, 0
163   // %ABS = select i1 %C, i32 %N, i32 %X
164   //
165   // %N = sub i32 0, %X
166   // %C = icmp slt i32 %X, 0
167   // %NABS = select i1 %C, i32 %X, i32 %N
168   Flavor = SPF_UNKNOWN;
169   CmpInst::Predicate Pred;
170   if (match(Cond, m_ICmp(Pred, m_Specific(B), m_ZeroInt())) &&
171       Pred == ICmpInst::ICMP_SLT && match(A, m_Neg(m_Specific(B)))) {
172     // ABS: B < 0 ? -B : B
173     Flavor = SPF_ABS;
174     return true;
175   }
176   if (match(Cond, m_ICmp(Pred, m_Specific(A), m_ZeroInt())) &&
177       Pred == ICmpInst::ICMP_SLT && match(B, m_Neg(m_Specific(A)))) {
178     // NABS: A < 0 ? A : -A
179     Flavor = SPF_NABS;
180     return true;
181   }
182 
183   if (!match(Cond, m_ICmp(Pred, m_Specific(A), m_Specific(B)))) {
184     // Check for commuted variants of min/max by swapping predicate.
185     // If we do not match the standard or commuted patterns, this is not a
186     // recognized form of min/max, but it is still a select, so return true.
187     if (!match(Cond, m_ICmp(Pred, m_Specific(B), m_Specific(A))))
188       return true;
189     Pred = ICmpInst::getSwappedPredicate(Pred);
190   }
191 
192   switch (Pred) {
193   case CmpInst::ICMP_UGT: Flavor = SPF_UMAX; break;
194   case CmpInst::ICMP_ULT: Flavor = SPF_UMIN; break;
195   case CmpInst::ICMP_SGT: Flavor = SPF_SMAX; break;
196   case CmpInst::ICMP_SLT: Flavor = SPF_SMIN; break;
197   default: break;
198   }
199 
200   return true;
201 }
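// Editorial sketch (not part of the upstream source): with a 'not' of the
// condition canonicalized as an xor with true, IR such as
//   %notc = xor i1 %c, true
//   %s    = select i1 %notc, i32 %a, i32 %b
// is matched above with Cond = %c and A/B swapped, i.e. it is treated like
// "select i1 %c, i32 %b, i32 %a" for hashing and equality purposes.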
202 
203 static unsigned getHashValueImpl(SimpleValue Val) {
204   Instruction *Inst = Val.Inst;
205   // Hash in all of the operands as pointers.
206   if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst)) {
207     Value *LHS = BinOp->getOperand(0);
208     Value *RHS = BinOp->getOperand(1);
209     if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
210       std::swap(LHS, RHS);
211 
212     return hash_combine(BinOp->getOpcode(), LHS, RHS);
213   }
214 
215   if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
216     // Compares can be commuted by swapping the comparands and
217     // updating the predicate.  Choose the form that has the
218     // comparands in sorted order, or in the case of a tie, the
219     // one with the lower predicate.
220     Value *LHS = CI->getOperand(0);
221     Value *RHS = CI->getOperand(1);
222     CmpInst::Predicate Pred = CI->getPredicate();
223     CmpInst::Predicate SwappedPred = CI->getSwappedPredicate();
224     if (std::tie(LHS, Pred) > std::tie(RHS, SwappedPred)) {
225       std::swap(LHS, RHS);
226       Pred = SwappedPred;
227     }
228     return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
229   }
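  // Editorial example (not from the upstream source): because commutative
  // binary operators and compares are hashed in a canonical operand order,
  // pairs such as
  //   %x = add i32 %a, %b          and   %y = add i32 %b, %a
  //   %p = icmp slt i32 %a, %b     and   %q = icmp sgt i32 %b, %a
  // receive the same hash value, which lets isEqual() consider them for CSE.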
230 
231   // Hash general selects to allow matching commuted true/false operands.
232   SelectPatternFlavor SPF;
233   Value *Cond, *A, *B;
234   if (matchSelectWithOptionalNotCond(Inst, Cond, A, B, SPF)) {
235     // Hash min/max/abs (cmp + select) to allow for commuted operands.
236     // Min/max may also have a non-canonical compare predicate (e.g., the
237     // compare for smin may use 'sgt' rather than 'slt'), and non-canonical
238     // operands in the compare.
239     // TODO: We should also detect FP min/max.
240     if (SPF == SPF_SMIN || SPF == SPF_SMAX ||
241         SPF == SPF_UMIN || SPF == SPF_UMAX) {
242       if (A > B)
243         std::swap(A, B);
244       return hash_combine(Inst->getOpcode(), SPF, A, B);
245     }
246     if (SPF == SPF_ABS || SPF == SPF_NABS) {
247       // ABS/NABS always puts the input in A and its negation in B.
248       return hash_combine(Inst->getOpcode(), SPF, A, B);
249     }
250 
251     // Hash general selects to allow matching commuted true/false operands.
252 
253     // If we do not have a compare as the condition, just hash in the condition.
254     CmpInst::Predicate Pred;
255     Value *X, *Y;
256     if (!match(Cond, m_Cmp(Pred, m_Value(X), m_Value(Y))))
257       return hash_combine(Inst->getOpcode(), Cond, A, B);
258 
259     // Similar to cmp normalization (above) - canonicalize the predicate value:
260     // select (icmp Pred, X, Y), A, B --> select (icmp InvPred, X, Y), B, A
261     if (CmpInst::getInversePredicate(Pred) < Pred) {
262       Pred = CmpInst::getInversePredicate(Pred);
263       std::swap(A, B);
264     }
265     return hash_combine(Inst->getOpcode(), Pred, X, Y, A, B);
266   }
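  // Hypothetical illustration of the select canonicalization above (editorial
  // note, not upstream code): given
  //   %c1 = icmp ugt i32 %x, %y
  //   %s1 = select i1 %c1, i32 %a, i32 %b
  //   %c2 = icmp ule i32 %x, %y
  //   %s2 = select i1 %c2, i32 %b, i32 %a
  // both selects hash to the same value because 'ule' is the inverse of 'ugt'
  // and the true/false arms are swapped accordingly.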
267 
268   if (CastInst *CI = dyn_cast<CastInst>(Inst))
269     return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0));
270 
271   if (FreezeInst *FI = dyn_cast<FreezeInst>(Inst))
272     return hash_combine(FI->getOpcode(), FI->getOperand(0));
273 
274   if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst))
275     return hash_combine(EVI->getOpcode(), EVI->getOperand(0),
276                         hash_combine_range(EVI->idx_begin(), EVI->idx_end()));
277 
278   if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst))
279     return hash_combine(IVI->getOpcode(), IVI->getOperand(0),
280                         IVI->getOperand(1),
281                         hash_combine_range(IVI->idx_begin(), IVI->idx_end()));
282 
283   assert((isa<CallInst>(Inst) || isa<GetElementPtrInst>(Inst) ||
284           isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
285           isa<ShuffleVectorInst>(Inst) || isa<UnaryOperator>(Inst) ||
286           isa<FreezeInst>(Inst)) &&
287          "Invalid/unknown instruction");
288 
289   // Mix in the opcode.
290   return hash_combine(
291       Inst->getOpcode(),
292       hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
293 }
294 
295 unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
296 #ifndef NDEBUG
297   // If -earlycse-debug-hash was specified, return a constant -- this
298   // will force all hashing to collide, so we'll exhaustively search
299   // the table for a match, and the assertion in isEqual will fire if
300   // there's a bug causing equal keys to hash differently.
301   if (EarlyCSEDebugHash)
302     return 0;
303 #endif
304   return getHashValueImpl(Val);
305 }
306 
307 static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) {
308   Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
309 
310   if (LHS.isSentinel() || RHS.isSentinel())
311     return LHSI == RHSI;
312 
313   if (LHSI->getOpcode() != RHSI->getOpcode())
314     return false;
315   if (LHSI->isIdenticalToWhenDefined(RHSI))
316     return true;
317 
318   // If we're not strictly identical, we still might be commutable instructions.
319   if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
320     if (!LHSBinOp->isCommutative())
321       return false;
322 
323     assert(isa<BinaryOperator>(RHSI) &&
324            "same opcode, but different instruction type?");
325     BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);
326 
327     // Commuted equality
328     return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
329            LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
330   }
331   if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
332     assert(isa<CmpInst>(RHSI) &&
333            "same opcode, but different instruction type?");
334     CmpInst *RHSCmp = cast<CmpInst>(RHSI);
335     // Commuted equality
336     return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
337            LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
338            LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
339   }
340 
341   // Min/max/abs can occur with commuted operands, non-canonical predicates,
342   // and/or non-canonical operands.
343   // Selects can be non-trivially equivalent via inverted conditions and swaps.
344   SelectPatternFlavor LSPF, RSPF;
345   Value *CondL, *CondR, *LHSA, *RHSA, *LHSB, *RHSB;
346   if (matchSelectWithOptionalNotCond(LHSI, CondL, LHSA, LHSB, LSPF) &&
347       matchSelectWithOptionalNotCond(RHSI, CondR, RHSA, RHSB, RSPF)) {
348     if (LSPF == RSPF) {
349       // TODO: We should also detect FP min/max.
350       if (LSPF == SPF_SMIN || LSPF == SPF_SMAX ||
351           LSPF == SPF_UMIN || LSPF == SPF_UMAX)
352         return ((LHSA == RHSA && LHSB == RHSB) ||
353                 (LHSA == RHSB && LHSB == RHSA));
354 
355       if (LSPF == SPF_ABS || LSPF == SPF_NABS) {
356         // Abs/nabs results are placed in a defined order by the matcher above.
357         return LHSA == RHSA && LHSB == RHSB;
358       }
359 
360       // select Cond, A, B <--> select not(Cond), B, A
361       if (CondL == CondR && LHSA == RHSA && LHSB == RHSB)
362         return true;
363     }
364 
365     // If the true/false operands are swapped and the conditions are compares
366     // with inverted predicates, the selects are equal:
367     // select (icmp Pred, X, Y), A, B <--> select (icmp InvPred, X, Y), B, A
368     //
369     // This also handles patterns with a double-negation in the sense of not +
370     // inverse, because we looked through a 'not' in the matching function and
371     // swapped A/B:
372     // select (cmp Pred, X, Y), A, B <--> select (not (cmp InvPred, X, Y)), B, A
373     //
374     // This intentionally does NOT handle patterns with a double-negation in
375     // the sense of not + not, because doing so could result in values
376     // comparing as equal that hash differently in the min/max/abs cases
377     // like:
378     // select (cmp slt, X, Y), X, Y <--> select (not (not (cmp slt, X, Y))), X, Y
379     //   ^ hashes as min                  ^ would not hash as min
380     // In the context of the EarlyCSE pass, however, such cases never reach
381     // this code, as we simplify the double-negation before hashing the second
382     // select (and so still succeed at CSEing them).
383     if (LHSA == RHSB && LHSB == RHSA) {
384       CmpInst::Predicate PredL, PredR;
385       Value *X, *Y;
386       if (match(CondL, m_Cmp(PredL, m_Value(X), m_Value(Y))) &&
387           match(CondR, m_Cmp(PredR, m_Specific(X), m_Specific(Y))) &&
388           CmpInst::getInversePredicate(PredL) == PredR)
389         return true;
390     }
391   }
392 
393   return false;
394 }
395 
396 bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
397   // These comparisons are nontrivial, so assert that equality implies
398   // hash equality (DenseMap demands this as an invariant).
399   bool Result = isEqualImpl(LHS, RHS);
400   assert(!Result || (LHS.isSentinel() && LHS.Inst == RHS.Inst) ||
401          getHashValueImpl(LHS) == getHashValueImpl(RHS));
402   return Result;
403 }
404 
405 //===----------------------------------------------------------------------===//
406 // CallValue
407 //===----------------------------------------------------------------------===//
408 
409 namespace {
410 
411 /// Struct representing the available call values in the scoped hash
412 /// table.
413 struct CallValue {
414   Instruction *Inst;
415 
416   CallValue(Instruction *I) : Inst(I) {
417     assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
418   }
419 
420   bool isSentinel() const {
421     return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
422            Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
423   }
424 
425   static bool canHandle(Instruction *Inst) {
426     // Don't value number anything that returns void.
427     if (Inst->getType()->isVoidTy())
428       return false;
429 
430     CallInst *CI = dyn_cast<CallInst>(Inst);
431     if (!CI || !CI->onlyReadsMemory())
432       return false;
433     return true;
434   }
435 };
436 
437 } // end anonymous namespace
438 
439 namespace llvm {
440 
441 template <> struct DenseMapInfo<CallValue> {
442   static inline CallValue getEmptyKey() {
443     return DenseMapInfo<Instruction *>::getEmptyKey();
444   }
445 
446   static inline CallValue getTombstoneKey() {
447     return DenseMapInfo<Instruction *>::getTombstoneKey();
448   }
449 
450   static unsigned getHashValue(CallValue Val);
451   static bool isEqual(CallValue LHS, CallValue RHS);
452 };
453 
454 } // end namespace llvm
455 
456 unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
457   Instruction *Inst = Val.Inst;
458   // Hash all of the operands as pointers and mix in the opcode.
459   return hash_combine(
460       Inst->getOpcode(),
461       hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
462 }
463 
464 bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
465   Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
466   if (LHS.isSentinel() || RHS.isSentinel())
467     return LHSI == RHSI;
468   return LHSI->isIdenticalTo(RHSI);
469 }
470 
471 //===----------------------------------------------------------------------===//
472 // EarlyCSE implementation
473 //===----------------------------------------------------------------------===//
474 
475 namespace {
476 
477 /// A simple and fast domtree-based CSE pass.
478 ///
479 /// This pass does a simple depth-first walk over the dominator tree,
480 /// eliminating trivially redundant instructions and using instsimplify to
481 /// canonicalize things as it goes. It is intended to be fast and catch obvious
482 /// cases so that instcombine and other passes are more effective. It is
483 /// expected that a later pass of GVN will catch the interesting/hard cases.
484 class EarlyCSE {
485 public:
486   const TargetLibraryInfo &TLI;
487   const TargetTransformInfo &TTI;
488   DominatorTree &DT;
489   AssumptionCache &AC;
490   const SimplifyQuery SQ;
491   MemorySSA *MSSA;
492   std::unique_ptr<MemorySSAUpdater> MSSAUpdater;
493 
494   using AllocatorTy =
495       RecyclingAllocator<BumpPtrAllocator,
496                          ScopedHashTableVal<SimpleValue, Value *>>;
497   using ScopedHTType =
498       ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,
499                       AllocatorTy>;
500 
501   /// A scoped hash table of the current values of all of our simple
502   /// scalar expressions.
503   ///
504   /// As we walk down the domtree, we look to see if instructions are in this
505   /// table: if so, we replace them with what we find; otherwise we insert them
506   /// so that dominated values can succeed in their lookup.
507   ScopedHTType AvailableValues;
508 
509   /// A scoped hash table of the current values of previously encountered
510   /// memory locations.
511   ///
512   /// This allows us to get efficient access to dominating loads or stores when
513   /// we have a fully redundant load.  In addition to the most recent load, we
514   /// keep track of a generation count of the read, which is compared against
515   /// the current generation count.  The current generation count is incremented
516   /// after every possibly writing memory operation, which ensures that we only
517   /// CSE loads with other loads that have no intervening store.  Ordering
518   /// events (such as fences or atomic instructions) increment the generation
519   /// count as well; essentially, we model these as writes to all possible
520   /// locations.  Note that atomic and/or volatile loads and stores can be
521   /// present the table; it is the responsibility of the consumer to inspect
522   /// present in the table; it is the responsibility of the consumer to inspect
523   struct LoadValue {
524     Instruction *DefInst = nullptr;
525     unsigned Generation = 0;
526     int MatchingId = -1;
527     bool IsAtomic = false;
528 
529     LoadValue() = default;
530     LoadValue(Instruction *Inst, unsigned Generation, unsigned MatchingId,
531               bool IsAtomic)
532         : DefInst(Inst), Generation(Generation), MatchingId(MatchingId),
533           IsAtomic(IsAtomic) {}
534   };
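  // Editorial sketch of the generation counting described above (assumed
  // example; @clobber is a hypothetical external function):
  //   %v1 = load i32, i32* %p     ; recorded at generation 0
  //   call void @clobber()        ; may write memory -> generation becomes 1
  //   %v2 = load i32, i32* %p     ; seen at generation 1
  // The plain generation check alone does not allow CSE of %v2 with %v1; with
  // MemorySSA available, isSameMemGeneration() may still prove that no clobber
  // occurred in between.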
535 
536   using LoadMapAllocator =
537       RecyclingAllocator<BumpPtrAllocator,
538                          ScopedHashTableVal<Value *, LoadValue>>;
539   using LoadHTType =
540       ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
541                       LoadMapAllocator>;
542 
543   LoadHTType AvailableLoads;
544 
545   // A scoped hash table mapping memory locations (represented as typed
546   // addresses) to generation numbers at which that memory location became
547   // (henceforth indefinitely) invariant.
548   using InvariantMapAllocator =
549       RecyclingAllocator<BumpPtrAllocator,
550                          ScopedHashTableVal<MemoryLocation, unsigned>>;
551   using InvariantHTType =
552       ScopedHashTable<MemoryLocation, unsigned, DenseMapInfo<MemoryLocation>,
553                       InvariantMapAllocator>;
554   InvariantHTType AvailableInvariants;
555 
556   /// A scoped hash table of the current values of read-only call
557   /// values.
558   ///
559   /// It uses the same generation count as loads.
560   using CallHTType =
561       ScopedHashTable<CallValue, std::pair<Instruction *, unsigned>>;
562   CallHTType AvailableCalls;
563 
564   /// This is the current generation of the memory value.
565   unsigned CurrentGeneration = 0;
566 
567   /// Set up the EarlyCSE runner for a particular function.
568   EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI,
569            const TargetTransformInfo &TTI, DominatorTree &DT,
570            AssumptionCache &AC, MemorySSA *MSSA)
571       : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA),
572         MSSAUpdater(std::make_unique<MemorySSAUpdater>(MSSA)) {}
573 
574   bool run();
575 
576 private:
577   unsigned ClobberCounter = 0;
578   // Almost a POD, but needs to call the constructors for the scoped hash
579   // tables so that a new scope gets pushed on. These are RAII so that the
580   // scope gets popped when the NodeScope is destroyed.
581   class NodeScope {
582   public:
583     NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
584               InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls)
585       : Scope(AvailableValues), LoadScope(AvailableLoads),
586         InvariantScope(AvailableInvariants), CallScope(AvailableCalls) {}
587     NodeScope(const NodeScope &) = delete;
588     NodeScope &operator=(const NodeScope &) = delete;
589 
590   private:
591     ScopedHTType::ScopeTy Scope;
592     LoadHTType::ScopeTy LoadScope;
593     InvariantHTType::ScopeTy InvariantScope;
594     CallHTType::ScopeTy CallScope;
595   };
596 
597   // Contains all the needed information to create a stack for doing a
598   // depth-first traversal of the tree. This includes scopes for values,
599   // loads, and calls as well as the generation. There is a child iterator so
600   // that the children do not need to be stored separately.
601   class StackNode {
602   public:
603     StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
604               InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
605               unsigned cg, DomTreeNode *n, DomTreeNode::iterator child,
606               DomTreeNode::iterator end)
607         : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
608           EndIter(end),
609           Scopes(AvailableValues, AvailableLoads, AvailableInvariants,
610                  AvailableCalls)
611           {}
612     StackNode(const StackNode &) = delete;
613     StackNode &operator=(const StackNode &) = delete;
614 
615     // Accessors.
616     unsigned currentGeneration() { return CurrentGeneration; }
617     unsigned childGeneration() { return ChildGeneration; }
618     void childGeneration(unsigned generation) { ChildGeneration = generation; }
619     DomTreeNode *node() { return Node; }
620     DomTreeNode::iterator childIter() { return ChildIter; }
621 
622     DomTreeNode *nextChild() {
623       DomTreeNode *child = *ChildIter;
624       ++ChildIter;
625       return child;
626     }
627 
628     DomTreeNode::iterator end() { return EndIter; }
629     bool isProcessed() { return Processed; }
630     void process() { Processed = true; }
631 
632   private:
633     unsigned CurrentGeneration;
634     unsigned ChildGeneration;
635     DomTreeNode *Node;
636     DomTreeNode::iterator ChildIter;
637     DomTreeNode::iterator EndIter;
638     NodeScope Scopes;
639     bool Processed = false;
640   };
641 
642   /// Wrapper class to handle memory instructions, including loads,
643   /// stores and intrinsic loads and stores defined by the target.
644   class ParseMemoryInst {
645   public:
646     ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
647       : Inst(Inst) {
648       if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
649         if (TTI.getTgtMemIntrinsic(II, Info))
650           IsTargetMemInst = true;
651     }
652 
653     bool isLoad() const {
654       if (IsTargetMemInst) return Info.ReadMem;
655       return isa<LoadInst>(Inst);
656     }
657 
658     bool isStore() const {
659       if (IsTargetMemInst) return Info.WriteMem;
660       return isa<StoreInst>(Inst);
661     }
662 
663     bool isAtomic() const {
664       if (IsTargetMemInst)
665         return Info.Ordering != AtomicOrdering::NotAtomic;
666       return Inst->isAtomic();
667     }
668 
669     bool isUnordered() const {
670       if (IsTargetMemInst)
671         return Info.isUnordered();
672 
673       if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
674         return LI->isUnordered();
675       } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
676         return SI->isUnordered();
677       }
678       // Conservative answer
679       return !Inst->isAtomic();
680     }
681 
682     bool isVolatile() const {
683       if (IsTargetMemInst)
684         return Info.IsVolatile;
685 
686       if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
687         return LI->isVolatile();
688       } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
689         return SI->isVolatile();
690       }
691       // Conservative answer
692       return true;
693     }
694 
695     bool isInvariantLoad() const {
696       if (auto *LI = dyn_cast<LoadInst>(Inst))
697         return LI->hasMetadata(LLVMContext::MD_invariant_load);
698       return false;
699     }
700 
701     bool isMatchingMemLoc(const ParseMemoryInst &Inst) const {
702       return (getPointerOperand() == Inst.getPointerOperand() &&
703               getMatchingId() == Inst.getMatchingId());
704     }
705 
706     bool isValid() const { return getPointerOperand() != nullptr; }
707 
708     // For regular (non-intrinsic) loads/stores, this is set to -1. For
709     // intrinsic loads/stores, the id is retrieved from the corresponding
710     // field in the MemIntrinsicInfo structure.  That field contains
711     // non-negative values only.
712     int getMatchingId() const {
713       if (IsTargetMemInst) return Info.MatchingId;
714       return -1;
715     }
716 
717     Value *getPointerOperand() const {
718       if (IsTargetMemInst) return Info.PtrVal;
719       return getLoadStorePointerOperand(Inst);
720     }
721 
722     bool mayReadFromMemory() const {
723       if (IsTargetMemInst) return Info.ReadMem;
724       return Inst->mayReadFromMemory();
725     }
726 
727     bool mayWriteToMemory() const {
728       if (IsTargetMemInst) return Info.WriteMem;
729       return Inst->mayWriteToMemory();
730     }
731 
732   private:
733     bool IsTargetMemInst = false;
734     MemIntrinsicInfo Info;
735     Instruction *Inst;
736   };
737 
738   bool processNode(DomTreeNode *Node);
739 
740   bool handleBranchCondition(Instruction *CondInst, const BranchInst *BI,
741                              const BasicBlock *BB, const BasicBlock *Pred);
742 
743   Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
744     if (auto *LI = dyn_cast<LoadInst>(Inst))
745       return LI;
746     if (auto *SI = dyn_cast<StoreInst>(Inst))
747       return SI->getValueOperand();
748     assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
749     return TTI.getOrCreateResultFromMemIntrinsic(cast<IntrinsicInst>(Inst),
750                                                  ExpectedType);
751   }
752 
753   /// Return true if the instruction is known to only operate on memory
754   /// provably invariant in the given "generation".
755   bool isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt);
756 
757   bool isSameMemGeneration(unsigned EarlierGeneration, unsigned LaterGeneration,
758                            Instruction *EarlierInst, Instruction *LaterInst);
759 
760   void removeMSSA(Instruction &Inst) {
761     if (!MSSA)
762       return;
763     if (VerifyMemorySSA)
764       MSSA->verifyMemorySSA();
765     // Removing a store here can leave MemorySSA in an unoptimized state by
766     // creating MemoryPhis that have identical arguments and by creating
767     // MemoryUses whose defining access is not an actual clobber. The phi case
768     // is handled by MemorySSA when passing OptimizePhis = true to
769     // removeMemoryAccess.  The non-optimized MemoryUse case is lazily updated
770     // by MemorySSA's getClobberingMemoryAccess.
771     MSSAUpdater->removeMemoryAccess(&Inst, true);
772   }
773 };
774 
775 } // end anonymous namespace
776 
777 /// Determine if the memory referenced by LaterInst is from the same heap
778 /// version as EarlierInst.
779 /// This is currently called in two scenarios:
780 ///
781 ///   load p
782 ///   ...
783 ///   load p
784 ///
785 /// and
786 ///
787 ///   x = load p
788 ///   ...
789 ///   store x, p
790 ///
791 /// In both cases we want to verify that there are no possible writes to the
792 /// memory referenced by p between the earlier and later instruction.
793 bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
794                                    unsigned LaterGeneration,
795                                    Instruction *EarlierInst,
796                                    Instruction *LaterInst) {
797   // Check the simple memory generation tracking first.
798   if (EarlierGeneration == LaterGeneration)
799     return true;
800 
801   if (!MSSA)
802     return false;
803 
804   // If MemorySSA has determined that one of EarlierInst or LaterInst does not
805   // read/write memory, then we can safely return true here.
806   // FIXME: We could be more aggressive when checking doesNotAccessMemory(),
807   // onlyReadsMemory(), mayReadFromMemory(), and mayWriteToMemory() in this pass
808   // by also checking the MemorySSA MemoryAccess on the instruction.  Initial
809   // experiments suggest this isn't worthwhile, at least for C/C++ code compiled
810   // with the default optimization pipeline.
811   auto *EarlierMA = MSSA->getMemoryAccess(EarlierInst);
812   if (!EarlierMA)
813     return true;
814   auto *LaterMA = MSSA->getMemoryAccess(LaterInst);
815   if (!LaterMA)
816     return true;
817 
818   // Since we know LaterDef dominates LaterInst and EarlierInst dominates
819   // LaterInst, if LaterDef dominates EarlierInst then it can't occur between
820   // EarlierInst and LaterInst and neither can any other write that potentially
821   // clobbers LaterInst.
822   MemoryAccess *LaterDef;
823   if (ClobberCounter < EarlyCSEMssaOptCap) {
824     LaterDef = MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
825     ClobberCounter++;
826   } else
827     LaterDef = LaterMA->getDefiningAccess();
828 
829   return MSSA->dominates(LaterDef, EarlierMA);
830 }
831 
832 bool EarlyCSE::isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt) {
833   // A location loaded from with an invariant_load is assumed to *never* change
834   // within the visible scope of the compilation.
835   if (auto *LI = dyn_cast<LoadInst>(I))
836     if (LI->hasMetadata(LLVMContext::MD_invariant_load))
837       return true;
838 
839   auto MemLocOpt = MemoryLocation::getOrNone(I);
840   if (!MemLocOpt)
841     // "target" intrinsic forms of loads aren't currently known to
842     // MemoryLocation::get.  TODO
843     return false;
844   MemoryLocation MemLoc = *MemLocOpt;
845   if (!AvailableInvariants.count(MemLoc))
846     return false;
847 
848   // Is the generation at which this became invariant older than the
849   // current one?
850   return AvailableInvariants.lookup(MemLoc) <= GenAt;
851 }
852 
853 bool EarlyCSE::handleBranchCondition(Instruction *CondInst,
854                                      const BranchInst *BI, const BasicBlock *BB,
855                                      const BasicBlock *Pred) {
856   assert(BI->isConditional() && "Should be a conditional branch!");
857   assert(BI->getCondition() == CondInst && "Wrong condition?");
858   assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
859   auto *TorF = (BI->getSuccessor(0) == BB)
860                    ? ConstantInt::getTrue(BB->getContext())
861                    : ConstantInt::getFalse(BB->getContext());
862   auto MatchBinOp = [](Instruction *I, unsigned Opcode) {
863     if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(I))
864       return BOp->getOpcode() == Opcode;
865     return false;
866   };
867   // If the condition is an AND operation, we can propagate its operands into
868   // the true branch. If it is an OR operation, we can propagate them into the
869   // false branch.
870   unsigned PropagateOpcode =
871       (BI->getSuccessor(0) == BB) ? Instruction::And : Instruction::Or;
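  // Editorial example (not from the source) of the propagation above: given
  //   %a = icmp eq i32 %x, 0
  //   %b = icmp eq i32 %y, 0
  //   %c = and i1 %a, %b
  //   br i1 %c, label %taken, label %not.taken
  // in the %taken block, %c, %a and %b are all known to be true, so each is
  // inserted into AvailableValues as 'true' and dominated uses are replaced.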
872 
873   bool MadeChanges = false;
874   SmallVector<Instruction *, 4> WorkList;
875   SmallPtrSet<Instruction *, 4> Visited;
876   WorkList.push_back(CondInst);
877   while (!WorkList.empty()) {
878     Instruction *Curr = WorkList.pop_back_val();
879 
880     AvailableValues.insert(Curr, TorF);
881     LLVM_DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
882                       << Curr->getName() << "' as " << *TorF << " in "
883                       << BB->getName() << "\n");
884     if (!DebugCounter::shouldExecute(CSECounter)) {
885       LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
886     } else {
887       // Replace all dominated uses with the known value.
888       if (unsigned Count = replaceDominatedUsesWith(Curr, TorF, DT,
889                                                     BasicBlockEdge(Pred, BB))) {
890         NumCSECVP += Count;
891         MadeChanges = true;
892       }
893     }
894 
895     if (MatchBinOp(Curr, PropagateOpcode))
896       for (auto &Op : cast<BinaryOperator>(Curr)->operands())
897         if (Instruction *OPI = dyn_cast<Instruction>(Op))
898           if (SimpleValue::canHandle(OPI) && Visited.insert(OPI).second)
899             WorkList.push_back(OPI);
900   }
901 
902   return MadeChanges;
903 }
904 
905 bool EarlyCSE::processNode(DomTreeNode *Node) {
906   bool Changed = false;
907   BasicBlock *BB = Node->getBlock();
908 
909   // If this block has a single predecessor, then the predecessor is the parent
910   // of the domtree node and all of the live out memory values are still current
911   // in this block.  If this block has multiple predecessors, then they could
912   // have invalidated the live-out memory values of our parent value.  For now,
913   // just be conservative and invalidate memory if this block has multiple
914   // predecessors.
915   if (!BB->getSinglePredecessor())
916     ++CurrentGeneration;
917 
918   // If this node has a single predecessor which ends in a conditional branch,
919   // we can infer the value of the branch condition given that we took this
920   // path.  We need the single predecessor to ensure there's not another path
921   // which reaches this block where the condition might hold a different
922   // value.  Since we're adding this to the scoped hash table (like any other
923   // def), it will have been popped if we encounter a future merge block.
924   if (BasicBlock *Pred = BB->getSinglePredecessor()) {
925     auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
926     if (BI && BI->isConditional()) {
927       auto *CondInst = dyn_cast<Instruction>(BI->getCondition());
928       if (CondInst && SimpleValue::canHandle(CondInst))
929         Changed |= handleBranchCondition(CondInst, BI, BB, Pred);
930     }
931   }
932 
933   /// LastStore - Keep track of the last non-volatile store that we saw... for
934   /// as long as there is no instruction that reads memory.  If we see a store
935   /// to the same location, we delete the dead store.  This zaps trivial dead
936   /// stores which can occur in bitfield code among other things.
937   Instruction *LastStore = nullptr;
938 
939   // See if any instructions in the block can be eliminated.  If so, do it.  If
940   // not, add them to AvailableValues.
941   for (Instruction &Inst : make_early_inc_range(BB->getInstList())) {
942     // Dead instructions should just be removed.
943     if (isInstructionTriviallyDead(&Inst, &TLI)) {
944       LLVM_DEBUG(dbgs() << "EarlyCSE DCE: " << Inst << '\n');
945       if (!DebugCounter::shouldExecute(CSECounter)) {
946         LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
947         continue;
948       }
949 
950       salvageDebugInfoOrMarkUndef(Inst);
951       removeMSSA(Inst);
952       Inst.eraseFromParent();
953       Changed = true;
954       ++NumSimplify;
955       continue;
956     }
957 
958     // Skip assume intrinsics, they don't really have side effects (although
959     // they're marked as such to ensure preservation of control dependencies),
960     // and this pass will not bother with their removal. However, we should mark
961     // the assumed condition as true for all dominated blocks.
962     if (match(&Inst, m_Intrinsic<Intrinsic::assume>())) {
963       auto *CondI =
964           dyn_cast<Instruction>(cast<CallInst>(Inst).getArgOperand(0));
965       if (CondI && SimpleValue::canHandle(CondI)) {
966         LLVM_DEBUG(dbgs() << "EarlyCSE considering assumption: " << Inst
967                           << '\n');
968         AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
969       } else
970         LLVM_DEBUG(dbgs() << "EarlyCSE skipping assumption: " << Inst << '\n');
971       continue;
972     }
973 
974     // Skip sideeffect intrinsics, for the same reason as assume intrinsics.
975     if (match(&Inst, m_Intrinsic<Intrinsic::sideeffect>())) {
976       LLVM_DEBUG(dbgs() << "EarlyCSE skipping sideeffect: " << Inst << '\n');
977       continue;
978     }
979 
980     // We can skip all invariant.start intrinsics since they only read memory,
981     // and we can forward values across them. For invariant starts without
982     // invariant ends, we can use the fact that the invariantness never ends to
983     // start a scope in the current generation which is true for all future
984     // generations.  Also, we don't need to consume the last store since the
985     // semantics of invariant.start allow us to perform DSE of the last
986     // store, if there was a store following invariant.start. Consider:
987     //
988     // store 30, i8* p
989     // invariant.start(p)
990     // store 40, i8* p
991     // We can DSE the store of 30, since the store of 40 to invariant location p
992     // causes undefined behaviour.
993     if (match(&Inst, m_Intrinsic<Intrinsic::invariant_start>())) {
994       // If there are any uses, the scope might end.
995       if (!Inst.use_empty())
996         continue;
997       MemoryLocation MemLoc =
998           MemoryLocation::getForArgument(&cast<CallInst>(Inst), 1, TLI);
999       // Don't start a scope if we already have a better one pushed
1000       if (!AvailableInvariants.count(MemLoc))
1001         AvailableInvariants.insert(MemLoc, CurrentGeneration);
1002       continue;
1003     }
1004 
1005     if (isGuard(&Inst)) {
1006       if (auto *CondI =
1007               dyn_cast<Instruction>(cast<CallInst>(Inst).getArgOperand(0))) {
1008         if (SimpleValue::canHandle(CondI)) {
1009           // Do we already know the actual value of this condition?
1010           if (auto *KnownCond = AvailableValues.lookup(CondI)) {
1011             // Is the condition known to be true?
1012             if (isa<ConstantInt>(KnownCond) &&
1013                 cast<ConstantInt>(KnownCond)->isOne()) {
1014               LLVM_DEBUG(dbgs()
1015                          << "EarlyCSE removing guard: " << Inst << '\n');
1016               removeMSSA(Inst);
1017               Inst.eraseFromParent();
1018               Changed = true;
1019               continue;
1020             } else
1021               // Use the known value if it wasn't true.
1022               cast<CallInst>(Inst).setArgOperand(0, KnownCond);
1023           }
1024           // The condition we're guarding on here is true for all dominated
1025           // locations.
1026           AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
1027         }
1028       }
1029 
1030       // Guard intrinsics read all memory, but don't write any memory.
1031       // Accordingly, don't update the generation but consume the last store (to
1032       // avoid an incorrect DSE).
1033       LastStore = nullptr;
1034       continue;
1035     }
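    // Editorial sketch of the guard handling above (names are illustrative):
    //   %cond = icmp ult i32 %i, %len
    //   call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
    //   ...
    //   call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
    // After the first guard, %cond is recorded as true, so the second,
    // dominated guard on the same condition can be removed.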
1036 
1037     // If the instruction can be simplified (e.g. X+0 = X) then replace it with
1038     // its simpler value.
1039     if (Value *V = SimplifyInstruction(&Inst, SQ)) {
1040       LLVM_DEBUG(dbgs() << "EarlyCSE Simplify: " << Inst << "  to: " << *V
1041                         << '\n');
1042       if (!DebugCounter::shouldExecute(CSECounter)) {
1043         LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1044       } else {
1045         bool Killed = false;
1046         if (!Inst.use_empty()) {
1047           Inst.replaceAllUsesWith(V);
1048           Changed = true;
1049         }
1050         if (isInstructionTriviallyDead(&Inst, &TLI)) {
1051           removeMSSA(Inst);
1052           Inst.eraseFromParent();
1053           Changed = true;
1054           Killed = true;
1055         }
1056         if (Changed)
1057           ++NumSimplify;
1058         if (Killed)
1059           continue;
1060       }
1061     }
1062 
1063     // If this is a simple instruction that we can value number, process it.
1064     if (SimpleValue::canHandle(&Inst)) {
1065       // See if the instruction has an available value.  If so, use it.
1066       if (Value *V = AvailableValues.lookup(&Inst)) {
1067         LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << Inst << "  to: " << *V
1068                           << '\n');
1069         if (!DebugCounter::shouldExecute(CSECounter)) {
1070           LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1071           continue;
1072         }
1073         if (auto *I = dyn_cast<Instruction>(V))
1074           I->andIRFlags(&Inst);
1075         Inst.replaceAllUsesWith(V);
1076         removeMSSA(Inst);
1077         Inst.eraseFromParent();
1078         Changed = true;
1079         ++NumCSE;
1080         continue;
1081       }
1082 
1083       // Otherwise, just remember that this value is available.
1084       AvailableValues.insert(&Inst, &Inst);
1085       continue;
1086     }
1087 
1088     ParseMemoryInst MemInst(&Inst, TTI);
1089     // If this is a non-volatile load, process it.
1090     if (MemInst.isValid() && MemInst.isLoad()) {
1091       // (Conservatively) we can't peek past the ordering implied by this
1092       // operation, but we can add this load to our set of available values.
1093       if (MemInst.isVolatile() || !MemInst.isUnordered()) {
1094         LastStore = nullptr;
1095         ++CurrentGeneration;
1096       }
1097 
1098       if (MemInst.isInvariantLoad()) {
1099         // If we pass an invariant load, we know that memory location is
1100         // indefinitely constant from the moment of first dereferenceability.
1101         // We conservatively treat the invariant_load as that moment.  If we
1102         // pass an invariant load after already establishing a scope, don't
1103         // restart it since we want to preserve the earliest point seen.
1104         auto MemLoc = MemoryLocation::get(&Inst);
1105         if (!AvailableInvariants.count(MemLoc))
1106           AvailableInvariants.insert(MemLoc, CurrentGeneration);
1107       }
1108 
1109       // If we have an available version of this load, and if it is the right
1110       // generation or the load is known to be from an invariant location,
1111       // replace this instruction.
1112       //
1113       // If either the dominating load or the current load are invariant, then
1114       // we can assume the current load loads the same value as the dominating
1115       // load.
1116       LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
1117       if (InVal.DefInst != nullptr &&
1118           InVal.MatchingId == MemInst.getMatchingId() &&
1119           // We don't yet handle removing loads with ordering of any kind.
1120           !MemInst.isVolatile() && MemInst.isUnordered() &&
1121           // We can't replace an atomic load with one which isn't also atomic.
1122           InVal.IsAtomic >= MemInst.isAtomic() &&
1123           (isOperatingOnInvariantMemAt(&Inst, InVal.Generation) ||
1124            isSameMemGeneration(InVal.Generation, CurrentGeneration,
1125                                InVal.DefInst, &Inst))) {
1126         Value *Op = getOrCreateResult(InVal.DefInst, Inst.getType());
1127         if (Op != nullptr) {
1128           LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << Inst
1129                             << "  to: " << *InVal.DefInst << '\n');
1130           if (!DebugCounter::shouldExecute(CSECounter)) {
1131             LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1132             continue;
1133           }
1134           if (!Inst.use_empty())
1135             Inst.replaceAllUsesWith(Op);
1136           removeMSSA(Inst);
1137           Inst.eraseFromParent();
1138           Changed = true;
1139           ++NumCSELoad;
1140           continue;
1141         }
1142       }
1143 
1144       // Otherwise, remember that we have this instruction.
1145       AvailableLoads.insert(MemInst.getPointerOperand(),
1146                             LoadValue(&Inst, CurrentGeneration,
1147                                       MemInst.getMatchingId(),
1148                                       MemInst.isAtomic()));
1149       LastStore = nullptr;
1150       continue;
1151     }
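    // Editorial example of the load CSE above (not from the upstream source):
    //   %v1 = load i32, i32* %p
    //   %v2 = load i32, i32* %p    ; same pointer, same generation, unordered
    // The second load is replaced by %v1 and erased, while the available-loads
    // table keeps %p mapped to the first load for later lookups.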
1152 
1153     // If this instruction may read from memory or throw (and potentially read
1154     // from memory in the exception handler), forget LastStore.  Load/store
1155     // intrinsics will indicate both a read and a write to memory.  The target
1156     // may override this (e.g. so that a store intrinsic does not read from
1157     // memory, and thus will be treated the same as a regular store for
1158     // commoning purposes).
1159     if ((Inst.mayReadFromMemory() || Inst.mayThrow()) &&
1160         !(MemInst.isValid() && !MemInst.mayReadFromMemory()))
1161       LastStore = nullptr;
1162 
1163     // If this is a read-only call, process it.
1164     if (CallValue::canHandle(&Inst)) {
1165       // If we have an available version of this call, and if it is the right
1166       // generation, replace this instruction.
1167       std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(&Inst);
1168       if (InVal.first != nullptr &&
1169           isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first,
1170                               &Inst)) {
1171         LLVM_DEBUG(dbgs() << "EarlyCSE CSE CALL: " << Inst
1172                           << "  to: " << *InVal.first << '\n');
1173         if (!DebugCounter::shouldExecute(CSECounter)) {
1174           LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1175           continue;
1176         }
1177         if (!Inst.use_empty())
1178           Inst.replaceAllUsesWith(InVal.first);
1179         removeMSSA(Inst);
1180         Inst.eraseFromParent();
1181         Changed = true;
1182         ++NumCSECall;
1183         continue;
1184       }
1185 
1186       // Otherwise, remember that we have this instruction.
1187       AvailableCalls.insert(&Inst, std::make_pair(&Inst, CurrentGeneration));
1188       continue;
1189     }
1190 
1191     // A release fence requires that all stores complete before it, but does
1192     // not prevent the reordering of following loads 'before' the fence.  As a
1193     // result, we don't need to consider it as writing to memory and don't need
1194     // to advance the generation.  We do need to prevent DSE across the fence,
1195     // but that's handled above.
1196     if (auto *FI = dyn_cast<FenceInst>(&Inst))
1197       if (FI->getOrdering() == AtomicOrdering::Release) {
1198         assert(Inst.mayReadFromMemory() && "relied on to prevent DSE above");
1199         continue;
1200       }
1201 
1202     // write back DSE - If we write back the same value we just loaded from
1203     // the same location and haven't passed any intervening writes or ordering
1204     // operations, we can remove the write.  The primary benefit is in allowing
1205     // the available load table to remain valid and value forward past where
1206     // the store originally was.
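    // A hypothetical instance of the write-back DSE described above (editorial
    // sketch):
    //   %v = load i32, i32* %p
    //   ...                        ; no intervening writes or ordering ops
    //   store i32 %v, i32* %p
    // The store writes back the value that was just loaded, so it can be
    // removed while the available-load entry for %p stays valid.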
1207     if (MemInst.isValid() && MemInst.isStore()) {
1208       LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
1209       if (InVal.DefInst &&
1210           InVal.DefInst == getOrCreateResult(&Inst, InVal.DefInst->getType()) &&
1211           InVal.MatchingId == MemInst.getMatchingId() &&
1212           // We don't yet handle removing stores with ordering of any kind.
1213           !MemInst.isVolatile() && MemInst.isUnordered() &&
1214           (isOperatingOnInvariantMemAt(&Inst, InVal.Generation) ||
1215            isSameMemGeneration(InVal.Generation, CurrentGeneration,
1216                                InVal.DefInst, &Inst))) {
1217         // It is okay to have a LastStore to a different pointer here if MemorySSA
1218         // tells us that the load and store are from the same memory generation.
1219         // In that case, LastStore should keep its present value since we're
1220         // removing the current store.
1221         assert((!LastStore ||
1222                 ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
1223                     MemInst.getPointerOperand() ||
1224                 MSSA) &&
1225                "can't have an intervening store if not using MemorySSA!");
1226         LLVM_DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << Inst << '\n');
1227         if (!DebugCounter::shouldExecute(CSECounter)) {
1228           LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1229           continue;
1230         }
1231         removeMSSA(Inst);
1232         Inst.eraseFromParent();
1233         Changed = true;
1234         ++NumDSE;
1235         // We can avoid incrementing the generation count since we were able
1236         // to eliminate this store.
1237         continue;
1238       }
1239     }
1240 
1241     // Okay, this isn't something we can CSE at all.  Check to see if it is
1242     // something that could modify memory.  If so, our available memory values
1243     // cannot be used so bump the generation count.
1244     if (Inst.mayWriteToMemory()) {
1245       ++CurrentGeneration;
1246 
1247       if (MemInst.isValid() && MemInst.isStore()) {
1248         // We do a trivial form of DSE if there are two stores to the same
1249         // location with no intervening loads.  Delete the earlier store.
1250         // At the moment, we don't remove ordered stores, but do remove
1251         // unordered atomic stores.  There's no special requirement (for
1252         // unordered atomics) about removing atomic stores only in favor of
1253         // other atomic stores since we were going to execute the non-atomic
1254         // one anyway and the atomic one might never have become visible.
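        // Editorial example of this trivial DSE (not from the source):
        //   store i32 1, i32* %p   ; LastStore
        //   store i32 2, i32* %p   ; same location, no intervening load
        // The earlier store of 1 is deleted, and the later store then becomes
        // the new LastStore.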
1255         if (LastStore) {
1256           ParseMemoryInst LastStoreMemInst(LastStore, TTI);
1257           assert(LastStoreMemInst.isUnordered() &&
1258                  !LastStoreMemInst.isVolatile() &&
1259                  "Violated invariant");
1260           if (LastStoreMemInst.isMatchingMemLoc(MemInst)) {
1261             LLVM_DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
1262                               << "  due to: " << Inst << '\n');
1263             if (!DebugCounter::shouldExecute(CSECounter)) {
1264               LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1265             } else {
1266               removeMSSA(*LastStore);
1267               LastStore->eraseFromParent();
1268               Changed = true;
1269               ++NumDSE;
1270               LastStore = nullptr;
1271             }
1272           }
1273           // fallthrough - we can exploit information about this store
1274         }
1275 
1276         // Okay, we just invalidated anything we knew about loaded values.  Try
1277         // to salvage *something* by remembering that the stored value is a live
1278         // version of the pointer.  It is safe to forward from volatile stores
1279         // to non-volatile loads, so we don't have to check for volatility of
1280         // the store.
1281         AvailableLoads.insert(MemInst.getPointerOperand(),
1282                               LoadValue(&Inst, CurrentGeneration,
1283                                         MemInst.getMatchingId(),
1284                                         MemInst.isAtomic()));
1285 
1286         // Remember that this was the last unordered store we saw for DSE. We
1287         // don't yet handle DSE on ordered or volatile stores since we don't
1288         // have a good way to model the ordering requirement for following
1289         // passes once the store is removed.  We could insert a fence, but
1290         // since fences are slightly stronger than stores in their ordering,
1291         // it's not clear this is a profitable transform. Another option would
1292         // be to merge the ordering with that of the post dominating store.
1293         if (MemInst.isUnordered() && !MemInst.isVolatile())
1294           LastStore = &Inst;
1295         else
1296           LastStore = nullptr;
1297       }
1298     }
1299   }
1300 
1301   return Changed;
1302 }
1303 
1304 bool EarlyCSE::run() {
1305   // Note, deque is being used here because there are significant performance
1306   // gains over vector when the container becomes very large due to the
1307   // specific access patterns. For more information see the mailing list
1308   // discussion on this:
1309   // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
1310   std::deque<StackNode *> nodesToProcess;
1311 
1312   bool Changed = false;
1313 
1314   // Process the root node.
1315   nodesToProcess.push_back(new StackNode(
1316       AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
1317       CurrentGeneration, DT.getRootNode(),
1318       DT.getRootNode()->begin(), DT.getRootNode()->end()));
1319 
1320   assert(!CurrentGeneration && "Create a new EarlyCSE instance to rerun it.");
1321 
1322   // Process the stack.
1323   while (!nodesToProcess.empty()) {
1324     // Grab the item on top of the stack. Set the current generation, process
1325     // the node if needed, and pop it once it and its children are done.
1326     StackNode *NodeToProcess = nodesToProcess.back();
1327 
1328     // Initialize class members.
1329     CurrentGeneration = NodeToProcess->currentGeneration();
1330 
1331     // Check if the node needs to be processed.
1332     if (!NodeToProcess->isProcessed()) {
1333       // Process the node.
1334       Changed |= processNode(NodeToProcess->node());
1335       NodeToProcess->childGeneration(CurrentGeneration);
1336       NodeToProcess->process();
1337     } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
1338       // Push the next child onto the stack.
1339       DomTreeNode *child = NodeToProcess->nextChild();
1340       nodesToProcess.push_back(
1341           new StackNode(AvailableValues, AvailableLoads, AvailableInvariants,
1342                         AvailableCalls, NodeToProcess->childGeneration(),
1343                         child, child->begin(), child->end()));
1344     } else {
1345       // It has been processed, and there are no more children to process,
1346       // so delete it and pop it off the stack.
1347       delete NodeToProcess;
1348       nodesToProcess.pop_back();
1349     }
1350   } // while (!nodes...)
1351 
1352   return Changed;
1353 }
1354 
1355 PreservedAnalyses EarlyCSEPass::run(Function &F,
1356                                     FunctionAnalysisManager &AM) {
1357   auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1358   auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1359   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1360   auto &AC = AM.getResult<AssumptionAnalysis>(F);
1361   auto *MSSA =
1362       UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr;
1363 
1364   EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);
1365 
1366   if (!CSE.run())
1367     return PreservedAnalyses::all();
1368 
1369   PreservedAnalyses PA;
1370   PA.preserveSet<CFGAnalyses>();
1371   PA.preserve<GlobalsAA>();
1372   if (UseMemorySSA)
1373     PA.preserve<MemorySSAAnalysis>();
1374   return PA;
1375 }
1376 
1377 namespace {
1378 
1379 /// A simple and fast domtree-based CSE pass.
1380 ///
1381 /// This pass does a simple depth-first walk over the dominator tree,
1382 /// eliminating trivially redundant instructions and using instsimplify to
1383 /// canonicalize things as it goes. It is intended to be fast and catch obvious
1384 /// cases so that instcombine and other passes are more effective. It is
1385 /// expected that a later pass of GVN will catch the interesting/hard cases.
1386 template<bool UseMemorySSA>
1387 class EarlyCSELegacyCommonPass : public FunctionPass {
1388 public:
1389   static char ID;
1390 
1391   EarlyCSELegacyCommonPass() : FunctionPass(ID) {
1392     if (UseMemorySSA)
1393       initializeEarlyCSEMemSSALegacyPassPass(*PassRegistry::getPassRegistry());
1394     else
1395       initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
1396   }
1397 
1398   bool runOnFunction(Function &F) override {
1399     if (skipFunction(F))
1400       return false;
1401 
1402     auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1403     auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1404     auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1405     auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1406     auto *MSSA =
1407         UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr;
1408 
1409     EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);
1410 
1411     return CSE.run();
1412   }
1413 
1414   void getAnalysisUsage(AnalysisUsage &AU) const override {
1415     AU.addRequired<AssumptionCacheTracker>();
1416     AU.addRequired<DominatorTreeWrapperPass>();
1417     AU.addRequired<TargetLibraryInfoWrapperPass>();
1418     AU.addRequired<TargetTransformInfoWrapperPass>();
1419     if (UseMemorySSA) {
1420       AU.addRequired<MemorySSAWrapperPass>();
1421       AU.addPreserved<MemorySSAWrapperPass>();
1422     }
1423     AU.addPreserved<GlobalsAAWrapperPass>();
1424     AU.addPreserved<AAResultsWrapperPass>();
1425     AU.setPreservesCFG();
1426   }
1427 };
1428 
1429 } // end anonymous namespace
1430 
1431 using EarlyCSELegacyPass = EarlyCSELegacyCommonPass</*UseMemorySSA=*/false>;
1432 
1433 template<>
1434 char EarlyCSELegacyPass::ID = 0;
1435 
1436 INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false,
1437                       false)
1438 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
1439 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1440 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1441 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1442 INITIALIZE_PASS_END(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false)
1443 
1444 using EarlyCSEMemSSALegacyPass =
1445     EarlyCSELegacyCommonPass</*UseMemorySSA=*/true>;
1446 
1447 template<>
1448 char EarlyCSEMemSSALegacyPass::ID = 0;
1449 
1450 FunctionPass *llvm::createEarlyCSEPass(bool UseMemorySSA) {
1451   if (UseMemorySSA)
1452     return new EarlyCSEMemSSALegacyPass();
1453   else
1454     return new EarlyCSELegacyPass();
1455 }
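// Hedged usage sketch (editorial addition, not part of the upstream source):
// with the legacy pass manager this pass is typically scheduled as
//   PM.add(createEarlyCSEPass(/*UseMemorySSA=*/true));
// while the new pass manager constructs EarlyCSEPass(UseMemorySSA) and runs it
// through the FunctionAnalysisManager, as in EarlyCSEPass::run() above.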
1456 
1457 INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
1458                       "Early CSE w/ MemorySSA", false, false)
1459 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
1460 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1461 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1462 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1463 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
1464 INITIALIZE_PASS_END(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
1465                     "Early CSE w/ MemorySSA", false, false)
1466