1 //===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs a simple dominator tree walk that eliminates trivially
10 // redundant instructions.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Scalar/EarlyCSE.h"
15 #include "llvm/ADT/DenseMapInfo.h"
16 #include "llvm/ADT/Hashing.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/ScopedHashTable.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/Analysis/AssumptionCache.h"
22 #include "llvm/Analysis/GlobalsModRef.h"
23 #include "llvm/Analysis/GuardUtils.h"
24 #include "llvm/Analysis/InstructionSimplify.h"
25 #include "llvm/Analysis/MemorySSA.h"
26 #include "llvm/Analysis/MemorySSAUpdater.h"
27 #include "llvm/Analysis/TargetLibraryInfo.h"
28 #include "llvm/Analysis/TargetTransformInfo.h"
29 #include "llvm/Analysis/ValueTracking.h"
30 #include "llvm/IR/BasicBlock.h"
31 #include "llvm/IR/Constants.h"
32 #include "llvm/IR/Dominators.h"
33 #include "llvm/IR/Function.h"
34 #include "llvm/IR/InstrTypes.h"
35 #include "llvm/IR/Instruction.h"
36 #include "llvm/IR/Instructions.h"
37 #include "llvm/IR/IntrinsicInst.h"
38 #include "llvm/IR/LLVMContext.h"
39 #include "llvm/IR/PassManager.h"
40 #include "llvm/IR/PatternMatch.h"
41 #include "llvm/IR/Type.h"
42 #include "llvm/IR/Value.h"
43 #include "llvm/InitializePasses.h"
44 #include "llvm/Pass.h"
45 #include "llvm/Support/Allocator.h"
46 #include "llvm/Support/AtomicOrdering.h"
47 #include "llvm/Support/Casting.h"
48 #include "llvm/Support/Debug.h"
49 #include "llvm/Support/DebugCounter.h"
50 #include "llvm/Support/RecyclingAllocator.h"
51 #include "llvm/Support/raw_ostream.h"
52 #include "llvm/Transforms/Scalar.h"
53 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
54 #include "llvm/Transforms/Utils/Local.h"
55 #include <cassert>
56 #include <deque>
57 #include <memory>
58 #include <utility>
59 
60 using namespace llvm;
61 using namespace llvm::PatternMatch;
62 
63 #define DEBUG_TYPE "early-cse"
64 
65 STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
66 STATISTIC(NumCSE,      "Number of instructions CSE'd");
67 STATISTIC(NumCSECVP,   "Number of compare instructions CVP'd");
68 STATISTIC(NumCSELoad,  "Number of load instructions CSE'd");
69 STATISTIC(NumCSECall,  "Number of call instructions CSE'd");
70 STATISTIC(NumDSE,      "Number of trivial dead stores removed");
71 
72 DEBUG_COUNTER(CSECounter, "early-cse",
73               "Controls which instructions are removed");
74 
75 static cl::opt<unsigned> EarlyCSEMssaOptCap(
76     "earlycse-mssa-optimization-cap", cl::init(500), cl::Hidden,
77     cl::desc("Enable imprecision in EarlyCSE in pathological cases, in exchange "
78              "for faster compile times. Caps the MemorySSA clobbering calls."));
79 
80 static cl::opt<bool> EarlyCSEDebugHash(
81     "earlycse-debug-hash", cl::init(false), cl::Hidden,
82     cl::desc("Perform extra assertion checking to verify that SimpleValue's hash "
83              "function is well-behaved w.r.t. its isEqual predicate"));
84 
85 //===----------------------------------------------------------------------===//
86 // SimpleValue
87 //===----------------------------------------------------------------------===//
88 
89 namespace {
90 
91 /// Struct representing the available values in the scoped hash table.
92 struct SimpleValue {
93   Instruction *Inst;
94 
95   SimpleValue(Instruction *I) : Inst(I) {
96     assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
97   }
98 
99   bool isSentinel() const {
100     return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
101            Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
102   }
103 
104   static bool canHandle(Instruction *Inst) {
105     // This can only handle non-void readnone functions.
106     // Also handled are constrained intrinsics that look like the types
107     // of instructions handled below (UnaryOperator, etc.).
108     if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
109       if (Function *F = CI->getCalledFunction()) {
110         switch ((Intrinsic::ID)F->getIntrinsicID()) {
111         case Intrinsic::experimental_constrained_fadd:
112         case Intrinsic::experimental_constrained_fsub:
113         case Intrinsic::experimental_constrained_fmul:
114         case Intrinsic::experimental_constrained_fdiv:
115         case Intrinsic::experimental_constrained_frem:
116         case Intrinsic::experimental_constrained_fptosi:
117         case Intrinsic::experimental_constrained_sitofp:
118         case Intrinsic::experimental_constrained_fptoui:
119         case Intrinsic::experimental_constrained_uitofp:
120         case Intrinsic::experimental_constrained_fcmp:
121         case Intrinsic::experimental_constrained_fcmps: {
122           auto *CFP = cast<ConstrainedFPIntrinsic>(CI);
123           if (CFP->getExceptionBehavior() &&
124               CFP->getExceptionBehavior() == fp::ebStrict)
125             return false;
126           // Since we CSE across function calls we must not allow
127           // the rounding mode to change.
128           if (CFP->getRoundingMode() &&
129               CFP->getRoundingMode() == RoundingMode::Dynamic)
130             return false;
131           return true;
132         }
133         }
134       }
135       return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
136     }
137     return isa<CastInst>(Inst) || isa<UnaryOperator>(Inst) ||
138            isa<BinaryOperator>(Inst) || isa<GetElementPtrInst>(Inst) ||
139            isa<CmpInst>(Inst) || isa<SelectInst>(Inst) ||
140            isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
141            isa<ShuffleVectorInst>(Inst) || isa<ExtractValueInst>(Inst) ||
142            isa<InsertValueInst>(Inst) || isa<FreezeInst>(Inst);
143   }
144 };
145 
146 } // end anonymous namespace
147 
148 namespace llvm {
149 
150 template <> struct DenseMapInfo<SimpleValue> {
151   static inline SimpleValue getEmptyKey() {
152     return DenseMapInfo<Instruction *>::getEmptyKey();
153   }
154 
155   static inline SimpleValue getTombstoneKey() {
156     return DenseMapInfo<Instruction *>::getTombstoneKey();
157   }
158 
159   static unsigned getHashValue(SimpleValue Val);
160   static bool isEqual(SimpleValue LHS, SimpleValue RHS);
161 };
162 
163 } // end namespace llvm
164 
165 /// Match a 'select' including an optional 'not' of the condition.
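    /// For example, given
    ///   %na = xor i1 %c, true
    ///   %s  = select i1 %na, i32 %a, i32 %b
    /// this returns Cond = %c with A and B swapped, so %s is treated the same
    /// as 'select i1 %c, i32 %b, i32 %a' by the hashing and equality functions
    /// below.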
166 static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A,
167                                            Value *&B,
168                                            SelectPatternFlavor &Flavor) {
169   // Return false if V is not even a select.
170   if (!match(V, m_Select(m_Value(Cond), m_Value(A), m_Value(B))))
171     return false;
172 
173   // Look through a 'not' of the condition operand by swapping A/B.
174   Value *CondNot;
175   if (match(Cond, m_Not(m_Value(CondNot)))) {
176     Cond = CondNot;
177     std::swap(A, B);
178   }
179 
180   // Match canonical forms of min/max. We are not using ValueTracking's
181   // more powerful matchSelectPattern() because it may rely on instruction flags
182   // such as "nsw". That would be incompatible with the current hashing
183   // mechanism that may remove flags to increase the likelihood of CSE.
184 
185   Flavor = SPF_UNKNOWN;
186   CmpInst::Predicate Pred;
187 
188   if (!match(Cond, m_ICmp(Pred, m_Specific(A), m_Specific(B)))) {
189     // Check for commuted variants of min/max by swapping the predicate.
190     // If we do not match the standard or commuted patterns, this is not a
191     // recognized form of min/max, but it is still a select, so return true.
192     if (!match(Cond, m_ICmp(Pred, m_Specific(B), m_Specific(A))))
193       return true;
194     Pred = ICmpInst::getSwappedPredicate(Pred);
195   }
196 
197   switch (Pred) {
198   case CmpInst::ICMP_UGT: Flavor = SPF_UMAX; break;
199   case CmpInst::ICMP_ULT: Flavor = SPF_UMIN; break;
200   case CmpInst::ICMP_SGT: Flavor = SPF_SMAX; break;
201   case CmpInst::ICMP_SLT: Flavor = SPF_SMIN; break;
202   // Non-strict inequalities.
203   case CmpInst::ICMP_ULE: Flavor = SPF_UMIN; break;
204   case CmpInst::ICMP_UGE: Flavor = SPF_UMAX; break;
205   case CmpInst::ICMP_SLE: Flavor = SPF_SMIN; break;
206   case CmpInst::ICMP_SGE: Flavor = SPF_SMAX; break;
207   default: break;
208   }
209 
210   return true;
211 }
212 
213 static unsigned getHashValueImpl(SimpleValue Val) {
214   Instruction *Inst = Val.Inst;
215   // Hash in all of the operands as pointers.
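      // For commutative binary operators the two operands are first put into a
      // canonical order (by pointer value), so, for example, 'add %x, %y' and
      // 'add %y, %x' produce the same hash.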
216   if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst)) {
217     Value *LHS = BinOp->getOperand(0);
218     Value *RHS = BinOp->getOperand(1);
219     if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
220       std::swap(LHS, RHS);
221 
222     return hash_combine(BinOp->getOpcode(), LHS, RHS);
223   }
224 
225   if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
226     // Compares can be commuted by swapping the comparands and
227     // updating the predicate.  Choose the form that has the
228     // comparands in sorted order, or in the case of a tie, the
229     // one with the lower predicate.
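        // For example, 'icmp sgt %x, %y' and 'icmp slt %y, %x' canonicalize to
        // the same (predicate, operands) tuple and therefore hash identically.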
230     Value *LHS = CI->getOperand(0);
231     Value *RHS = CI->getOperand(1);
232     CmpInst::Predicate Pred = CI->getPredicate();
233     CmpInst::Predicate SwappedPred = CI->getSwappedPredicate();
234     if (std::tie(LHS, Pred) > std::tie(RHS, SwappedPred)) {
235       std::swap(LHS, RHS);
236       Pred = SwappedPred;
237     }
238     return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
239   }
240 
241   // Hash general selects to allow matching commuted true/false operands.
242   SelectPatternFlavor SPF;
243   Value *Cond, *A, *B;
244   if (matchSelectWithOptionalNotCond(Inst, Cond, A, B, SPF)) {
245     // Hash min/max (cmp + select) to allow for commuted operands.
246     // Min/max may also have a non-canonical compare predicate (e.g., the compare for
247     // smin may use 'sgt' rather than 'slt'), and non-canonical operands in the
248     // compare.
249     // TODO: We should also detect FP min/max.
250     if (SPF == SPF_SMIN || SPF == SPF_SMAX ||
251         SPF == SPF_UMIN || SPF == SPF_UMAX) {
252       if (A > B)
253         std::swap(A, B);
254       return hash_combine(Inst->getOpcode(), SPF, A, B);
255     }
256 
257     // Hash general selects to allow matching commuted true/false operands.
258 
259     // If we do not have a compare as the condition, just hash in the condition.
260     CmpInst::Predicate Pred;
261     Value *X, *Y;
262     if (!match(Cond, m_Cmp(Pred, m_Value(X), m_Value(Y))))
263       return hash_combine(Inst->getOpcode(), Cond, A, B);
264 
265     // Similar to cmp normalization (above) - canonicalize the predicate value:
266     // select (icmp Pred, X, Y), A, B --> select (icmp InvPred, X, Y), B, A
267     if (CmpInst::getInversePredicate(Pred) < Pred) {
268       Pred = CmpInst::getInversePredicate(Pred);
269       std::swap(A, B);
270     }
271     return hash_combine(Inst->getOpcode(), Pred, X, Y, A, B);
272   }
273 
274   if (CastInst *CI = dyn_cast<CastInst>(Inst))
275     return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0));
276 
277   if (FreezeInst *FI = dyn_cast<FreezeInst>(Inst))
278     return hash_combine(FI->getOpcode(), FI->getOperand(0));
279 
280   if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst))
281     return hash_combine(EVI->getOpcode(), EVI->getOperand(0),
282                         hash_combine_range(EVI->idx_begin(), EVI->idx_end()));
283 
284   if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst))
285     return hash_combine(IVI->getOpcode(), IVI->getOperand(0),
286                         IVI->getOperand(1),
287                         hash_combine_range(IVI->idx_begin(), IVI->idx_end()));
288 
289   assert((isa<CallInst>(Inst) || isa<GetElementPtrInst>(Inst) ||
290           isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
291           isa<ShuffleVectorInst>(Inst) || isa<UnaryOperator>(Inst) ||
292           isa<FreezeInst>(Inst)) &&
293          "Invalid/unknown instruction");
294 
295   // Handle intrinsics with commutative operands.
296   // TODO: Extend this to handle intrinsics with >2 operands where the first
297   //       two operands are commutative.
298   auto *II = dyn_cast<IntrinsicInst>(Inst);
299   if (II && II->isCommutative() && II->arg_size() == 2) {
300     Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
301     if (LHS > RHS)
302       std::swap(LHS, RHS);
303     return hash_combine(II->getOpcode(), LHS, RHS);
304   }
305 
306   // gc.relocate is a 'special' call: its second and third operands are
307   // not real values, but indices into the statepoint's argument list.
308   // Get the values they point to.
309   if (const GCRelocateInst *GCR = dyn_cast<GCRelocateInst>(Inst))
310     return hash_combine(GCR->getOpcode(), GCR->getOperand(0),
311                         GCR->getBasePtr(), GCR->getDerivedPtr());
312 
313   // Mix in the opcode.
314   return hash_combine(
315       Inst->getOpcode(),
316       hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
317 }
318 
319 unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
320 #ifndef NDEBUG
321   // If -earlycse-debug-hash was specified, return a constant -- this
322   // will force all hashing to collide, so we'll exhaustively search
323   // the table for a match, and the assertion in isEqual will fire if
324   // there's a bug causing equal keys to hash differently.
325   if (EarlyCSEDebugHash)
326     return 0;
327 #endif
328   return getHashValueImpl(Val);
329 }
330 
331 static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) {
332   Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
333 
334   if (LHS.isSentinel() || RHS.isSentinel())
335     return LHSI == RHSI;
336 
337   if (LHSI->getOpcode() != RHSI->getOpcode())
338     return false;
339   if (LHSI->isIdenticalToWhenDefined(RHSI))
340     return true;
341 
342   // If we're not strictly identical, we might still have a commutable instruction.
343   if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
344     if (!LHSBinOp->isCommutative())
345       return false;
346 
347     assert(isa<BinaryOperator>(RHSI) &&
348            "same opcode, but different instruction type?");
349     BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);
350 
351     // Commuted equality
352     return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
353            LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
354   }
355   if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
356     assert(isa<CmpInst>(RHSI) &&
357            "same opcode, but different instruction type?");
358     CmpInst *RHSCmp = cast<CmpInst>(RHSI);
359     // Commuted equality
360     return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
361            LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
362            LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
363   }
364 
365   // TODO: Extend this for >2 args by matching the trailing N-2 args.
366   auto *LII = dyn_cast<IntrinsicInst>(LHSI);
367   auto *RII = dyn_cast<IntrinsicInst>(RHSI);
368   if (LII && RII && LII->getIntrinsicID() == RII->getIntrinsicID() &&
369       LII->isCommutative() && LII->arg_size() == 2) {
370     return LII->getArgOperand(0) == RII->getArgOperand(1) &&
371            LII->getArgOperand(1) == RII->getArgOperand(0);
372   }
373 
374   // See comment above in `getHashValue()`.
375   if (const GCRelocateInst *GCR1 = dyn_cast<GCRelocateInst>(LHSI))
376     if (const GCRelocateInst *GCR2 = dyn_cast<GCRelocateInst>(RHSI))
377       return GCR1->getOperand(0) == GCR2->getOperand(0) &&
378              GCR1->getBasePtr() == GCR2->getBasePtr() &&
379              GCR1->getDerivedPtr() == GCR2->getDerivedPtr();
380 
381   // Min/max can occur with commuted operands, non-canonical predicates,
382   // and/or non-canonical operands.
383   // Selects can be non-trivially equivalent via inverted conditions and swaps.
384   SelectPatternFlavor LSPF, RSPF;
385   Value *CondL, *CondR, *LHSA, *RHSA, *LHSB, *RHSB;
386   if (matchSelectWithOptionalNotCond(LHSI, CondL, LHSA, LHSB, LSPF) &&
387       matchSelectWithOptionalNotCond(RHSI, CondR, RHSA, RHSB, RSPF)) {
388     if (LSPF == RSPF) {
389       // TODO: We should also detect FP min/max.
390       if (LSPF == SPF_SMIN || LSPF == SPF_SMAX ||
391           LSPF == SPF_UMIN || LSPF == SPF_UMAX)
392         return ((LHSA == RHSA && LHSB == RHSB) ||
393                 (LHSA == RHSB && LHSB == RHSA));
394 
395       // select Cond, A, B <--> select not(Cond), B, A
396       if (CondL == CondR && LHSA == RHSA && LHSB == RHSB)
397         return true;
398     }
399 
400     // If the true/false operands are swapped and the conditions are compares
401     // with inverted predicates, the selects are equal:
402     // select (icmp Pred, X, Y), A, B <--> select (icmp InvPred, X, Y), B, A
403     //
404     // This also handles patterns with a double-negation in the sense of not +
405     // inverse, because we looked through a 'not' in the matching function and
406     // swapped A/B:
407     // select (cmp Pred, X, Y), A, B <--> select (not (cmp InvPred, X, Y)), B, A
408     //
409     // This intentionally does NOT handle patterns with a double-negation in
410     // the sense of not + not, because doing so could result in values
411     // that compare as equal but hash differently in the min/max cases,
412     // such as:
413     // select (cmp slt, X, Y), X, Y <--> select (not (not (cmp slt, X, Y))), X, Y
414     //   ^ hashes as min                  ^ would not hash as min
415     // In the context of the EarlyCSE pass, however, such cases never reach
416     // this code, as we simplify the double-negation before hashing the second
417     // select (and so still succeed at CSEing them).
418     if (LHSA == RHSB && LHSB == RHSA) {
419       CmpInst::Predicate PredL, PredR;
420       Value *X, *Y;
421       if (match(CondL, m_Cmp(PredL, m_Value(X), m_Value(Y))) &&
422           match(CondR, m_Cmp(PredR, m_Specific(X), m_Specific(Y))) &&
423           CmpInst::getInversePredicate(PredL) == PredR)
424         return true;
425     }
426   }
427 
428   return false;
429 }
430 
431 bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
432   // These comparisons are nontrivial, so assert that equality implies
433   // hash equality (DenseMap demands this as an invariant).
434   bool Result = isEqualImpl(LHS, RHS);
435   assert(!Result || (LHS.isSentinel() && LHS.Inst == RHS.Inst) ||
436          getHashValueImpl(LHS) == getHashValueImpl(RHS));
437   return Result;
438 }
439 
440 //===----------------------------------------------------------------------===//
441 // CallValue
442 //===----------------------------------------------------------------------===//
443 
444 namespace {
445 
446 /// Struct representing the available call values in the scoped hash
447 /// table.
448 struct CallValue {
449   Instruction *Inst;
450 
451   CallValue(Instruction *I) : Inst(I) {
452     assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
453   }
454 
455   bool isSentinel() const {
456     return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
457            Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
458   }
459 
460   static bool canHandle(Instruction *Inst) {
461     // Don't value number anything that returns void.
462     if (Inst->getType()->isVoidTy())
463       return false;
464 
465     CallInst *CI = dyn_cast<CallInst>(Inst);
466     if (!CI || !CI->onlyReadsMemory())
467       return false;
468     return true;
469   }
470 };
471 
472 } // end anonymous namespace
473 
474 namespace llvm {
475 
476 template <> struct DenseMapInfo<CallValue> {
477   static inline CallValue getEmptyKey() {
478     return DenseMapInfo<Instruction *>::getEmptyKey();
479   }
480 
481   static inline CallValue getTombstoneKey() {
482     return DenseMapInfo<Instruction *>::getTombstoneKey();
483   }
484 
485   static unsigned getHashValue(CallValue Val);
486   static bool isEqual(CallValue LHS, CallValue RHS);
487 };
488 
489 } // end namespace llvm
490 
491 unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
492   Instruction *Inst = Val.Inst;
493 
494   // Hash all of the operands as pointers and mix in the opcode.
495   return hash_combine(
496       Inst->getOpcode(),
497       hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
498 }
499 
500 bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
501   Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
502   if (LHS.isSentinel() || RHS.isSentinel())
503     return LHSI == RHSI;
504 
505   return LHSI->isIdenticalTo(RHSI);
506 }
507 
508 //===----------------------------------------------------------------------===//
509 // EarlyCSE implementation
510 //===----------------------------------------------------------------------===//
511 
512 namespace {
513 
514 /// A simple and fast domtree-based CSE pass.
515 ///
516 /// This pass does a simple depth-first walk over the dominator tree,
517 /// eliminating trivially redundant instructions and using instsimplify to
518 /// canonicalize things as it goes. It is intended to be fast and catch obvious
519 /// cases so that instcombine and other passes are more effective. It is
520 /// expected that a later pass of GVN will catch the interesting/hard cases.
521 class EarlyCSE {
522 public:
523   const TargetLibraryInfo &TLI;
524   const TargetTransformInfo &TTI;
525   DominatorTree &DT;
526   AssumptionCache &AC;
527   const SimplifyQuery SQ;
528   MemorySSA *MSSA;
529   std::unique_ptr<MemorySSAUpdater> MSSAUpdater;
530 
531   using AllocatorTy =
532       RecyclingAllocator<BumpPtrAllocator,
533                          ScopedHashTableVal<SimpleValue, Value *>>;
534   using ScopedHTType =
535       ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,
536                       AllocatorTy>;
537 
538   /// A scoped hash table of the current values of all of our simple
539   /// scalar expressions.
540   ///
541   /// As we walk down the domtree, we look to see if instructions are in this:
542   /// if so, we replace them with what we find, otherwise we insert them so
543   /// that dominated values can succeed in their lookup.
544   ScopedHTType AvailableValues;
545 
546   /// A scoped hash table of the current values of previously encountered
547   /// memory locations.
548   ///
549   /// This allows us to get efficient access to dominating loads or stores when
550   /// we have a fully redundant load.  In addition to the most recent load, we
551   /// keep track of a generation count of the read, which is compared against
552   /// the current generation count.  The current generation count is incremented
553   /// after every possibly writing memory operation, which ensures that we only
554   /// CSE loads with other loads that have no intervening store.  Ordering
555   /// events (such as fences or atomic instructions) increment the generation
556   /// count as well; essentially, we model these as writes to all possible
557   /// locations.  Note that atomic and/or volatile loads and stores can be
558   /// present in the table; it is the responsibility of the consumer to inspect
559   /// the atomicity/volatility if needed.
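      ///
      /// For example (a sketch):
      ///   %v1 = load i32, ptr %p   ; recorded at generation G
      ///   call void @f()           ; may write memory, generation becomes G+1
      ///   %v2 = load i32, ptr %p   ; seen at generation G+1
      /// Here %v2 is only CSE'd to %v1 if the generations match or MemorySSA
      /// proves that no intervening write clobbers %p (see isSameMemGeneration).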
560   struct LoadValue {
561     Instruction *DefInst = nullptr;
562     unsigned Generation = 0;
563     int MatchingId = -1;
564     bool IsAtomic = false;
565 
566     LoadValue() = default;
567     LoadValue(Instruction *Inst, unsigned Generation, unsigned MatchingId,
568               bool IsAtomic)
569         : DefInst(Inst), Generation(Generation), MatchingId(MatchingId),
570           IsAtomic(IsAtomic) {}
571   };
572 
573   using LoadMapAllocator =
574       RecyclingAllocator<BumpPtrAllocator,
575                          ScopedHashTableVal<Value *, LoadValue>>;
576   using LoadHTType =
577       ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
578                       LoadMapAllocator>;
579 
580   LoadHTType AvailableLoads;
581 
582   // A scoped hash table mapping memory locations (represented as typed
583   // addresses) to generation numbers at which that memory location became
584   // (henceforth indefinitely) invariant.
585   using InvariantMapAllocator =
586       RecyclingAllocator<BumpPtrAllocator,
587                          ScopedHashTableVal<MemoryLocation, unsigned>>;
588   using InvariantHTType =
589       ScopedHashTable<MemoryLocation, unsigned, DenseMapInfo<MemoryLocation>,
590                       InvariantMapAllocator>;
591   InvariantHTType AvailableInvariants;
592 
593   /// A scoped hash table of the current values of read-only call
594   /// values.
595   ///
596   /// It uses the same generation count as loads.
597   using CallHTType =
598       ScopedHashTable<CallValue, std::pair<Instruction *, unsigned>>;
599   CallHTType AvailableCalls;
600 
601   /// This is the current generation of the memory value.
602   unsigned CurrentGeneration = 0;
603 
604   /// Set up the EarlyCSE runner for a particular function.
605   EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI,
606            const TargetTransformInfo &TTI, DominatorTree &DT,
607            AssumptionCache &AC, MemorySSA *MSSA)
608       : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA),
609         MSSAUpdater(std::make_unique<MemorySSAUpdater>(MSSA)) {}
610 
611   bool run();
612 
613 private:
614   unsigned ClobberCounter = 0;
615   // Almost a POD, but needs to call the constructors for the scoped hash
616   // tables so that a new scope gets pushed on. These are RAII so that the
617   // scope gets popped when the NodeScope is destroyed.
618   class NodeScope {
619   public:
620     NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
621               InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls)
622       : Scope(AvailableValues), LoadScope(AvailableLoads),
623         InvariantScope(AvailableInvariants), CallScope(AvailableCalls) {}
624     NodeScope(const NodeScope &) = delete;
625     NodeScope &operator=(const NodeScope &) = delete;
626 
627   private:
628     ScopedHTType::ScopeTy Scope;
629     LoadHTType::ScopeTy LoadScope;
630     InvariantHTType::ScopeTy InvariantScope;
631     CallHTType::ScopeTy CallScope;
632   };
633 
634   // Contains all the needed information to create a stack for doing a depth
635   // first traversal of the tree. This includes scopes for values, loads, and
636   // calls as well as the generation. There is a child iterator so that the
637   // children do not need to be store separately.
638   // children do not need to be stored separately.
639   public:
640     StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
641               InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
642               unsigned cg, DomTreeNode *n, DomTreeNode::const_iterator child,
643               DomTreeNode::const_iterator end)
644         : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
645           EndIter(end),
646           Scopes(AvailableValues, AvailableLoads, AvailableInvariants,
647                  AvailableCalls)
648           {}
649     StackNode(const StackNode &) = delete;
650     StackNode &operator=(const StackNode &) = delete;
651 
652     // Accessors.
653     unsigned currentGeneration() const { return CurrentGeneration; }
654     unsigned childGeneration() const { return ChildGeneration; }
655     void childGeneration(unsigned generation) { ChildGeneration = generation; }
656     DomTreeNode *node() { return Node; }
657     DomTreeNode::const_iterator childIter() const { return ChildIter; }
658 
659     DomTreeNode *nextChild() {
660       DomTreeNode *child = *ChildIter;
661       ++ChildIter;
662       return child;
663     }
664 
665     DomTreeNode::const_iterator end() const { return EndIter; }
666     bool isProcessed() const { return Processed; }
667     void process() { Processed = true; }
668 
669   private:
670     unsigned CurrentGeneration;
671     unsigned ChildGeneration;
672     DomTreeNode *Node;
673     DomTreeNode::const_iterator ChildIter;
674     DomTreeNode::const_iterator EndIter;
675     NodeScope Scopes;
676     bool Processed = false;
677   };
678 
679   /// Wrapper class to handle memory instructions, including loads,
680   /// stores and intrinsic loads and stores defined by the target.
681   class ParseMemoryInst {
682   public:
683     ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
684       : Inst(Inst) {
685       if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
686         IntrID = II->getIntrinsicID();
687         if (TTI.getTgtMemIntrinsic(II, Info))
688           return;
689         if (isHandledNonTargetIntrinsic(IntrID)) {
690           switch (IntrID) {
691           case Intrinsic::masked_load:
692             Info.PtrVal = Inst->getOperand(0);
693             Info.MatchingId = Intrinsic::masked_load;
694             Info.ReadMem = true;
695             Info.WriteMem = false;
696             Info.IsVolatile = false;
697             break;
698           case Intrinsic::masked_store:
699             Info.PtrVal = Inst->getOperand(1);
700             // Use the ID of masked load as the "matching id". This will
701             // prevent matching non-masked loads/stores with masked ones
702             // (which could be done), but at the moment, the code here
703             // does not support matching intrinsics with non-intrinsics,
704             // so keep the MatchingIds specific to masked instructions
705             // for now (TODO).
706             Info.MatchingId = Intrinsic::masked_load;
707             Info.ReadMem = false;
708             Info.WriteMem = true;
709             Info.IsVolatile = false;
710             break;
711           }
712         }
713       }
714     }
715 
716     Instruction *get() { return Inst; }
717     const Instruction *get() const { return Inst; }
718 
719     bool isLoad() const {
720       if (IntrID != 0)
721         return Info.ReadMem;
722       return isa<LoadInst>(Inst);
723     }
724 
725     bool isStore() const {
726       if (IntrID != 0)
727         return Info.WriteMem;
728       return isa<StoreInst>(Inst);
729     }
730 
731     bool isAtomic() const {
732       if (IntrID != 0)
733         return Info.Ordering != AtomicOrdering::NotAtomic;
734       return Inst->isAtomic();
735     }
736 
737     bool isUnordered() const {
738       if (IntrID != 0)
739         return Info.isUnordered();
740 
741       if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
742         return LI->isUnordered();
743       } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
744         return SI->isUnordered();
745       }
746       // Conservative answer
747       return !Inst->isAtomic();
748     }
749 
750     bool isVolatile() const {
751       if (IntrID != 0)
752         return Info.IsVolatile;
753 
754       if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
755         return LI->isVolatile();
756       } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
757         return SI->isVolatile();
758       }
759       // Conservative answer
760       return true;
761     }
762 
763     bool isInvariantLoad() const {
764       if (auto *LI = dyn_cast<LoadInst>(Inst))
765         return LI->hasMetadata(LLVMContext::MD_invariant_load);
766       return false;
767     }
768 
769     bool isValid() const { return getPointerOperand() != nullptr; }
770 
771     // For regular (non-intrinsic) loads/stores, this is set to -1. For
772     // intrinsic loads/stores, the id is retrieved from the corresponding
773     // field in the MemIntrinsicInfo structure.  That field contains
774     // non-negative values only.
775     int getMatchingId() const {
776       if (IntrID != 0)
777         return Info.MatchingId;
778       return -1;
779     }
780 
781     Value *getPointerOperand() const {
782       if (IntrID != 0)
783         return Info.PtrVal;
784       return getLoadStorePointerOperand(Inst);
785     }
786 
787     Type *getValueType() const {
788       // TODO: handle target-specific intrinsics.
789       if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
790         switch (II->getIntrinsicID()) {
791         case Intrinsic::masked_load:
792           return II->getType();
793         case Intrinsic::masked_store:
794           return II->getArgOperand(0)->getType();
795         default:
796           return nullptr;
797         }
798       }
799       return getLoadStoreType(Inst);
800     }
801 
802     bool mayReadFromMemory() const {
803       if (IntrID != 0)
804         return Info.ReadMem;
805       return Inst->mayReadFromMemory();
806     }
807 
808     bool mayWriteToMemory() const {
809       if (IntrID != 0)
810         return Info.WriteMem;
811       return Inst->mayWriteToMemory();
812     }
813 
814   private:
815     Intrinsic::ID IntrID = 0;
816     MemIntrinsicInfo Info;
817     Instruction *Inst;
818   };
819 
820   // This function exists to prevent accidentally passing a non-target
821   // intrinsic ID to TargetTransformInfo.
822   static bool isHandledNonTargetIntrinsic(Intrinsic::ID ID) {
823     switch (ID) {
824     case Intrinsic::masked_load:
825     case Intrinsic::masked_store:
826       return true;
827     }
828     return false;
829   }
830   static bool isHandledNonTargetIntrinsic(const Value *V) {
831     if (auto *II = dyn_cast<IntrinsicInst>(V))
832       return isHandledNonTargetIntrinsic(II->getIntrinsicID());
833     return false;
834   }
835 
836   bool processNode(DomTreeNode *Node);
837 
838   bool handleBranchCondition(Instruction *CondInst, const BranchInst *BI,
839                              const BasicBlock *BB, const BasicBlock *Pred);
840 
841   Value *getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
842                           unsigned CurrentGeneration);
843 
844   bool overridingStores(const ParseMemoryInst &Earlier,
845                         const ParseMemoryInst &Later);
846 
847   Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
848     // TODO: We could insert relevant casts on type mismatch here.
849     if (auto *LI = dyn_cast<LoadInst>(Inst))
850       return LI->getType() == ExpectedType ? LI : nullptr;
851     else if (auto *SI = dyn_cast<StoreInst>(Inst)) {
852       Value *V = SI->getValueOperand();
853       return V->getType() == ExpectedType ? V : nullptr;
854     }
855     assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
856     auto *II = cast<IntrinsicInst>(Inst);
857     if (isHandledNonTargetIntrinsic(II->getIntrinsicID()))
858       return getOrCreateResultNonTargetMemIntrinsic(II, ExpectedType);
859     return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType);
860   }
861 
862   Value *getOrCreateResultNonTargetMemIntrinsic(IntrinsicInst *II,
863                                                 Type *ExpectedType) const {
864     switch (II->getIntrinsicID()) {
865     case Intrinsic::masked_load:
866       return II;
867     case Intrinsic::masked_store:
868       return II->getOperand(0);
869     }
870     return nullptr;
871   }
872 
873   /// Return true if the instruction is known to only operate on memory
874   /// provably invariant in the given "generation".
875   bool isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt);
876 
877   bool isSameMemGeneration(unsigned EarlierGeneration, unsigned LaterGeneration,
878                            Instruction *EarlierInst, Instruction *LaterInst);
879 
880   bool isNonTargetIntrinsicMatch(const IntrinsicInst *Earlier,
881                                  const IntrinsicInst *Later) {
882     auto IsSubmask = [](const Value *Mask0, const Value *Mask1) {
883       // Is Mask0 a submask of Mask1?
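          // For example, <i1 1, i1 0> is a submask of <i1 1, i1 1>, but it is
          // not a submask of <i1 0, i1 1>.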
884       if (Mask0 == Mask1)
885         return true;
886       if (isa<UndefValue>(Mask0) || isa<UndefValue>(Mask1))
887         return false;
888       auto *Vec0 = dyn_cast<ConstantVector>(Mask0);
889       auto *Vec1 = dyn_cast<ConstantVector>(Mask1);
890       if (!Vec0 || !Vec1)
891         return false;
892       if (Vec0->getType() != Vec1->getType())
893         return false;
894       for (int i = 0, e = Vec0->getNumOperands(); i != e; ++i) {
895         Constant *Elem0 = Vec0->getOperand(i);
896         Constant *Elem1 = Vec1->getOperand(i);
897         auto *Int0 = dyn_cast<ConstantInt>(Elem0);
898         if (Int0 && Int0->isZero())
899           continue;
900         auto *Int1 = dyn_cast<ConstantInt>(Elem1);
901         if (Int1 && !Int1->isZero())
902           continue;
903         if (isa<UndefValue>(Elem0) || isa<UndefValue>(Elem1))
904           return false;
905         if (Elem0 == Elem1)
906           continue;
907         return false;
908       }
909       return true;
910     };
911     auto PtrOp = [](const IntrinsicInst *II) {
912       if (II->getIntrinsicID() == Intrinsic::masked_load)
913         return II->getOperand(0);
914       if (II->getIntrinsicID() == Intrinsic::masked_store)
915         return II->getOperand(1);
916       llvm_unreachable("Unexpected IntrinsicInst");
917     };
918     auto MaskOp = [](const IntrinsicInst *II) {
919       if (II->getIntrinsicID() == Intrinsic::masked_load)
920         return II->getOperand(2);
921       if (II->getIntrinsicID() == Intrinsic::masked_store)
922         return II->getOperand(3);
923       llvm_unreachable("Unexpected IntrinsicInst");
924     };
925     auto ThruOp = [](const IntrinsicInst *II) {
926       if (II->getIntrinsicID() == Intrinsic::masked_load)
927         return II->getOperand(3);
928       llvm_unreachable("Unexpected IntrinsicInst");
929     };
930 
931     if (PtrOp(Earlier) != PtrOp(Later))
932       return false;
933 
934     Intrinsic::ID IDE = Earlier->getIntrinsicID();
935     Intrinsic::ID IDL = Later->getIntrinsicID();
936     // We could really use specific intrinsic classes for masked loads
937     // and stores in IntrinsicInst.h.
938     if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_load) {
939       // Trying to replace later masked load with the earlier one.
940       // Check that the pointers are the same, and
941       // - masks and pass-throughs are the same, or
942       // - replacee's pass-through is "undef" and replacer's mask is a
943       //   super-set of the replacee's mask.
944       if (MaskOp(Earlier) == MaskOp(Later) && ThruOp(Earlier) == ThruOp(Later))
945         return true;
946       if (!isa<UndefValue>(ThruOp(Later)))
947         return false;
948       return IsSubmask(MaskOp(Later), MaskOp(Earlier));
949     }
950     if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_load) {
951       // Trying to replace a load of a stored value with the store's value.
952       // Check that the pointers are the same, and
953       // - load's mask is a subset of store's mask, and
954       // - load's pass-through is "undef".
955       if (!IsSubmask(MaskOp(Later), MaskOp(Earlier)))
956         return false;
957       return isa<UndefValue>(ThruOp(Later));
958     }
959     if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_store) {
960       // Trying to remove a store of the loaded value.
961       // Check that the pointers are the same, and
962       // - store's mask is a subset of the load's mask.
963       return IsSubmask(MaskOp(Later), MaskOp(Earlier));
964     }
965     if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_store) {
966       // Trying to remove a dead store (earlier).
967       // Check that the pointers are the same,
968       // - the to-be-removed store's mask is a subset of the other store's
969       //   mask.
970       return IsSubmask(MaskOp(Earlier), MaskOp(Later));
971     }
972     return false;
973   }
974 
975   void removeMSSA(Instruction &Inst) {
976     if (!MSSA)
977       return;
978     if (VerifyMemorySSA)
979       MSSA->verifyMemorySSA();
980     // Removing a store here can leave MemorySSA in an unoptimized state by
981     // creating MemoryPhis that have identical arguments and by creating
982     // MemoryUses whose defining access is not an actual clobber. The phi case
983     // is handled by MemorySSA when passing OptimizePhis = true to
984     // removeMemoryAccess.  The non-optimized MemoryUse case is lazily updated
985     // by MemorySSA's getClobberingMemoryAccess.
986     MSSAUpdater->removeMemoryAccess(&Inst, true);
987   }
988 };
989 
990 } // end anonymous namespace
991 
992 /// Determine if the memory referenced by LaterInst is from the same heap
993 /// version as EarlierInst.
994 /// This is currently called in two scenarios:
995 ///
996 ///   load p
997 ///   ...
998 ///   load p
999 ///
1000 /// and
1001 ///
1002 ///   x = load p
1003 ///   ...
1004 ///   store x, p
1005 ///
1006 /// in both cases we want to verify that there are no possible writes to the
1007 /// memory referenced by p between the earlier and later instruction.
1008 bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
1009                                    unsigned LaterGeneration,
1010                                    Instruction *EarlierInst,
1011                                    Instruction *LaterInst) {
1012   // Check the simple memory generation tracking first.
1013   if (EarlierGeneration == LaterGeneration)
1014     return true;
1015 
1016   if (!MSSA)
1017     return false;
1018 
1019   // If MemorySSA has determined that one of EarlierInst or LaterInst does not
1020   // read/write memory, then we can safely return true here.
1021   // FIXME: We could be more aggressive when checking doesNotAccessMemory(),
1022   // onlyReadsMemory(), mayReadFromMemory(), and mayWriteToMemory() in this pass
1023   // by also checking the MemorySSA MemoryAccess on the instruction.  Initial
1024   // experiments suggest this isn't worthwhile, at least for C/C++ code compiled
1025   // with the default optimization pipeline.
1026   auto *EarlierMA = MSSA->getMemoryAccess(EarlierInst);
1027   if (!EarlierMA)
1028     return true;
1029   auto *LaterMA = MSSA->getMemoryAccess(LaterInst);
1030   if (!LaterMA)
1031     return true;
1032 
1033   // Since we know LaterDef dominates LaterInst and EarlierInst dominates
1034   // LaterInst, if LaterDef dominates EarlierInst then it can't occur between
1035   // EarlierInst and LaterInst and neither can any other write that potentially
1036   // clobbers LaterInst.
1037   MemoryAccess *LaterDef;
1038   if (ClobberCounter < EarlyCSEMssaOptCap) {
1039     LaterDef = MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
1040     ClobberCounter++;
1041   } else
1042     LaterDef = LaterMA->getDefiningAccess();
1043 
1044   return MSSA->dominates(LaterDef, EarlierMA);
1045 }
1046 
1047 bool EarlyCSE::isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt) {
1048   // A location loaded from with an invariant_load is assumed to *never* change
1049   // within the visible scope of the compilation.
1050   if (auto *LI = dyn_cast<LoadInst>(I))
1051     if (LI->hasMetadata(LLVMContext::MD_invariant_load))
1052       return true;
1053 
1054   auto MemLocOpt = MemoryLocation::getOrNone(I);
1055   if (!MemLocOpt)
1056     // "target" intrinsic forms of loads aren't currently known to
1057     // MemoryLocation::get.  TODO
1058     return false;
1059   MemoryLocation MemLoc = *MemLocOpt;
1060   if (!AvailableInvariants.count(MemLoc))
1061     return false;
1062 
1063   // Is the generation at which this became invariant older than the
1064   // current one?
1065   return AvailableInvariants.lookup(MemLoc) <= GenAt;
1066 }
1067 
1068 bool EarlyCSE::handleBranchCondition(Instruction *CondInst,
1069                                      const BranchInst *BI, const BasicBlock *BB,
1070                                      const BasicBlock *Pred) {
1071   assert(BI->isConditional() && "Should be a conditional branch!");
1072   assert(BI->getCondition() == CondInst && "Wrong condition?");
1073   assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
1074   auto *TorF = (BI->getSuccessor(0) == BB)
1075                    ? ConstantInt::getTrue(BB->getContext())
1076                    : ConstantInt::getFalse(BB->getContext());
1077   auto MatchBinOp = [](Instruction *I, unsigned Opcode, Value *&LHS,
1078                        Value *&RHS) {
1079     if (Opcode == Instruction::And &&
1080         match(I, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
1081       return true;
1082     else if (Opcode == Instruction::Or &&
1083              match(I, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
1084       return true;
1085     return false;
1086   };
1087   // If the condition is an AND operation, we can propagate its operands into
1088   // the true branch. If it is an OR operation, we can propagate them into the
1089   // false branch.
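       // For example, given 'br i1 (and i1 %a, %b), label %T, label %F', both
       // %a and %b are known to be true on the edge into %T, so their uses
       // dominated by that edge can be replaced with 'true'.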
1090   unsigned PropagateOpcode =
1091       (BI->getSuccessor(0) == BB) ? Instruction::And : Instruction::Or;
1092 
1093   bool MadeChanges = false;
1094   SmallVector<Instruction *, 4> WorkList;
1095   SmallPtrSet<Instruction *, 4> Visited;
1096   WorkList.push_back(CondInst);
1097   while (!WorkList.empty()) {
1098     Instruction *Curr = WorkList.pop_back_val();
1099 
1100     AvailableValues.insert(Curr, TorF);
1101     LLVM_DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
1102                       << Curr->getName() << "' as " << *TorF << " in "
1103                       << BB->getName() << "\n");
1104     if (!DebugCounter::shouldExecute(CSECounter)) {
1105       LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1106     } else {
1107       // Replace all dominated uses with the known value.
1108       if (unsigned Count = replaceDominatedUsesWith(Curr, TorF, DT,
1109                                                     BasicBlockEdge(Pred, BB))) {
1110         NumCSECVP += Count;
1111         MadeChanges = true;
1112       }
1113     }
1114 
1115     Value *LHS, *RHS;
1116     if (MatchBinOp(Curr, PropagateOpcode, LHS, RHS))
1117       for (auto *Op : { LHS, RHS })
1118         if (Instruction *OPI = dyn_cast<Instruction>(Op))
1119           if (SimpleValue::canHandle(OPI) && Visited.insert(OPI).second)
1120             WorkList.push_back(OPI);
1121   }
1122 
1123   return MadeChanges;
1124 }
1125 
1126 Value *EarlyCSE::getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
1127                                   unsigned CurrentGeneration) {
1128   if (InVal.DefInst == nullptr)
1129     return nullptr;
1130   if (InVal.MatchingId != MemInst.getMatchingId())
1131     return nullptr;
1132   // We don't yet handle removing loads with ordering of any kind.
1133   if (MemInst.isVolatile() || !MemInst.isUnordered())
1134     return nullptr;
1135   // We can't replace an atomic load with one which isn't also atomic.
1136   if (MemInst.isLoad() && !InVal.IsAtomic && MemInst.isAtomic())
1137     return nullptr;
1138   // The value V returned from this function is used differently depending
1139   // on whether MemInst is a load or a store. If it's a load, we will replace
1140   // MemInst with V; if it's a store, we will check if V is the same as the
1141   // available value.
1142   bool MemInstMatching = !MemInst.isLoad();
1143   Instruction *Matching = MemInstMatching ? MemInst.get() : InVal.DefInst;
1144   Instruction *Other = MemInstMatching ? InVal.DefInst : MemInst.get();
1145 
1146   // For stores check the result values before checking memory generation
1147   // (otherwise isSameMemGeneration may crash).
1148   Value *Result = MemInst.isStore()
1149                       ? getOrCreateResult(Matching, Other->getType())
1150                       : nullptr;
1151   if (MemInst.isStore() && InVal.DefInst != Result)
1152     return nullptr;
1153 
1154   // Deal with non-target memory intrinsics.
1155   bool MatchingNTI = isHandledNonTargetIntrinsic(Matching);
1156   bool OtherNTI = isHandledNonTargetIntrinsic(Other);
1157   if (OtherNTI != MatchingNTI)
1158     return nullptr;
1159   if (OtherNTI && MatchingNTI) {
1160     if (!isNonTargetIntrinsicMatch(cast<IntrinsicInst>(InVal.DefInst),
1161                                    cast<IntrinsicInst>(MemInst.get())))
1162       return nullptr;
1163   }
1164 
1165   if (!isOperatingOnInvariantMemAt(MemInst.get(), InVal.Generation) &&
1166       !isSameMemGeneration(InVal.Generation, CurrentGeneration, InVal.DefInst,
1167                            MemInst.get()))
1168     return nullptr;
1169 
1170   if (!Result)
1171     Result = getOrCreateResult(Matching, Other->getType());
1172   return Result;
1173 }
1174 
1175 bool EarlyCSE::overridingStores(const ParseMemoryInst &Earlier,
1176                                 const ParseMemoryInst &Later) {
1177   // Can we remove the Earlier store because of the Later store?
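       // For example, in
       //   store i32 1, ptr %p   ; Earlier
       //   store i32 2, ptr %p   ; Later
       // the Earlier store is dead and can be removed, provided the checks
       // below pass (same pointer and value type, matching ids, both stores
       // unordered, and compatible masks for the handled masked intrinsics).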
1178 
1179   assert(Earlier.isUnordered() && !Earlier.isVolatile() &&
1180          "Violated invariant");
1181   if (Earlier.getPointerOperand() != Later.getPointerOperand())
1182     return false;
1183   if (!Earlier.getValueType() || !Later.getValueType() ||
1184       Earlier.getValueType() != Later.getValueType())
1185     return false;
1186   if (Earlier.getMatchingId() != Later.getMatchingId())
1187     return false;
1188   // At the moment, we don't remove ordered stores, but do remove
1189   // unordered atomic stores.  There's no special requirement (for
1190   // unordered atomics) about removing atomic stores only in favor of
1191   // other atomic stores since we were going to execute the non-atomic
1192   // one anyway and the atomic one might never have become visible.
1193   if (!Earlier.isUnordered() || !Later.isUnordered())
1194     return false;
1195 
1196   // Deal with non-target memory intrinsics.
1197   bool ENTI = isHandledNonTargetIntrinsic(Earlier.get());
1198   bool LNTI = isHandledNonTargetIntrinsic(Later.get());
1199   if (ENTI && LNTI)
1200     return isNonTargetIntrinsicMatch(cast<IntrinsicInst>(Earlier.get()),
1201                                      cast<IntrinsicInst>(Later.get()));
1202 
1203   // Because of the check above, at least one of them is false.
1204   // For now disallow matching intrinsics with non-intrinsics,
1205   // so assume that the stores match if neither is an intrinsic.
1206   return ENTI == LNTI;
1207 }
1208 
1209 bool EarlyCSE::processNode(DomTreeNode *Node) {
1210   bool Changed = false;
1211   BasicBlock *BB = Node->getBlock();
1212 
1213   // If this block has a single predecessor, then the predecessor is the parent
1214   // of the domtree node and all of the live out memory values are still current
1215   // in this block.  If this block has multiple predecessors, then they could
1216   // have invalidated the live-out memory values of our parent value.  For now,
1217   // just be conservative and invalidate memory if this block has multiple
1218   // predecessors.
1219   if (!BB->getSinglePredecessor())
1220     ++CurrentGeneration;
1221 
1222   // If this node has a single predecessor which ends in a conditional branch,
1223   // we can infer the value of the branch condition given that we took this
1224   // path.  We need the single predecessor to ensure there's not another path
1225   // which reaches this block where the condition might hold a different
1226   // value.  Since we're adding this to the scoped hash table (like any other
1227   // def), it will have been popped if we encounter a future merge block.
1228   if (BasicBlock *Pred = BB->getSinglePredecessor()) {
1229     auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
1230     if (BI && BI->isConditional()) {
1231       auto *CondInst = dyn_cast<Instruction>(BI->getCondition());
1232       if (CondInst && SimpleValue::canHandle(CondInst))
1233         Changed |= handleBranchCondition(CondInst, BI, BB, Pred);
1234     }
1235   }
1236 
1237   /// LastStore - Keep track of the last non-volatile store that we saw... for
1238   /// as long as there is no instruction that reads memory.  If we see a store
1239   /// to the same location, we delete the dead store.  This zaps trivial dead
1240   /// stores which can occur in bitfield code among other things.
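       ///
       /// For example (a sketch), a bitfield update may lower to
       ///   store i32 %merged1, ptr %p
       ///   store i32 %merged2, ptr %p
       /// with no intervening instruction that reads memory, in which case the
       /// first store is deleted as trivially dead.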
1241   Instruction *LastStore = nullptr;
1242 
1243   // See if any instructions in the block can be eliminated.  If so, do it.  If
1244   // not, add them to AvailableValues.
1245   for (Instruction &Inst : make_early_inc_range(BB->getInstList())) {
1246     // Dead instructions should just be removed.
1247     if (isInstructionTriviallyDead(&Inst, &TLI)) {
1248       LLVM_DEBUG(dbgs() << "EarlyCSE DCE: " << Inst << '\n');
1249       if (!DebugCounter::shouldExecute(CSECounter)) {
1250         LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1251         continue;
1252       }
1253 
1254       salvageKnowledge(&Inst, &AC);
1255       salvageDebugInfo(Inst);
1256       removeMSSA(Inst);
1257       Inst.eraseFromParent();
1258       Changed = true;
1259       ++NumSimplify;
1260       continue;
1261     }
1262 
1263     // Skip assume intrinsics; they don't really have side effects (although
1264     // they're marked as such to ensure preservation of control dependencies),
1265     // and this pass will not bother with their removal. However, we should mark
1266     // its condition as true for all dominated blocks.
1267     if (auto *Assume = dyn_cast<AssumeInst>(&Inst)) {
1268       auto *CondI = dyn_cast<Instruction>(Assume->getArgOperand(0));
1269       if (CondI && SimpleValue::canHandle(CondI)) {
1270         LLVM_DEBUG(dbgs() << "EarlyCSE considering assumption: " << Inst
1271                           << '\n');
1272         AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
1273       } else
1274         LLVM_DEBUG(dbgs() << "EarlyCSE skipping assumption: " << Inst << '\n');
1275       continue;
1276     }
1277 
1278     // Likewise, noalias intrinsics don't actually write.
1279     if (match(&Inst,
1280               m_Intrinsic<Intrinsic::experimental_noalias_scope_decl>())) {
1281       LLVM_DEBUG(dbgs() << "EarlyCSE skipping noalias intrinsic: " << Inst
1282                         << '\n');
1283       continue;
1284     }
1285 
1286     // Skip sideeffect intrinsics, for the same reason as assume intrinsics.
1287     if (match(&Inst, m_Intrinsic<Intrinsic::sideeffect>())) {
1288       LLVM_DEBUG(dbgs() << "EarlyCSE skipping sideeffect: " << Inst << '\n');
1289       continue;
1290     }
1291 
1292     // Skip pseudoprobe intrinsics, for the same reason as assume intrinsics.
1293     if (match(&Inst, m_Intrinsic<Intrinsic::pseudoprobe>())) {
1294       LLVM_DEBUG(dbgs() << "EarlyCSE skipping pseudoprobe: " << Inst << '\n');
1295       continue;
1296     }
1297 
1298     // We can skip all invariant.start intrinsics since they only read memory,
1299     // and we can forward values across them. For invariant starts without
1300     // invariant ends, we can use the fact that the invariantness never ends to
1301     // start a scope in the current generation which is true for all future
1302     // generations.  Also, we don't need to consume the last store since the
1303     // semantics of invariant.start allow us to perform DSE of the last
1304     // store, if there was a store following invariant.start. Consider:
1305     //
1306     // store 30, i8* p
1307     // invariant.start(p)
1308     // store 40, i8* p
1309     // We can DSE the store of 30, since the store of 40 to the invariant
1310     // location p causes undefined behavior.
1311     if (match(&Inst, m_Intrinsic<Intrinsic::invariant_start>())) {
1312       // If there are any uses, the scope might end.
1313       if (!Inst.use_empty())
1314         continue;
1315       MemoryLocation MemLoc =
1316           MemoryLocation::getForArgument(&cast<CallInst>(Inst), 1, TLI);
1317       // Don't start a scope if we already have a better one pushed
1318       if (!AvailableInvariants.count(MemLoc))
1319         AvailableInvariants.insert(MemLoc, CurrentGeneration);
1320       continue;
1321     }
1322 
1323     if (isGuard(&Inst)) {
1324       if (auto *CondI =
1325               dyn_cast<Instruction>(cast<CallInst>(Inst).getArgOperand(0))) {
1326         if (SimpleValue::canHandle(CondI)) {
1327           // Do we already know the actual value of this condition?
1328           if (auto *KnownCond = AvailableValues.lookup(CondI)) {
1329             // Is the condition known to be true?
1330             if (isa<ConstantInt>(KnownCond) &&
1331                 cast<ConstantInt>(KnownCond)->isOne()) {
1332               LLVM_DEBUG(dbgs()
1333                          << "EarlyCSE removing guard: " << Inst << '\n');
1334               salvageKnowledge(&Inst, &AC);
1335               removeMSSA(Inst);
1336               Inst.eraseFromParent();
1337               Changed = true;
1338               continue;
1339             } else
1340               // Use the known value if it wasn't true.
1341               cast<CallInst>(Inst).setArgOperand(0, KnownCond);
1342           }
1343           // The condition we're guarding on here is true for all dominated
1344           // locations.
1345           AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
1346         }
1347       }
1348 
1349       // Guard intrinsics read all memory, but don't write any memory.
1350       // Accordingly, don't update the generation but consume the last store (to
1351       // avoid an incorrect DSE).
1352       LastStore = nullptr;
1353       continue;
1354     }
1355 
1356     // If the instruction can be simplified (e.g. X+0 = X) then replace it with
1357     // its simpler value.
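         // For example (illustrative): 'add i32 %x, 0' simplifies to '%x', so the
         // add's uses are rewritten and the now-dead add is erased below.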
1358     if (Value *V = simplifyInstruction(&Inst, SQ)) {
1359       LLVM_DEBUG(dbgs() << "EarlyCSE Simplify: " << Inst << "  to: " << *V
1360                         << '\n');
1361       if (!DebugCounter::shouldExecute(CSECounter)) {
1362         LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1363       } else {
1364         bool Killed = false;
1365         if (!Inst.use_empty()) {
1366           Inst.replaceAllUsesWith(V);
1367           Changed = true;
1368         }
1369         if (isInstructionTriviallyDead(&Inst, &TLI)) {
1370           salvageKnowledge(&Inst, &AC);
1371           removeMSSA(Inst);
1372           Inst.eraseFromParent();
1373           Changed = true;
1374           Killed = true;
1375         }
1376         if (Changed)
1377           ++NumSimplify;
1378         if (Killed)
1379           continue;
1380       }
1381     }
1382 
1383     // If this is a simple instruction that we can value number, process it.
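         // For example (an illustrative IR sketch), given
         //   %a = add i32 %x, %y
         //   ...
         //   %b = add i32 %x, %y
         // the dominated %b is replaced by %a via the lookup below.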
1384     if (SimpleValue::canHandle(&Inst)) {
1385       if (auto *CI = dyn_cast<ConstrainedFPIntrinsic>(&Inst)) {
1386         assert(CI->getExceptionBehavior() != fp::ebStrict &&
1387                "Unexpected ebStrict from SimpleValue::canHandle()");
1388         assert((!CI->getRoundingMode() ||
1389                 CI->getRoundingMode() != RoundingMode::Dynamic) &&
1390                "Unexpected dynamic rounding from SimpleValue::canHandle()");
1391       }
1392       // See if the instruction has an available value.  If so, use it.
1393       if (Value *V = AvailableValues.lookup(&Inst)) {
1394         LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << Inst << "  to: " << *V
1395                           << '\n');
1396         if (!DebugCounter::shouldExecute(CSECounter)) {
1397           LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1398           continue;
1399         }
1400         if (auto *I = dyn_cast<Instruction>(V)) {
1401           // If I being poison triggers UB, there is no need to drop those
1402           // flags. Otherwise, only retain flags present on both I and Inst.
1403           // TODO: Currently some fast-math flags are not treated as
1404           // poison-generating even though they should. Until this is fixed,
1405           // always retain flags present on both I and Inst for floating point
1406           // instructions.
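               // For example (illustrative): if I is 'add nuw i32 %x, %y' but Inst
               // is a plain 'add i32 %x, %y', nuw is dropped from I so that reusing
               // I for Inst's users cannot introduce poison they did not have.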
1407           if (isa<FPMathOperator>(I) ||
                   (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I)))
1408             I->andIRFlags(&Inst);
1409         }
1410         Inst.replaceAllUsesWith(V);
1411         salvageKnowledge(&Inst, &AC);
1412         removeMSSA(Inst);
1413         Inst.eraseFromParent();
1414         Changed = true;
1415         ++NumCSE;
1416         continue;
1417       }
1418 
1419       // Otherwise, just remember that this value is available.
1420       AvailableValues.insert(&Inst, &Inst);
1421       continue;
1422     }
1423 
1424     ParseMemoryInst MemInst(&Inst, TTI);
1425     // If this is a non-volatile load, process it.
1426     if (MemInst.isValid() && MemInst.isLoad()) {
1427       // (Conservatively) we can't peek past the ordering implied by this
1428       // operation, but we can add this load to our set of available values.
1429       if (MemInst.isVolatile() || !MemInst.isUnordered()) {
1430         LastStore = nullptr;
1431         ++CurrentGeneration;
1432       }
1433 
1434       if (MemInst.isInvariantLoad()) {
1435         // If we pass an invariant load, we know that memory location is
1436         // indefinitely constant from the moment of first dereferenceability.
1437         // We conservatively treat the invariant_load as that moment.  If we
1438         // pass an invariant load after already establishing a scope, don't
1439         // restart it since we want to preserve the earliest point seen.
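             // E.g. (illustrative): 'load i32, ptr %p, !invariant.load !0' opens a
             // scope for %p's location starting at the current generation.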
1440         auto MemLoc = MemoryLocation::get(&Inst);
1441         if (!AvailableInvariants.count(MemLoc))
1442           AvailableInvariants.insert(MemLoc, CurrentGeneration);
1443       }
1444 
1445       // If we have an available version of this load, and if it is the right
1446       // generation or the load is known to be from an invariant location,
1447       // replace this instruction.
1448       //
1449       // If either the dominating load or the current load is invariant, then
1450       // we can assume the current load loads the same value as the dominating
1451       // load.
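           // For example (an illustrative IR sketch), with no may-write
           // instruction in between (same generation):
           //   %v1 = load i32, ptr %p
           //   ...
           //   %v2 = load i32, ptr %p   ; replaced by %v1 below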
1452       LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
1453       if (Value *Op = getMatchingValue(InVal, MemInst, CurrentGeneration)) {
1454         LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << Inst
1455                           << "  to: " << *InVal.DefInst << '\n');
1456         if (!DebugCounter::shouldExecute(CSECounter)) {
1457           LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1458           continue;
1459         }
1460         if (!Inst.use_empty())
1461           Inst.replaceAllUsesWith(Op);
1462         salvageKnowledge(&Inst, &AC);
1463         removeMSSA(Inst);
1464         Inst.eraseFromParent();
1465         Changed = true;
1466         ++NumCSELoad;
1467         continue;
1468       }
1469 
1470       // Otherwise, remember that we have this instruction.
1471       AvailableLoads.insert(MemInst.getPointerOperand(),
1472                             LoadValue(&Inst, CurrentGeneration,
1473                                       MemInst.getMatchingId(),
1474                                       MemInst.isAtomic()));
1475       LastStore = nullptr;
1476       continue;
1477     }
1478 
1479     // If this instruction may read from memory or throw (and potentially read
1480     // from memory in the exception handler), forget LastStore.  Load/store
1481     // intrinsics will indicate both a read and a write to memory.  The target
1482     // may override this (e.g. so that a store intrinsic does not read from
1483     // memory, and thus will be treated the same as a regular store for
1484     // commoning purposes).
1485     if ((Inst.mayReadFromMemory() || Inst.mayThrow()) &&
1486         !(MemInst.isValid() && !MemInst.mayReadFromMemory()))
1487       LastStore = nullptr;
1488 
1489     // If this is a read-only call, process it.
1490     if (CallValue::canHandle(&Inst)) {
1491       // If we have an available version of this call, and if it is the right
1492       // generation, replace this instruction.
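           // For example (an illustrative IR sketch), two identical calls that
           // only read memory, with no intervening clobber:
           //   %a = call i32 @f(i32 %x)
           //   %b = call i32 @f(i32 %x)   ; replaced by %a below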
1493       std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(&Inst);
1494       if (InVal.first != nullptr &&
1495           isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first,
1496                               &Inst)) {
1497         LLVM_DEBUG(dbgs() << "EarlyCSE CSE CALL: " << Inst
1498                           << "  to: " << *InVal.first << '\n');
1499         if (!DebugCounter::shouldExecute(CSECounter)) {
1500           LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1501           continue;
1502         }
1503         if (!Inst.use_empty())
1504           Inst.replaceAllUsesWith(InVal.first);
1505         salvageKnowledge(&Inst, &AC);
1506         removeMSSA(Inst);
1507         Inst.eraseFromParent();
1508         Changed = true;
1509         ++NumCSECall;
1510         continue;
1511       }
1512 
1513       // Otherwise, remember that we have this instruction.
1514       AvailableCalls.insert(&Inst, std::make_pair(&Inst, CurrentGeneration));
1515       continue;
1516     }
1517 
1518     // A release fence requires that all stores complete before it, but does
1519     // not prevent the reordering of following loads 'before' the fence.  As a
1520     // result, we don't need to consider it as writing to memory and don't need
1521     // to advance the generation.  We do need to prevent DSE across the fence,
1522     // but that's handled above.
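         // E.g. (illustrative): a value loaded from %p before a 'fence release'
         // can still be forwarded to an identical load of %p after the fence,
         // since this handler neither bumps the generation nor drops
         // AvailableLoads.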
1523     if (auto *FI = dyn_cast<FenceInst>(&Inst))
1524       if (FI->getOrdering() == AtomicOrdering::Release) {
1525         assert(Inst.mayReadFromMemory() && "relied on to prevent DSE above");
1526         continue;
1527       }
1528 
1529     // Write-back DSE: If we write back the same value we just loaded from
1530     // the same location and haven't passed any intervening writes or ordering
1531     // operations, we can remove the write.  The primary benefit is in allowing
1532     // the available load table to remain valid and value forward past where
1533     // the store originally was.
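         // For example (an illustrative IR sketch):
         //   %v = load i32, ptr %p
         //   store i32 %v, ptr %p   ; removable: it writes back the loaded value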
1534     if (MemInst.isValid() && MemInst.isStore()) {
1535       LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
1536       if (InVal.DefInst &&
1537           InVal.DefInst == getMatchingValue(InVal, MemInst, CurrentGeneration)) {
1538         // It is okay to have a LastStore to a different pointer here if MemorySSA
1539         // tells us that the load and store are from the same memory generation.
1540         // In that case, LastStore should keep its present value since we're
1541         // removing the current store.
1542         assert((!LastStore ||
1543                 ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
1544                     MemInst.getPointerOperand() ||
1545                 MSSA) &&
1546                "can't have an intervening store if not using MemorySSA!");
1547         LLVM_DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << Inst << '\n');
1548         if (!DebugCounter::shouldExecute(CSECounter)) {
1549           LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1550           continue;
1551         }
1552         salvageKnowledge(&Inst, &AC);
1553         removeMSSA(Inst);
1554         Inst.eraseFromParent();
1555         Changed = true;
1556         ++NumDSE;
1557         // We can avoid incrementing the generation count since we were able
1558         // to eliminate this store.
1559         continue;
1560       }
1561     }
1562 
1563     // Okay, this isn't something we can CSE at all.  Check to see if it is
1564     // something that could modify memory.  If so, our available memory values
1565     // cannot be used so bump the generation count.
1566     if (Inst.mayWriteToMemory()) {
1567       ++CurrentGeneration;
1568 
1569       if (MemInst.isValid() && MemInst.isStore()) {
1570         // We do a trivial form of DSE if there are two stores to the same
1571         // location with no intervening loads.  Delete the earlier store.
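             // For example (an illustrative IR sketch):
             //   store i32 1, ptr %p   ; earlier store, deleted below
             //   store i32 2, ptr %p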
1572         if (LastStore) {
1573           if (overridingStores(ParseMemoryInst(LastStore, TTI), MemInst)) {
1574             LLVM_DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
1575                               << "  due to: " << Inst << '\n');
1576             if (!DebugCounter::shouldExecute(CSECounter)) {
1577               LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1578             } else {
1579               salvageKnowledge(&Inst, &AC);
1580               removeMSSA(*LastStore);
1581               LastStore->eraseFromParent();
1582               Changed = true;
1583               ++NumDSE;
1584               LastStore = nullptr;
1585             }
1586           }
1587           // fallthrough - we can exploit information about this store
1588         }
1589 
1590         // Okay, we just invalidated anything we knew about loaded values.  Try
1591         // to salvage *something* by remembering that the stored value is a live
1592         // version of the pointer.  It is safe to forward from volatile stores
1593         // to non-volatile loads, so we don't have to check for volatility of
1594         // the store.
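             // E.g. (illustrative): after 'store i32 %v, ptr %p', a later
             // 'load i32, ptr %p' in the same generation is forwarded %v.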
1595         AvailableLoads.insert(MemInst.getPointerOperand(),
1596                               LoadValue(&Inst, CurrentGeneration,
1597                                         MemInst.getMatchingId(),
1598                                         MemInst.isAtomic()));
1599 
1600         // Remember that this was the last unordered store we saw for DSE. We
1601         // don't yet handle DSE on ordered or volatile stores since we don't
1602         // have a good way to model the ordering requirement for following
1603         // passes once the store is removed.  We could insert a fence, but
1604         // since fences are slightly stronger than stores in their ordering,
1605         // it's not clear this is a profitable transform. Another option would
1606         // be to merge the ordering with that of the post dominating store.
1607         if (MemInst.isUnordered() && !MemInst.isVolatile())
1608           LastStore = &Inst;
1609         else
1610           LastStore = nullptr;
1611       }
1612     }
1613   }
1614 
1615   return Changed;
1616 }
1617 
1618 bool EarlyCSE::run() {
1619   // Note: a deque is used here because it gives significant performance
1620   // gains over a vector when the container becomes very large, due to the
1621   // specific access patterns. For more information see the mailing list
1622   // discussion on this:
1623   // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
1624   std::deque<StackNode *> nodesToProcess;
1625 
1626   bool Changed = false;
1627 
1628   // Process the root node.
1629   nodesToProcess.push_back(new StackNode(
1630       AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
1631       CurrentGeneration, DT.getRootNode(),
1632       DT.getRootNode()->begin(), DT.getRootNode()->end()));
1633 
1634   assert(!CurrentGeneration && "Create a new EarlyCSE instance to rerun it.");
1635 
1636   // Process the stack.
1637   while (!nodesToProcess.empty()) {
1638     // Look at the node on top of the stack. Set the current generation and
1639     // process it; the node is popped only after all of its children are done.
1640     StackNode *NodeToProcess = nodesToProcess.back();
1641 
1642     // Restore the current generation from the node's saved state.
1643     CurrentGeneration = NodeToProcess->currentGeneration();
1644 
1645     // Check if the node needs to be processed.
1646     if (!NodeToProcess->isProcessed()) {
1647       // Process the node.
1648       Changed |= processNode(NodeToProcess->node());
1649       NodeToProcess->childGeneration(CurrentGeneration);
1650       NodeToProcess->process();
1651     } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
1652       // Push the next child onto the stack.
1653       DomTreeNode *child = NodeToProcess->nextChild();
1654       nodesToProcess.push_back(
1655           new StackNode(AvailableValues, AvailableLoads, AvailableInvariants,
1656                         AvailableCalls, NodeToProcess->childGeneration(),
1657                         child, child->begin(), child->end()));
1658     } else {
1659       // It has been processed, and there are no more children to process,
1660       // so delete it and pop it off the stack.
1661       delete NodeToProcess;
1662       nodesToProcess.pop_back();
1663     }
1664   } // while (!nodes...)
1665 
1666   return Changed;
1667 }
1668 
1669 PreservedAnalyses EarlyCSEPass::run(Function &F,
1670                                     FunctionAnalysisManager &AM) {
1671   auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1672   auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1673   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1674   auto &AC = AM.getResult<AssumptionAnalysis>(F);
1675   auto *MSSA =
1676       UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr;
1677 
1678   EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);
1679 
1680   if (!CSE.run())
1681     return PreservedAnalyses::all();
1682 
1683   PreservedAnalyses PA;
1684   PA.preserveSet<CFGAnalyses>();
1685   if (UseMemorySSA)
1686     PA.preserve<MemorySSAAnalysis>();
1687   return PA;
1688 }
1689 
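     // Prints this pass's pipeline text, e.g. "early-cse<memssa>" when MemorySSA
     // is enabled (an illustrative example of the new pass manager syntax).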
1690 void EarlyCSEPass::printPipeline(
1691     raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1692   static_cast<PassInfoMixin<EarlyCSEPass> *>(this)->printPipeline(
1693       OS, MapClassName2PassName);
1694   OS << "<";
1695   if (UseMemorySSA)
1696     OS << "memssa";
1697   OS << ">";
1698 }
1699 
1700 namespace {
1701 
1702 /// A simple and fast domtree-based CSE pass.
1703 ///
1704 /// This pass does a simple depth-first walk over the dominator tree,
1705 /// eliminating trivially redundant instructions and using instsimplify to
1706 /// canonicalize things as it goes. It is intended to be fast and catch obvious
1707 /// cases so that instcombine and other passes are more effective. It is
1708 /// expected that a later pass of GVN will catch the interesting/hard cases.
1709 template<bool UseMemorySSA>
1710 class EarlyCSELegacyCommonPass : public FunctionPass {
1711 public:
1712   static char ID;
1713 
1714   EarlyCSELegacyCommonPass() : FunctionPass(ID) {
1715     if (UseMemorySSA)
1716       initializeEarlyCSEMemSSALegacyPassPass(*PassRegistry::getPassRegistry());
1717     else
1718       initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
1719   }
1720 
1721   bool runOnFunction(Function &F) override {
1722     if (skipFunction(F))
1723       return false;
1724 
1725     auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1726     auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1727     auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1728     auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1729     auto *MSSA =
1730         UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr;
1731 
1732     EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);
1733 
1734     return CSE.run();
1735   }
1736 
1737   void getAnalysisUsage(AnalysisUsage &AU) const override {
1738     AU.addRequired<AssumptionCacheTracker>();
1739     AU.addRequired<DominatorTreeWrapperPass>();
1740     AU.addRequired<TargetLibraryInfoWrapperPass>();
1741     AU.addRequired<TargetTransformInfoWrapperPass>();
1742     if (UseMemorySSA) {
1743       AU.addRequired<AAResultsWrapperPass>();
1744       AU.addRequired<MemorySSAWrapperPass>();
1745       AU.addPreserved<MemorySSAWrapperPass>();
1746     }
1747     AU.addPreserved<GlobalsAAWrapperPass>();
1748     AU.addPreserved<AAResultsWrapperPass>();
1749     AU.setPreservesCFG();
1750   }
1751 };
1752 
1753 } // end anonymous namespace
1754 
1755 using EarlyCSELegacyPass = EarlyCSELegacyCommonPass</*UseMemorySSA=*/false>;
1756 
1757 template<>
1758 char EarlyCSELegacyPass::ID = 0;
1759 
1760 INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false,
1761                       false)
1762 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
1763 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1764 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1765 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1766 INITIALIZE_PASS_END(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false)
1767 
1768 using EarlyCSEMemSSALegacyPass =
1769     EarlyCSELegacyCommonPass</*UseMemorySSA=*/true>;
1770 
1771 template<>
1772 char EarlyCSEMemSSALegacyPass::ID = 0;
1773 
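     // Factory for the legacy passes; e.g. createEarlyCSEPass(true) returns the
     // MemorySSA-backed variant (registered as "early-cse-memssa").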
1774 FunctionPass *llvm::createEarlyCSEPass(bool UseMemorySSA) {
1775   if (UseMemorySSA)
1776     return new EarlyCSEMemSSALegacyPass();
1777   else
1778     return new EarlyCSELegacyPass();
1779 }
1780 
1781 INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
1782                       "Early CSE w/ MemorySSA", false, false)
1783 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
1784 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1785 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
1786 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1787 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1788 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
1789 INITIALIZE_PASS_END(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
1790                     "Early CSE w/ MemorySSA", false, false)
1791