1 //===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs a simple dominator tree walk that eliminates trivially
10 // redundant instructions.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Scalar/EarlyCSE.h"
15 #include "llvm/ADT/DenseMapInfo.h"
16 #include "llvm/ADT/Hashing.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/ScopedHashTable.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/Analysis/AssumptionCache.h"
22 #include "llvm/Analysis/GlobalsModRef.h"
23 #include "llvm/Analysis/GuardUtils.h"
24 #include "llvm/Analysis/InstructionSimplify.h"
25 #include "llvm/Analysis/MemorySSA.h"
26 #include "llvm/Analysis/MemorySSAUpdater.h"
27 #include "llvm/Analysis/TargetLibraryInfo.h"
28 #include "llvm/Analysis/TargetTransformInfo.h"
29 #include "llvm/Analysis/ValueTracking.h"
30 #include "llvm/IR/BasicBlock.h"
31 #include "llvm/IR/Constants.h"
32 #include "llvm/IR/Dominators.h"
33 #include "llvm/IR/Function.h"
34 #include "llvm/IR/InstrTypes.h"
35 #include "llvm/IR/Instruction.h"
36 #include "llvm/IR/Instructions.h"
37 #include "llvm/IR/IntrinsicInst.h"
38 #include "llvm/IR/LLVMContext.h"
39 #include "llvm/IR/PassManager.h"
40 #include "llvm/IR/PatternMatch.h"
41 #include "llvm/IR/Type.h"
42 #include "llvm/IR/Value.h"
43 #include "llvm/InitializePasses.h"
44 #include "llvm/Pass.h"
45 #include "llvm/Support/Allocator.h"
46 #include "llvm/Support/AtomicOrdering.h"
47 #include "llvm/Support/Casting.h"
48 #include "llvm/Support/Debug.h"
49 #include "llvm/Support/DebugCounter.h"
50 #include "llvm/Support/RecyclingAllocator.h"
51 #include "llvm/Support/raw_ostream.h"
52 #include "llvm/Transforms/Scalar.h"
53 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
54 #include "llvm/Transforms/Utils/Local.h"
55 #include <cassert>
56 #include <deque>
57 #include <memory>
58 #include <utility>
59 
60 using namespace llvm;
61 using namespace llvm::PatternMatch;
62 
63 #define DEBUG_TYPE "early-cse"
64 
65 STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
66 STATISTIC(NumCSE,      "Number of instructions CSE'd");
67 STATISTIC(NumCSECVP,   "Number of compare instructions CVP'd");
68 STATISTIC(NumCSELoad,  "Number of load instructions CSE'd");
69 STATISTIC(NumCSECall,  "Number of call instructions CSE'd");
70 STATISTIC(NumCSEGEP, "Number of GEP instructions CSE'd");
71 STATISTIC(NumDSE,      "Number of trivial dead stores removed");
72 
73 DEBUG_COUNTER(CSECounter, "early-cse",
74               "Controls which instructions are removed");
75 
76 static cl::opt<unsigned> EarlyCSEMssaOptCap(
77     "earlycse-mssa-optimization-cap", cl::init(500), cl::Hidden,
78     cl::desc("Enable imprecision in EarlyCSE in pathological cases, in exchange "
79              "for faster compile. Caps the MemorySSA clobbering calls."));
80 
81 static cl::opt<bool> EarlyCSEDebugHash(
82     "earlycse-debug-hash", cl::init(false), cl::Hidden,
83     cl::desc("Perform extra assertion checking to verify that SimpleValue's hash "
84              "function is well-behaved w.r.t. its isEqual predicate"));
85 
86 //===----------------------------------------------------------------------===//
87 // SimpleValue
88 //===----------------------------------------------------------------------===//
89 
90 namespace {
91 
92 /// Struct representing the available values in the scoped hash table.
93 struct SimpleValue {
94   Instruction *Inst;
95 
96   SimpleValue(Instruction *I) : Inst(I) {
97     assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
98   }
99 
100   bool isSentinel() const {
101     return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
102            Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
103   }
104 
105   static bool canHandle(Instruction *Inst) {
106     // This can only handle non-void readnone functions.
107     // Also handled are constrained intrinsics that look like the types
108     // of instructions handled below (UnaryOperator, etc.).
109     if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
110       if (Function *F = CI->getCalledFunction()) {
111         switch ((Intrinsic::ID)F->getIntrinsicID()) {
112         case Intrinsic::experimental_constrained_fadd:
113         case Intrinsic::experimental_constrained_fsub:
114         case Intrinsic::experimental_constrained_fmul:
115         case Intrinsic::experimental_constrained_fdiv:
116         case Intrinsic::experimental_constrained_frem:
117         case Intrinsic::experimental_constrained_fptosi:
118         case Intrinsic::experimental_constrained_sitofp:
119         case Intrinsic::experimental_constrained_fptoui:
120         case Intrinsic::experimental_constrained_uitofp:
121         case Intrinsic::experimental_constrained_fcmp:
122         case Intrinsic::experimental_constrained_fcmps: {
123           auto *CFP = cast<ConstrainedFPIntrinsic>(CI);
124           if (CFP->getExceptionBehavior() &&
125               CFP->getExceptionBehavior() == fp::ebStrict)
126             return false;
127           // Since we CSE across function calls we must not allow
128           // the rounding mode to change.
129           if (CFP->getRoundingMode() &&
130               CFP->getRoundingMode() == RoundingMode::Dynamic)
131             return false;
132           return true;
133         }
134         }
135       }
136       return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy() &&
137              // FIXME: Calls that may access the thread id are currently
138              // treated as not accessing memory. That is problematic for
139              // coroutines, since a coroutine may resume in a different
140              // thread, so we disable the optimization here for correctness.
141              // However, this may block many other correct optimizations.
142              // Revert this once we can detect the memory-accessing kind
143              // more precisely.
144              !CI->getFunction()->isPresplitCoroutine();
145     }
146     return isa<CastInst>(Inst) || isa<UnaryOperator>(Inst) ||
147            isa<BinaryOperator>(Inst) || isa<CmpInst>(Inst) ||
148            isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
149            isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
150            isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst) ||
151            isa<FreezeInst>(Inst);
152   }
153 };
154 
155 } // end anonymous namespace
156 
157 namespace llvm {
158 
159 template <> struct DenseMapInfo<SimpleValue> {
160   static inline SimpleValue getEmptyKey() {
161     return DenseMapInfo<Instruction *>::getEmptyKey();
162   }
163 
164   static inline SimpleValue getTombstoneKey() {
165     return DenseMapInfo<Instruction *>::getTombstoneKey();
166   }
167 
168   static unsigned getHashValue(SimpleValue Val);
169   static bool isEqual(SimpleValue LHS, SimpleValue RHS);
170 };
171 
172 } // end namespace llvm
173 
174 /// Match a 'select', including an optional 'not' of the condition.
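/// For example, a select whose condition is 'xor i1 %c, true' is treated as a
/// select on %c with the true/false arms (A and B) swapped.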
175 static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A,
176                                            Value *&B,
177                                            SelectPatternFlavor &Flavor) {
178   // Return false if V is not even a select.
179   if (!match(V, m_Select(m_Value(Cond), m_Value(A), m_Value(B))))
180     return false;
181 
182   // Look through a 'not' of the condition operand by swapping A/B.
183   Value *CondNot;
184   if (match(Cond, m_Not(m_Value(CondNot)))) {
185     Cond = CondNot;
186     std::swap(A, B);
187   }
188 
189   // Match canonical forms of min/max. We are not using ValueTracking's
190   // more powerful matchSelectPattern() because it may rely on instruction flags
191   // such as "nsw". That would be incompatible with the current hashing
192   // mechanism that may remove flags to increase the likelihood of CSE.
193 
194   Flavor = SPF_UNKNOWN;
195   CmpPredicate Pred;
196 
197   if (!match(Cond, m_ICmp(Pred, m_Specific(A), m_Specific(B)))) {
198     // Check for commuted variants of min/max by swapping predicate.
199     // If we do not match the standard or commuted patterns, this is not a
200     // recognized form of min/max, but it is still a select, so return true.
201     if (!match(Cond, m_ICmp(Pred, m_Specific(B), m_Specific(A))))
202       return true;
203     Pred = ICmpInst::getSwappedPredicate(Pred);
204   }
205 
206   switch (Pred) {
207   case CmpInst::ICMP_UGT: Flavor = SPF_UMAX; break;
208   case CmpInst::ICMP_ULT: Flavor = SPF_UMIN; break;
209   case CmpInst::ICMP_SGT: Flavor = SPF_SMAX; break;
210   case CmpInst::ICMP_SLT: Flavor = SPF_SMIN; break;
211   // Non-strict inequalities.
212   case CmpInst::ICMP_ULE: Flavor = SPF_UMIN; break;
213   case CmpInst::ICMP_UGE: Flavor = SPF_UMAX; break;
214   case CmpInst::ICMP_SLE: Flavor = SPF_SMIN; break;
215   case CmpInst::ICMP_SGE: Flavor = SPF_SMAX; break;
216   default: break;
217   }
218 
219   return true;
220 }
221 
222 static unsigned hashCallInst(CallInst *CI) {
223   // Don't CSE convergent calls in different basic blocks, because they
224   // implicitly depend on the set of threads that is currently executing.
225   if (CI->isConvergent()) {
226     return hash_combine(
227         CI->getOpcode(), CI->getParent(),
228         hash_combine_range(CI->value_op_begin(), CI->value_op_end()));
229   }
230   return hash_combine(
231       CI->getOpcode(),
232       hash_combine_range(CI->value_op_begin(), CI->value_op_end()));
233 }
234 
235 static unsigned getHashValueImpl(SimpleValue Val) {
236   Instruction *Inst = Val.Inst;
237   // Hash in all of the operands as pointers.
238   if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst)) {
239     Value *LHS = BinOp->getOperand(0);
240     Value *RHS = BinOp->getOperand(1);
241     if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
242       std::swap(LHS, RHS);
243 
244     return hash_combine(BinOp->getOpcode(), LHS, RHS);
245   }
246 
247   if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
248     // Compares can be commuted by swapping the comparands and
249     // updating the predicate.  Choose the form that has the
250     // comparands in sorted order, or in the case of a tie, the
251     // one with the lower predicate.
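    // For example, 'icmp sgt %a, %b' and 'icmp slt %b, %a' are canonicalized
    // to the same (operands, predicate) tuple and therefore hash identically.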
252     Value *LHS = CI->getOperand(0);
253     Value *RHS = CI->getOperand(1);
254     CmpInst::Predicate Pred = CI->getPredicate();
255     CmpInst::Predicate SwappedPred = CI->getSwappedPredicate();
256     if (std::tie(LHS, Pred) > std::tie(RHS, SwappedPred)) {
257       std::swap(LHS, RHS);
258       Pred = SwappedPred;
259     }
260     return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
261   }
262 
263   // Hash min/max and general selects to allow matching commuted operands.
264   SelectPatternFlavor SPF;
265   Value *Cond, *A, *B;
266   if (matchSelectWithOptionalNotCond(Inst, Cond, A, B, SPF)) {
267     // Hash min/max (cmp + select) to allow for commuted operands.
268     // Min/max may also have a non-canonical compare predicate (e.g., the
269     // compare for smin may use 'sgt' rather than 'slt') and non-canonical
270     // operands in the compare.
271     // TODO: We should also detect FP min/max.
272     if (SPF == SPF_SMIN || SPF == SPF_SMAX ||
273         SPF == SPF_UMIN || SPF == SPF_UMAX) {
274       if (A > B)
275         std::swap(A, B);
276       return hash_combine(Inst->getOpcode(), SPF, A, B);
277     }
278 
279     // Hash general selects to allow matching commuted true/false operands.
280 
281     // If we do not have a compare as the condition, just hash in the condition.
282     CmpPredicate Pred;
283     Value *X, *Y;
284     if (!match(Cond, m_Cmp(Pred, m_Value(X), m_Value(Y))))
285       return hash_combine(Inst->getOpcode(), Cond, A, B);
286 
287     // Similar to cmp normalization (above) - canonicalize the predicate value:
288     // select (icmp Pred, X, Y), A, B --> select (icmp InvPred, X, Y), B, A
289     if (CmpInst::getInversePredicate(Pred) < Pred) {
290       Pred = CmpInst::getInversePredicate(Pred);
291       std::swap(A, B);
292     }
293     return hash_combine(Inst->getOpcode(),
294                         static_cast<CmpInst::Predicate>(Pred), X, Y, A, B);
295   }
296 
297   if (CastInst *CI = dyn_cast<CastInst>(Inst))
298     return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0));
299 
300   if (FreezeInst *FI = dyn_cast<FreezeInst>(Inst))
301     return hash_combine(FI->getOpcode(), FI->getOperand(0));
302 
303   if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst))
304     return hash_combine(EVI->getOpcode(), EVI->getOperand(0),
305                         hash_combine_range(EVI->idx_begin(), EVI->idx_end()));
306 
307   if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst))
308     return hash_combine(IVI->getOpcode(), IVI->getOperand(0),
309                         IVI->getOperand(1),
310                         hash_combine_range(IVI->idx_begin(), IVI->idx_end()));
311 
312   assert((isa<CallInst>(Inst) || isa<ExtractElementInst>(Inst) ||
313           isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
314           isa<UnaryOperator>(Inst) || isa<FreezeInst>(Inst)) &&
315          "Invalid/unknown instruction");
316 
317   // Handle intrinsics with commutative operands.
318   auto *II = dyn_cast<IntrinsicInst>(Inst);
319   if (II && II->isCommutative() && II->arg_size() >= 2) {
320     Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
321     if (LHS > RHS)
322       std::swap(LHS, RHS);
323     return hash_combine(
324         II->getOpcode(), LHS, RHS,
325         hash_combine_range(II->value_op_begin() + 2, II->value_op_end()));
326   }
327 
328   // gc.relocate is a 'special' call: its second and third operands are
329   // not real values but indices into the statepoint's argument list.
330   // Get the values they point to.
331   if (const GCRelocateInst *GCR = dyn_cast<GCRelocateInst>(Inst))
332     return hash_combine(GCR->getOpcode(), GCR->getOperand(0),
333                         GCR->getBasePtr(), GCR->getDerivedPtr());
334 
335   // Don't CSE convergent calls in different basic blocks, because they
336   // implicitly depend on the set of threads that is currently executing.
337   if (CallInst *CI = dyn_cast<CallInst>(Inst))
338     return hashCallInst(CI);
339 
340   // Mix in the opcode.
341   return hash_combine(
342       Inst->getOpcode(),
343       hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
344 }
345 
346 unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
347 #ifndef NDEBUG
348   // If -earlycse-debug-hash was specified, return a constant -- this
349   // will force all hashing to collide, so we'll exhaustively search
350   // the table for a match, and the assertion in isEqual will fire if
351   // there's a bug causing equal keys to hash differently.
352   if (EarlyCSEDebugHash)
353     return 0;
354 #endif
355   return getHashValueImpl(Val);
356 }
357 
358 static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) {
359   Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
360 
361   if (LHS.isSentinel() || RHS.isSentinel())
362     return LHSI == RHSI;
363 
364   if (LHSI->getOpcode() != RHSI->getOpcode())
365     return false;
366   if (LHSI->isIdenticalToWhenDefined(RHSI, /*IntersectAttrs=*/true)) {
367     // Convergent calls implicitly depend on the set of threads that is
368     // currently executing, so conservatively return false if they are in
369     // different basic blocks.
370     if (CallInst *CI = dyn_cast<CallInst>(LHSI);
371         CI && CI->isConvergent() && LHSI->getParent() != RHSI->getParent())
372       return false;
373 
374     return true;
375   }
376   // If not strictly identical, the instructions may still be commutatively equal.
377   // If we're not strictly identical, we still might be a commutable instruction
378   if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
379     if (!LHSBinOp->isCommutative())
380       return false;
381 
382     assert(isa<BinaryOperator>(RHSI) &&
383            "same opcode, but different instruction type?");
384     BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);
385 
386     // Commuted equality
387     return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
388            LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
389   }
390   if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
391     assert(isa<CmpInst>(RHSI) &&
392            "same opcode, but different instruction type?");
393     CmpInst *RHSCmp = cast<CmpInst>(RHSI);
394     // Commuted equality
395     return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
396            LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
397            LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
398   }
399 
400   auto *LII = dyn_cast<IntrinsicInst>(LHSI);
401   auto *RII = dyn_cast<IntrinsicInst>(RHSI);
402   if (LII && RII && LII->getIntrinsicID() == RII->getIntrinsicID() &&
403       LII->isCommutative() && LII->arg_size() >= 2) {
404     return LII->getArgOperand(0) == RII->getArgOperand(1) &&
405            LII->getArgOperand(1) == RII->getArgOperand(0) &&
406            std::equal(LII->arg_begin() + 2, LII->arg_end(),
407                       RII->arg_begin() + 2, RII->arg_end());
408   }
409 
410   // See comment above in `getHashValue()`.
411   if (const GCRelocateInst *GCR1 = dyn_cast<GCRelocateInst>(LHSI))
412     if (const GCRelocateInst *GCR2 = dyn_cast<GCRelocateInst>(RHSI))
413       return GCR1->getOperand(0) == GCR2->getOperand(0) &&
414              GCR1->getBasePtr() == GCR2->getBasePtr() &&
415              GCR1->getDerivedPtr() == GCR2->getDerivedPtr();
416 
417   // Min/max can occur with commuted operands, non-canonical predicates,
418   // and/or non-canonical operands.
419   // Selects can be non-trivially equivalent via inverted conditions and swaps.
420   SelectPatternFlavor LSPF, RSPF;
421   Value *CondL, *CondR, *LHSA, *RHSA, *LHSB, *RHSB;
422   if (matchSelectWithOptionalNotCond(LHSI, CondL, LHSA, LHSB, LSPF) &&
423       matchSelectWithOptionalNotCond(RHSI, CondR, RHSA, RHSB, RSPF)) {
424     if (LSPF == RSPF) {
425       // TODO: We should also detect FP min/max.
426       if (LSPF == SPF_SMIN || LSPF == SPF_SMAX ||
427           LSPF == SPF_UMIN || LSPF == SPF_UMAX)
428         return ((LHSA == RHSA && LHSB == RHSB) ||
429                 (LHSA == RHSB && LHSB == RHSA));
430 
431       // select Cond, A, B <--> select not(Cond), B, A
432       if (CondL == CondR && LHSA == RHSA && LHSB == RHSB)
433         return true;
434     }
435 
436     // If the true/false operands are swapped and the conditions are compares
437     // with inverted predicates, the selects are equal:
438     // select (icmp Pred, X, Y), A, B <--> select (icmp InvPred, X, Y), B, A
439     //
440     // This also handles patterns with a double-negation in the sense of not +
441     // inverse, because we looked through a 'not' in the matching function and
442     // swapped A/B:
443     // select (cmp Pred, X, Y), A, B <--> select (not (cmp InvPred, X, Y)), B, A
444     //
445     // This intentionally does NOT handle patterns with a double-negation in
446     // the sense of not + not, because doing so could result in values
447     // comparing as equal that hash differently in the min/max cases like:
449     // select (cmp slt, X, Y), X, Y <--> select (not (not (cmp slt, X, Y))), X, Y
450     //   ^ hashes as min                  ^ would not hash as min
451     // In the context of the EarlyCSE pass, however, such cases never reach
452     // this code, as we simplify the double-negation before hashing the second
453     // select (and so still succeed at CSEing them).
454     if (LHSA == RHSB && LHSB == RHSA) {
455       CmpPredicate PredL, PredR;
456       Value *X, *Y;
457       if (match(CondL, m_Cmp(PredL, m_Value(X), m_Value(Y))) &&
458           match(CondR, m_Cmp(PredR, m_Specific(X), m_Specific(Y))) &&
459           CmpInst::getInversePredicate(PredL) == PredR)
460         return true;
461     }
462   }
463 
464   return false;
465 }
466 
467 bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
468   // These comparisons are nontrivial, so assert that equality implies
469   // hash equality (DenseMap demands this as an invariant).
470   bool Result = isEqualImpl(LHS, RHS);
471   assert(!Result || (LHS.isSentinel() && LHS.Inst == RHS.Inst) ||
472          getHashValueImpl(LHS) == getHashValueImpl(RHS));
473   return Result;
474 }
475 
476 //===----------------------------------------------------------------------===//
477 // CallValue
478 //===----------------------------------------------------------------------===//
479 
480 namespace {
481 
482 /// Struct representing the available call values in the scoped hash
483 /// table.
484 struct CallValue {
485   Instruction *Inst;
486 
487   CallValue(Instruction *I) : Inst(I) {
488     assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
489   }
490 
491   bool isSentinel() const {
492     return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
493            Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
494   }
495 
496   static bool canHandle(Instruction *Inst) {
497     // Don't value number anything that returns void.
498     if (Inst->getType()->isVoidTy())
499       return false;
500 
501     CallInst *CI = dyn_cast<CallInst>(Inst);
502     if (!CI || !CI->onlyReadsMemory() ||
503         // FIXME: Calls that may access the thread id are currently
504         // treated as not accessing memory. That is problematic for
505         // coroutines, since a coroutine may resume in a different
506         // thread, so we disable the optimization here for correctness.
507         // However, this may block many other correct optimizations.
508         // Revert this once we can detect the memory-accessing kind
509         // more precisely.
510         CI->getFunction()->isPresplitCoroutine())
511       return false;
512     return true;
513   }
514 };
515 
516 } // end anonymous namespace
517 
518 namespace llvm {
519 
520 template <> struct DenseMapInfo<CallValue> {
521   static inline CallValue getEmptyKey() {
522     return DenseMapInfo<Instruction *>::getEmptyKey();
523   }
524 
525   static inline CallValue getTombstoneKey() {
526     return DenseMapInfo<Instruction *>::getTombstoneKey();
527   }
528 
529   static unsigned getHashValue(CallValue Val);
530   static bool isEqual(CallValue LHS, CallValue RHS);
531 };
532 
533 } // end namespace llvm
534 
535 unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
536   Instruction *Inst = Val.Inst;
537 
538   // Hash all of the operands as pointers and mix in the opcode.
539   return hashCallInst(cast<CallInst>(Inst));
540 }
541 
542 bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
543   if (LHS.isSentinel() || RHS.isSentinel())
544     return LHS.Inst == RHS.Inst;
545 
546   CallInst *LHSI = cast<CallInst>(LHS.Inst);
547   CallInst *RHSI = cast<CallInst>(RHS.Inst);
548 
549   // Convergent calls implicitly depend on the set of threads that is
550   // currently executing, so conservatively return false if they are in
551   // different basic blocks.
552   if (LHSI->isConvergent() && LHSI->getParent() != RHSI->getParent())
553     return false;
554 
555   return LHSI->isIdenticalToWhenDefined(RHSI, /*IntersectAttrs=*/true);
556 }
557 
558 //===----------------------------------------------------------------------===//
559 // GEPValue
560 //===----------------------------------------------------------------------===//
561 
562 namespace {
563 
564 struct GEPValue {
565   Instruction *Inst;
566   std::optional<int64_t> ConstantOffset;
567 
568   GEPValue(Instruction *I) : Inst(I) {
569     assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
570   }
571 
572   GEPValue(Instruction *I, std::optional<int64_t> ConstantOffset)
573       : Inst(I), ConstantOffset(ConstantOffset) {
574     assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
575   }
576 
577   bool isSentinel() const {
578     return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
579            Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
580   }
581 
582   static bool canHandle(Instruction *Inst) {
583     return isa<GetElementPtrInst>(Inst);
584   }
585 };
586 
587 } // namespace
588 
589 namespace llvm {
590 
591 template <> struct DenseMapInfo<GEPValue> {
592   static inline GEPValue getEmptyKey() {
593     return DenseMapInfo<Instruction *>::getEmptyKey();
594   }
595 
596   static inline GEPValue getTombstoneKey() {
597     return DenseMapInfo<Instruction *>::getTombstoneKey();
598   }
599 
600   static unsigned getHashValue(const GEPValue &Val);
601   static bool isEqual(const GEPValue &LHS, const GEPValue &RHS);
602 };
603 
604 } // end namespace llvm
605 
606 unsigned DenseMapInfo<GEPValue>::getHashValue(const GEPValue &Val) {
607   auto *GEP = cast<GetElementPtrInst>(Val.Inst);
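  // When the accumulated constant offset is known, hash on the pointer operand
  // and that offset so that GEPs computing the same address through different
  // index lists can land in the same bucket.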
608   if (Val.ConstantOffset.has_value())
609     return hash_combine(GEP->getOpcode(), GEP->getPointerOperand(),
610                         Val.ConstantOffset.value());
611   return hash_combine(
612       GEP->getOpcode(),
613       hash_combine_range(GEP->value_op_begin(), GEP->value_op_end()));
614 }
615 
616 bool DenseMapInfo<GEPValue>::isEqual(const GEPValue &LHS, const GEPValue &RHS) {
617   if (LHS.isSentinel() || RHS.isSentinel())
618     return LHS.Inst == RHS.Inst;
619   auto *LGEP = cast<GetElementPtrInst>(LHS.Inst);
620   auto *RGEP = cast<GetElementPtrInst>(RHS.Inst);
621   if (LGEP->getPointerOperand() != RGEP->getPointerOperand())
622     return false;
623   if (LHS.ConstantOffset.has_value() && RHS.ConstantOffset.has_value())
624     return LHS.ConstantOffset.value() == RHS.ConstantOffset.value();
625   return LGEP->isIdenticalToWhenDefined(RGEP);
626 }
627 
628 //===----------------------------------------------------------------------===//
629 // EarlyCSE implementation
630 //===----------------------------------------------------------------------===//
631 
632 namespace {
633 
634 /// A simple and fast domtree-based CSE pass.
635 ///
636 /// This pass does a simple depth-first walk over the dominator tree,
637 /// eliminating trivially redundant instructions and using instsimplify to
638 /// canonicalize things as it goes. It is intended to be fast and catch obvious
639 /// cases so that instcombine and other passes are more effective. It is
640 /// expected that a later pass of GVN will catch the interesting/hard cases.
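///
/// For example, given
///   %a = add i32 %x, %y
///   %b = add i32 %x, %y
/// the second add is trivially redundant and uses of %b are rewritten to %a.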
641 class EarlyCSE {
642 public:
643   const TargetLibraryInfo &TLI;
644   const TargetTransformInfo &TTI;
645   DominatorTree &DT;
646   AssumptionCache &AC;
647   const SimplifyQuery SQ;
648   MemorySSA *MSSA;
649   std::unique_ptr<MemorySSAUpdater> MSSAUpdater;
650 
651   using AllocatorTy =
652       RecyclingAllocator<BumpPtrAllocator,
653                          ScopedHashTableVal<SimpleValue, Value *>>;
654   using ScopedHTType =
655       ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,
656                       AllocatorTy>;
657 
658   /// A scoped hash table of the current values of all of our simple
659   /// scalar expressions.
660   ///
661   /// As we walk down the domtree, we look to see if instructions are in this
662   /// table; if so, we replace them with what we find, otherwise we insert
663   /// them so that dominated values can succeed in their lookup.
664   ScopedHTType AvailableValues;
665 
666   /// A scoped hash table of the current values of previously encountered
667   /// memory locations.
668   ///
669   /// This allows us to get efficient access to dominating loads or stores when
670   /// we have a fully redundant load.  In addition to the most recent load, we
671   /// keep track of a generation count of the read, which is compared against
672   /// the current generation count.  The current generation count is incremented
673   /// after every possibly writing memory operation, which ensures that we only
674   /// CSE loads with other loads that have no intervening store.  Ordering
675   /// events (such as fences or atomic instructions) increment the generation
676   /// count as well; essentially, we model these as writes to all possible
677   /// locations.  Note that atomic and/or volatile loads and stores can be
678   /// present in the table; it is the responsibility of the consumer to inspect
679   /// the atomicity/volatility if needed.
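  ///
  /// For example:
  ///   %v1 = load i32, ptr %p   ; recorded with the current generation
  ///   call void @clobber()     ; may write memory, so the generation is bumped
  ///   %v2 = load i32, ptr %p   ; generations differ, so the table entry alone
  ///                            ; no longer proves the load is redundant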
680   struct LoadValue {
681     Instruction *DefInst = nullptr;
682     unsigned Generation = 0;
683     int MatchingId = -1;
684     bool IsAtomic = false;
685     bool IsLoad = false;
686 
687     LoadValue() = default;
688     LoadValue(Instruction *Inst, unsigned Generation, unsigned MatchingId,
689               bool IsAtomic, bool IsLoad)
690         : DefInst(Inst), Generation(Generation), MatchingId(MatchingId),
691           IsAtomic(IsAtomic), IsLoad(IsLoad) {}
692   };
693 
694   using LoadMapAllocator =
695       RecyclingAllocator<BumpPtrAllocator,
696                          ScopedHashTableVal<Value *, LoadValue>>;
697   using LoadHTType =
698       ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
699                       LoadMapAllocator>;
700 
701   LoadHTType AvailableLoads;
702 
703   // A scoped hash table mapping memory locations (represented as typed
704   // addresses) to generation numbers at which that memory location became
705   // (henceforth indefinitely) invariant.
706   using InvariantMapAllocator =
707       RecyclingAllocator<BumpPtrAllocator,
708                          ScopedHashTableVal<MemoryLocation, unsigned>>;
709   using InvariantHTType =
710       ScopedHashTable<MemoryLocation, unsigned, DenseMapInfo<MemoryLocation>,
711                       InvariantMapAllocator>;
712   InvariantHTType AvailableInvariants;
713 
714   /// A scoped hash table of the current values of read-only call
715   /// values.
716   ///
717   /// It uses the same generation count as loads.
718   using CallHTType =
719       ScopedHashTable<CallValue, std::pair<Instruction *, unsigned>>;
720   CallHTType AvailableCalls;
721 
722   using GEPMapAllocatorTy =
723       RecyclingAllocator<BumpPtrAllocator,
724                          ScopedHashTableVal<GEPValue, Value *>>;
725   using GEPHTType = ScopedHashTable<GEPValue, Value *, DenseMapInfo<GEPValue>,
726                                     GEPMapAllocatorTy>;
727   GEPHTType AvailableGEPs;
728 
729   /// This is the current generation of the memory value.
730   unsigned CurrentGeneration = 0;
731 
732   /// Set up the EarlyCSE runner for a particular function.
733   EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI,
734            const TargetTransformInfo &TTI, DominatorTree &DT,
735            AssumptionCache &AC, MemorySSA *MSSA)
736       : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA),
737         MSSAUpdater(std::make_unique<MemorySSAUpdater>(MSSA)) {}
738 
739   bool run();
740 
741 private:
742   unsigned ClobberCounter = 0;
743   // Almost a POD, but needs to call the constructors for the scoped hash
744   // tables so that a new scope gets pushed on. These are RAII so that the
745   // scope gets popped when the NodeScope is destroyed.
746   class NodeScope {
747   public:
748     NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
749               InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
750               GEPHTType &AvailableGEPs)
751         : Scope(AvailableValues), LoadScope(AvailableLoads),
752           InvariantScope(AvailableInvariants), CallScope(AvailableCalls),
753           GEPScope(AvailableGEPs) {}
754     NodeScope(const NodeScope &) = delete;
755     NodeScope &operator=(const NodeScope &) = delete;
756 
757   private:
758     ScopedHTType::ScopeTy Scope;
759     LoadHTType::ScopeTy LoadScope;
760     InvariantHTType::ScopeTy InvariantScope;
761     CallHTType::ScopeTy CallScope;
762     GEPHTType::ScopeTy GEPScope;
763   };
764 
765   // Contains all the information needed to create a stack for doing a
766   // depth-first traversal of the tree. This includes scopes for values,
767   // loads, and calls as well as the generation. There is a child iterator
768   // so that the children do not need to be stored separately.
769   class StackNode {
770   public:
771     StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
772               InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
773               GEPHTType &AvailableGEPs, unsigned cg, DomTreeNode *n,
774               DomTreeNode::const_iterator child,
775               DomTreeNode::const_iterator end)
776         : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
777           EndIter(end),
778           Scopes(AvailableValues, AvailableLoads, AvailableInvariants,
779                  AvailableCalls, AvailableGEPs) {}
780     StackNode(const StackNode &) = delete;
781     StackNode &operator=(const StackNode &) = delete;
782 
783     // Accessors.
784     unsigned currentGeneration() const { return CurrentGeneration; }
785     unsigned childGeneration() const { return ChildGeneration; }
786     void childGeneration(unsigned generation) { ChildGeneration = generation; }
787     DomTreeNode *node() { return Node; }
788     DomTreeNode::const_iterator childIter() const { return ChildIter; }
789 
790     DomTreeNode *nextChild() {
791       DomTreeNode *child = *ChildIter;
792       ++ChildIter;
793       return child;
794     }
795 
796     DomTreeNode::const_iterator end() const { return EndIter; }
797     bool isProcessed() const { return Processed; }
798     void process() { Processed = true; }
799 
800   private:
801     unsigned CurrentGeneration;
802     unsigned ChildGeneration;
803     DomTreeNode *Node;
804     DomTreeNode::const_iterator ChildIter;
805     DomTreeNode::const_iterator EndIter;
806     NodeScope Scopes;
807     bool Processed = false;
808   };
809 
810   /// Wrapper class to handle memory instructions, including loads,
811   /// stores and intrinsic loads and stores defined by the target.
812   class ParseMemoryInst {
813   public:
814     ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
815       : Inst(Inst) {
816       if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
817         IntrID = II->getIntrinsicID();
818         if (TTI.getTgtMemIntrinsic(II, Info))
819           return;
820         if (isHandledNonTargetIntrinsic(IntrID)) {
821           switch (IntrID) {
822           case Intrinsic::masked_load:
823             Info.PtrVal = Inst->getOperand(0);
824             Info.MatchingId = Intrinsic::masked_load;
825             Info.ReadMem = true;
826             Info.WriteMem = false;
827             Info.IsVolatile = false;
828             break;
829           case Intrinsic::masked_store:
830             Info.PtrVal = Inst->getOperand(1);
831             // Use the ID of masked load as the "matching id". This will
832             // prevent matching non-masked loads/stores with masked ones
833             // (which could be done), but at the moment, the code here
834             // does not support matching intrinsics with non-intrinsics,
835             // so keep the MatchingIds specific to masked instructions
836             // for now (TODO).
837             Info.MatchingId = Intrinsic::masked_load;
838             Info.ReadMem = false;
839             Info.WriteMem = true;
840             Info.IsVolatile = false;
841             break;
842           }
843         }
844       }
845     }
846 
847     Instruction *get() { return Inst; }
848     const Instruction *get() const { return Inst; }
849 
850     bool isLoad() const {
851       if (IntrID != 0)
852         return Info.ReadMem;
853       return isa<LoadInst>(Inst);
854     }
855 
856     bool isStore() const {
857       if (IntrID != 0)
858         return Info.WriteMem;
859       return isa<StoreInst>(Inst);
860     }
861 
862     bool isAtomic() const {
863       if (IntrID != 0)
864         return Info.Ordering != AtomicOrdering::NotAtomic;
865       return Inst->isAtomic();
866     }
867 
868     bool isUnordered() const {
869       if (IntrID != 0)
870         return Info.isUnordered();
871 
872       if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
873         return LI->isUnordered();
874       } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
875         return SI->isUnordered();
876       }
877       // Conservative answer
878       return !Inst->isAtomic();
879     }
880 
881     bool isVolatile() const {
882       if (IntrID != 0)
883         return Info.IsVolatile;
884 
885       if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
886         return LI->isVolatile();
887       } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
888         return SI->isVolatile();
889       }
890       // Conservative answer
891       return true;
892     }
893 
894     bool isInvariantLoad() const {
895       if (auto *LI = dyn_cast<LoadInst>(Inst))
896         return LI->hasMetadata(LLVMContext::MD_invariant_load);
897       return false;
898     }
899 
900     bool isValid() const { return getPointerOperand() != nullptr; }
901 
902     // For regular (non-intrinsic) loads/stores, this is set to -1. For
903     // intrinsic loads/stores, the id is retrieved from the corresponding
904     // field in the MemIntrinsicInfo structure.  That field contains
905     // non-negative values only.
906     int getMatchingId() const {
907       if (IntrID != 0)
908         return Info.MatchingId;
909       return -1;
910     }
911 
912     Value *getPointerOperand() const {
913       if (IntrID != 0)
914         return Info.PtrVal;
915       return getLoadStorePointerOperand(Inst);
916     }
917 
918     Type *getValueType() const {
919       // TODO: handle target-specific intrinsics.
920       return Inst->getAccessType();
921     }
922 
923     bool mayReadFromMemory() const {
924       if (IntrID != 0)
925         return Info.ReadMem;
926       return Inst->mayReadFromMemory();
927     }
928 
929     bool mayWriteToMemory() const {
930       if (IntrID != 0)
931         return Info.WriteMem;
932       return Inst->mayWriteToMemory();
933     }
934 
935   private:
936     Intrinsic::ID IntrID = 0;
937     MemIntrinsicInfo Info;
938     Instruction *Inst;
939   };
940 
941   // This function is to prevent accidentally passing a non-target
942   // intrinsic ID to TargetTransformInfo.
943   static bool isHandledNonTargetIntrinsic(Intrinsic::ID ID) {
944     switch (ID) {
945     case Intrinsic::masked_load:
946     case Intrinsic::masked_store:
947       return true;
948     }
949     return false;
950   }
951   static bool isHandledNonTargetIntrinsic(const Value *V) {
952     if (auto *II = dyn_cast<IntrinsicInst>(V))
953       return isHandledNonTargetIntrinsic(II->getIntrinsicID());
954     return false;
955   }
956 
957   bool processNode(DomTreeNode *Node);
958 
959   bool handleBranchCondition(Instruction *CondInst, const BranchInst *BI,
960                              const BasicBlock *BB, const BasicBlock *Pred);
961 
962   Value *getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
963                           unsigned CurrentGeneration);
964 
965   bool overridingStores(const ParseMemoryInst &Earlier,
966                         const ParseMemoryInst &Later);
967 
968   Value *getOrCreateResult(Instruction *Inst, Type *ExpectedType) const {
969     // TODO: We could insert relevant casts on type mismatch.
970     // The load or the store's first operand.
971     Value *V;
972     if (auto *II = dyn_cast<IntrinsicInst>(Inst)) {
973       switch (II->getIntrinsicID()) {
974       case Intrinsic::masked_load:
975         V = II;
976         break;
977       case Intrinsic::masked_store:
978         V = II->getOperand(0);
979         break;
980       default:
981         return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType);
982       }
983     } else {
984       V = isa<LoadInst>(Inst) ? Inst : cast<StoreInst>(Inst)->getValueOperand();
985     }
986 
987     return V->getType() == ExpectedType ? V : nullptr;
988   }
989 
990   /// Return true if the instruction is known to only operate on memory
991   /// provably invariant in the given "generation".
992   bool isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt);
993 
994   bool isSameMemGeneration(unsigned EarlierGeneration, unsigned LaterGeneration,
995                            Instruction *EarlierInst, Instruction *LaterInst);
996 
997   bool isNonTargetIntrinsicMatch(const IntrinsicInst *Earlier,
998                                  const IntrinsicInst *Later) {
999     auto IsSubmask = [](const Value *Mask0, const Value *Mask1) {
1000       // Is Mask0 a submask of Mask1?
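      // That is, every lane enabled by Mask0 must also be enabled by Mask1.
      // Unless the two masks are identical, undef lanes and non-constant masks
      // are answered conservatively with 'false'.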
1001       if (Mask0 == Mask1)
1002         return true;
1003       if (isa<UndefValue>(Mask0) || isa<UndefValue>(Mask1))
1004         return false;
1005       auto *Vec0 = dyn_cast<ConstantVector>(Mask0);
1006       auto *Vec1 = dyn_cast<ConstantVector>(Mask1);
1007       if (!Vec0 || !Vec1)
1008         return false;
1009       if (Vec0->getType() != Vec1->getType())
1010         return false;
1011       for (int i = 0, e = Vec0->getNumOperands(); i != e; ++i) {
1012         Constant *Elem0 = Vec0->getOperand(i);
1013         Constant *Elem1 = Vec1->getOperand(i);
1014         auto *Int0 = dyn_cast<ConstantInt>(Elem0);
1015         if (Int0 && Int0->isZero())
1016           continue;
1017         auto *Int1 = dyn_cast<ConstantInt>(Elem1);
1018         if (Int1 && !Int1->isZero())
1019           continue;
1020         if (isa<UndefValue>(Elem0) || isa<UndefValue>(Elem1))
1021           return false;
1022         if (Elem0 == Elem1)
1023           continue;
1024         return false;
1025       }
1026       return true;
1027     };
1028     auto PtrOp = [](const IntrinsicInst *II) {
1029       if (II->getIntrinsicID() == Intrinsic::masked_load)
1030         return II->getOperand(0);
1031       if (II->getIntrinsicID() == Intrinsic::masked_store)
1032         return II->getOperand(1);
1033       llvm_unreachable("Unexpected IntrinsicInst");
1034     };
1035     auto MaskOp = [](const IntrinsicInst *II) {
1036       if (II->getIntrinsicID() == Intrinsic::masked_load)
1037         return II->getOperand(2);
1038       if (II->getIntrinsicID() == Intrinsic::masked_store)
1039         return II->getOperand(3);
1040       llvm_unreachable("Unexpected IntrinsicInst");
1041     };
1042     auto ThruOp = [](const IntrinsicInst *II) {
1043       if (II->getIntrinsicID() == Intrinsic::masked_load)
1044         return II->getOperand(3);
1045       llvm_unreachable("Unexpected IntrinsicInst");
1046     };
1047 
1048     if (PtrOp(Earlier) != PtrOp(Later))
1049       return false;
1050 
1051     Intrinsic::ID IDE = Earlier->getIntrinsicID();
1052     Intrinsic::ID IDL = Later->getIntrinsicID();
1053     // We could really use specific intrinsic classes for masked loads
1054     // and stores in IntrinsicInst.h.
1055     if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_load) {
1056       // Trying to replace later masked load with the earlier one.
1057       // Check that the pointers are the same, and
1058       // - masks and pass-throughs are the same, or
1059       // - replacee's pass-through is "undef" and replacer's mask is a
1060       //   super-set of the replacee's mask.
1061       if (MaskOp(Earlier) == MaskOp(Later) && ThruOp(Earlier) == ThruOp(Later))
1062         return true;
1063       if (!isa<UndefValue>(ThruOp(Later)))
1064         return false;
1065       return IsSubmask(MaskOp(Later), MaskOp(Earlier));
1066     }
1067     if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_load) {
1068       // Trying to replace a load of a stored value with the store's value.
1069       // Check that the pointers are the same, and
1070       // - load's mask is a subset of store's mask, and
1071       // - load's pass-through is "undef".
1072       if (!IsSubmask(MaskOp(Later), MaskOp(Earlier)))
1073         return false;
1074       return isa<UndefValue>(ThruOp(Later));
1075     }
1076     if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_store) {
1077       // Trying to remove a store of the loaded value.
1078       // Check that the pointers are the same, and
1079       // - store's mask is a subset of the load's mask.
1080       return IsSubmask(MaskOp(Later), MaskOp(Earlier));
1081     }
1082     if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_store) {
1083       // Trying to remove a dead store (earlier).
1084       // Check that the pointers are the same,
1085       // - the to-be-removed store's mask is a subset of the other store's
1086       //   mask.
1087       return IsSubmask(MaskOp(Earlier), MaskOp(Later));
1088     }
1089     return false;
1090   }
1091 
1092   void removeMSSA(Instruction &Inst) {
1093     if (!MSSA)
1094       return;
1095     if (VerifyMemorySSA)
1096       MSSA->verifyMemorySSA();
1097     // Removing a store here can leave MemorySSA in an unoptimized state by
1098     // creating MemoryPhis that have identical arguments and by creating
1099     // MemoryUses whose defining access is not an actual clobber. The phi case
1100     // is handled by MemorySSA when passing OptimizePhis = true to
1101     // removeMemoryAccess.  The non-optimized MemoryUse case is lazily updated
1102     // by MemorySSA's getClobberingMemoryAccess.
1103     MSSAUpdater->removeMemoryAccess(&Inst, true);
1104   }
1105 };
1106 
1107 } // end anonymous namespace
1108 
1109 /// Determine if the memory referenced by LaterInst is from the same heap
1110 /// version as EarlierInst.
1111 /// This is currently called in two scenarios:
1112 ///
1113 ///   load p
1114 ///   ...
1115 ///   load p
1116 ///
1117 /// and
1118 ///
1119 ///   x = load p
1120 ///   ...
1121 ///   store x, p
1122 ///
1123 /// in both cases we want to verify that there are no possible writes to the
1124 /// memory referenced by p between the earlier and later instruction.
1125 bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
1126                                    unsigned LaterGeneration,
1127                                    Instruction *EarlierInst,
1128                                    Instruction *LaterInst) {
1129   // Check the simple memory generation tracking first.
1130   if (EarlierGeneration == LaterGeneration)
1131     return true;
1132 
1133   if (!MSSA)
1134     return false;
1135 
1136   // If MemorySSA has determined that one of EarlierInst or LaterInst does not
1137   // read/write memory, then we can safely return true here.
1138   // FIXME: We could be more aggressive when checking doesNotAccessMemory(),
1139   // onlyReadsMemory(), mayReadFromMemory(), and mayWriteToMemory() in this pass
1140   // by also checking the MemorySSA MemoryAccess on the instruction.  Initial
1141   // experiments suggest this isn't worthwhile, at least for C/C++ code compiled
1142   // with the default optimization pipeline.
1143   auto *EarlierMA = MSSA->getMemoryAccess(EarlierInst);
1144   if (!EarlierMA)
1145     return true;
1146   auto *LaterMA = MSSA->getMemoryAccess(LaterInst);
1147   if (!LaterMA)
1148     return true;
1149 
1150   // Since we know LaterDef dominates LaterInst and EarlierInst dominates
1151   // LaterInst, if LaterDef dominates EarlierInst then it can't occur between
1152   // EarlierInst and LaterInst and neither can any other write that potentially
1153   // clobbers LaterInst.
1154   MemoryAccess *LaterDef;
1155   if (ClobberCounter < EarlyCSEMssaOptCap) {
1156     LaterDef = MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
1157     ClobberCounter++;
1158   } else
1159     LaterDef = LaterMA->getDefiningAccess();
1160 
1161   return MSSA->dominates(LaterDef, EarlierMA);
1162 }
1163 
1164 bool EarlyCSE::isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt) {
1165   // A location loaded from with an invariant_load is assumed to *never* change
1166   // within the visible scope of the compilation.
1167   if (auto *LI = dyn_cast<LoadInst>(I))
1168     if (LI->hasMetadata(LLVMContext::MD_invariant_load))
1169       return true;
1170 
1171   auto MemLocOpt = MemoryLocation::getOrNone(I);
1172   if (!MemLocOpt)
1173     // "target" intrinsic forms of loads aren't currently known to
1174     // MemoryLocation::get.  TODO
1175     return false;
1176   MemoryLocation MemLoc = *MemLocOpt;
1177   if (!AvailableInvariants.count(MemLoc))
1178     return false;
1179 
1180   // Is the generation at which this became invariant older than the
1181   // current one?
1182   return AvailableInvariants.lookup(MemLoc) <= GenAt;
1183 }
1184 
1185 bool EarlyCSE::handleBranchCondition(Instruction *CondInst,
1186                                      const BranchInst *BI, const BasicBlock *BB,
1187                                      const BasicBlock *Pred) {
1188   assert(BI->isConditional() && "Should be a conditional branch!");
1189   assert(BI->getCondition() == CondInst && "Wrong condition?");
1190   assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
1191   auto *TorF = (BI->getSuccessor(0) == BB)
1192                    ? ConstantInt::getTrue(BB->getContext())
1193                    : ConstantInt::getFalse(BB->getContext());
1194   auto MatchBinOp = [](Instruction *I, unsigned Opcode, Value *&LHS,
1195                        Value *&RHS) {
1196     if (Opcode == Instruction::And &&
1197         match(I, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
1198       return true;
1199     else if (Opcode == Instruction::Or &&
1200              match(I, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
1201       return true;
1202     return false;
1203   };
1204   // If the condition is an AND operation, we can propagate its operands into
1205   // the true branch. If it is an OR operation, we can propagate them into the
1206   // false branch.
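  // For example, on the edge where 'br i1 (and i1 %a, %b), ...' is taken as
  // true, both %a and %b (as well as the 'and' itself) are known to be true.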
1207   unsigned PropagateOpcode =
1208       (BI->getSuccessor(0) == BB) ? Instruction::And : Instruction::Or;
1209 
1210   bool MadeChanges = false;
1211   SmallVector<Instruction *, 4> WorkList;
1212   SmallPtrSet<Instruction *, 4> Visited;
1213   WorkList.push_back(CondInst);
1214   while (!WorkList.empty()) {
1215     Instruction *Curr = WorkList.pop_back_val();
1216 
1217     AvailableValues.insert(Curr, TorF);
1218     LLVM_DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
1219                       << Curr->getName() << "' as " << *TorF << " in "
1220                       << BB->getName() << "\n");
1221     if (!DebugCounter::shouldExecute(CSECounter)) {
1222       LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1223     } else {
1224       // Replace all dominated uses with the known value.
1225       if (unsigned Count = replaceDominatedUsesWith(Curr, TorF, DT,
1226                                                     BasicBlockEdge(Pred, BB))) {
1227         NumCSECVP += Count;
1228         MadeChanges = true;
1229       }
1230     }
1231 
1232     Value *LHS, *RHS;
1233     if (MatchBinOp(Curr, PropagateOpcode, LHS, RHS))
1234       for (auto *Op : { LHS, RHS })
1235         if (Instruction *OPI = dyn_cast<Instruction>(Op))
1236           if (SimpleValue::canHandle(OPI) && Visited.insert(OPI).second)
1237             WorkList.push_back(OPI);
1238   }
1239 
1240   return MadeChanges;
1241 }
1242 
1243 Value *EarlyCSE::getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
1244                                   unsigned CurrentGeneration) {
1245   if (InVal.DefInst == nullptr)
1246     return nullptr;
1247   if (InVal.MatchingId != MemInst.getMatchingId())
1248     return nullptr;
1249   // We don't yet handle removing loads with ordering of any kind.
1250   if (MemInst.isVolatile() || !MemInst.isUnordered())
1251     return nullptr;
1252   // We can't replace an atomic load with one which isn't also atomic.
1253   if (MemInst.isLoad() && !InVal.IsAtomic && MemInst.isAtomic())
1254     return nullptr;
1255   // The value V returned from this function is used differently depending
1256   // on whether MemInst is a load or a store. If it's a load, we will replace
1257   // MemInst with V, if it's a store, we will check if V is the same as the
1258   // available value.
1259   bool MemInstMatching = !MemInst.isLoad();
1260   Instruction *Matching = MemInstMatching ? MemInst.get() : InVal.DefInst;
1261   Instruction *Other = MemInstMatching ? InVal.DefInst : MemInst.get();
1262 
1263   // For stores check the result values before checking memory generation
1264   // (otherwise isSameMemGeneration may crash).
1265   Value *Result = MemInst.isStore()
1266                       ? getOrCreateResult(Matching, Other->getType())
1267                       : nullptr;
1268   if (MemInst.isStore() && InVal.DefInst != Result)
1269     return nullptr;
1270 
1271   // Deal with non-target memory intrinsics.
1272   bool MatchingNTI = isHandledNonTargetIntrinsic(Matching);
1273   bool OtherNTI = isHandledNonTargetIntrinsic(Other);
1274   if (OtherNTI != MatchingNTI)
1275     return nullptr;
1276   if (OtherNTI && MatchingNTI) {
1277     if (!isNonTargetIntrinsicMatch(cast<IntrinsicInst>(InVal.DefInst),
1278                                    cast<IntrinsicInst>(MemInst.get())))
1279       return nullptr;
1280   }
1281 
1282   if (!isOperatingOnInvariantMemAt(MemInst.get(), InVal.Generation) &&
1283       !isSameMemGeneration(InVal.Generation, CurrentGeneration, InVal.DefInst,
1284                            MemInst.get()))
1285     return nullptr;
1286 
1287   if (!Result)
1288     Result = getOrCreateResult(Matching, Other->getType());
1289   return Result;
1290 }
1291 
1292 static void combineIRFlags(Instruction &From, Value *To) {
1293   if (auto *I = dyn_cast<Instruction>(To)) {
1294     // If I being poison triggers UB, there is no need to drop those
1295     // flags. Otherwise, only retain flags present on both I and Inst.
1296     // TODO: Currently some fast-math flags are not treated as
1297     // poison-generating even though they should. Until this is fixed,
1298     // always retain flags present on both I and Inst for floating point
1299     // instructions.
1300     if (isa<FPMathOperator>(I) ||
1301         (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I)))
1302       I->andIRFlags(&From);
1303   }
1304   if (isa<CallBase>(&From) && isa<CallBase>(To)) {
1305     // NB: Intersection of attrs between InVal.first and Inst is overly
1306     // conservative. Since we only CSE readonly functions that have the same
1307     // memory state, we can preserve (or possibly in some cases combine)
1308     // more attributes. Likewise this implies when checking equality of
1309     // callsite for CSEing, we can probably ignore more attributes.
1310     // Generally poison generating attributes need to be handled with more
1311     // care as they can create *new* UB if preserved/combined and violated.
1312     // Attributes that imply immediate UB on the other hand would have been
1313     // violated either way.
1314     bool Success =
1315         cast<CallBase>(To)->tryIntersectAttributes(cast<CallBase>(&From));
1316     assert(Success && "Failed to intersect attributes in callsites that "
1317                       "passed identical check");
1318     // Avoid an unused-variable warning in NDEBUG builds.
1319     (void)Success;
1320   }
1321 }
1322 
1323 bool EarlyCSE::overridingStores(const ParseMemoryInst &Earlier,
1324                                 const ParseMemoryInst &Later) {
1325   // Can we remove Earlier store because of Later store?
1326 
1327   assert(Earlier.isUnordered() && !Earlier.isVolatile() &&
1328          "Violated invariant");
1329   if (Earlier.getPointerOperand() != Later.getPointerOperand())
1330     return false;
1331   if (!Earlier.getValueType() || !Later.getValueType() ||
1332       Earlier.getValueType() != Later.getValueType())
1333     return false;
1334   if (Earlier.getMatchingId() != Later.getMatchingId())
1335     return false;
1336   // At the moment, we don't remove ordered stores, but do remove
1337   // unordered atomic stores.  There's no special requirement (for
1338   // unordered atomics) about removing atomic stores only in favor of
1339   // other atomic stores since we were going to execute the non-atomic
1340   // one anyway and the atomic one might never have become visible.
1341   if (!Earlier.isUnordered() || !Later.isUnordered())
1342     return false;
1343 
1344   // Deal with non-target memory intrinsics.
1345   bool ENTI = isHandledNonTargetIntrinsic(Earlier.get());
1346   bool LNTI = isHandledNonTargetIntrinsic(Later.get());
1347   if (ENTI && LNTI)
1348     return isNonTargetIntrinsicMatch(cast<IntrinsicInst>(Earlier.get()),
1349                                      cast<IntrinsicInst>(Later.get()));
1350 
1351   // Because of the check above, at least one of them is false.
1352   // For now disallow matching intrinsics with non-intrinsics,
1353   // so assume that the stores match if neither is an intrinsic.
1354   return ENTI == LNTI;
1355 }
1356 
1357 bool EarlyCSE::processNode(DomTreeNode *Node) {
1358   bool Changed = false;
1359   BasicBlock *BB = Node->getBlock();
1360 
1361   // If this block has a single predecessor, then the predecessor is the parent
1362   // of the domtree node and all of the live-out memory values are still current
1363   // in this block.  If this block has multiple predecessors, then they could
1364   // have invalidated the live-out memory values of our parent block.  For now,
1365   // just be conservative and invalidate memory if this block has multiple
1366   // predecessors.
1367   if (!BB->getSinglePredecessor())
1368     ++CurrentGeneration;
1369 
1370   // If this node has a single predecessor which ends in a conditional branch,
1371   // we can infer the value of the branch condition given that we took this
1372   // path.  We need the single predecessor to ensure there's not another path
1373   // which reaches this block where the condition might hold a different
1374   // value.  Since we're adding this to the scoped hash table (like any other
1375   // def), it will have been popped if we encounter a future merge block.
1376   if (BasicBlock *Pred = BB->getSinglePredecessor()) {
1377     auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
1378     if (BI && BI->isConditional()) {
1379       auto *CondInst = dyn_cast<Instruction>(BI->getCondition());
1380       if (CondInst && SimpleValue::canHandle(CondInst))
1381         Changed |= handleBranchCondition(CondInst, BI, BB, Pred);
1382     }
1383   }
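  // Illustrative sketch: given a predecessor ending in
  //   br i1 %c, label %then, label %else
  // where %then has no other predecessor, handleBranchCondition() records %c
  // as true (and, through and/or chains, possibly its operands as well), so
  // dominated uses of %c in %then can be folded below.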
1384 
1385   /// LastStore - Keep track of the last non-volatile store that we saw... for
1386   /// as long as there is no instruction that reads memory.  If we see a store
1387   /// to the same location, we delete the dead store.  This zaps trivial dead
1388   /// stores which can occur in bitfield code among other things.
1389   Instruction *LastStore = nullptr;
1390 
1391   // See if any instructions in the block can be eliminated.  If so, do it.  If
1392   // not, add them to AvailableValues.
1393   for (Instruction &Inst : make_early_inc_range(*BB)) {
1394     // Dead instructions should just be removed.
1395     if (isInstructionTriviallyDead(&Inst, &TLI)) {
1396       LLVM_DEBUG(dbgs() << "EarlyCSE DCE: " << Inst << '\n');
1397       if (!DebugCounter::shouldExecute(CSECounter)) {
1398         LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1399         continue;
1400       }
1401 
1402       salvageKnowledge(&Inst, &AC);
1403       salvageDebugInfo(Inst);
1404       removeMSSA(Inst);
1405       Inst.eraseFromParent();
1406       Changed = true;
1407       ++NumSimplify;
1408       continue;
1409     }
1410 
1411     // Skip assume intrinsics; they don't really have side effects (although
1412     // they're marked as such to ensure preservation of control dependencies),
1413     // and this pass does not bother removing them. However, we should mark
1414     // their conditions as true for all dominated blocks.
1415     if (auto *Assume = dyn_cast<AssumeInst>(&Inst)) {
1416       auto *CondI = dyn_cast<Instruction>(Assume->getArgOperand(0));
1417       if (CondI && SimpleValue::canHandle(CondI)) {
1418         LLVM_DEBUG(dbgs() << "EarlyCSE considering assumption: " << Inst
1419                           << '\n');
1420         AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
1421       } else
1422         LLVM_DEBUG(dbgs() << "EarlyCSE skipping assumption: " << Inst << '\n');
1423       continue;
1424     }
1425 
1426     // Likewise, noalias intrinsics don't actually write.
1427     if (match(&Inst,
1428               m_Intrinsic<Intrinsic::experimental_noalias_scope_decl>())) {
1429       LLVM_DEBUG(dbgs() << "EarlyCSE skipping noalias intrinsic: " << Inst
1430                         << '\n');
1431       continue;
1432     }
1433 
1434     // Skip sideeffect intrinsics, for the same reason as assume intrinsics.
1435     if (match(&Inst, m_Intrinsic<Intrinsic::sideeffect>())) {
1436       LLVM_DEBUG(dbgs() << "EarlyCSE skipping sideeffect: " << Inst << '\n');
1437       continue;
1438     }
1439 
1440     // Skip pseudoprobe intrinsics, for the same reason as assume intrinsics.
1441     if (match(&Inst, m_Intrinsic<Intrinsic::pseudoprobe>())) {
1442       LLVM_DEBUG(dbgs() << "EarlyCSE skipping pseudoprobe: " << Inst << '\n');
1443       continue;
1444     }
1445 
1446     // We can skip all invariant.start intrinsics since they only read memory,
1447     // and we can forward values across them. For invariant starts without
1448     // invariant ends, we can use the fact that the invariantness never ends to
1449     // start a scope in the current generation which is true for all future
1450     // generations.  Also, we don't need to consume the last store since the
1451     // semantics of invariant.start allow us to perform DSE of the last
1452     // store, if there was a store following invariant.start. Consider:
1453     //
1454     // store 30, i8* p
1455     // invariant.start(p)
1456     // store 40, i8* p
1457     // We can DSE the store of 30, since the store of 40 to the invariant
1458     // location p causes undefined behaviour.
1459     if (match(&Inst, m_Intrinsic<Intrinsic::invariant_start>())) {
1460       // If there are any uses, the scope might end.
1461       if (!Inst.use_empty())
1462         continue;
1463       MemoryLocation MemLoc =
1464           MemoryLocation::getForArgument(&cast<CallInst>(Inst), 1, TLI);
1465       // Don't start a scope if we already have a better one pushed.
1466       if (!AvailableInvariants.count(MemLoc))
1467         AvailableInvariants.insert(MemLoc, CurrentGeneration);
1468       continue;
1469     }
1470 
1471     if (isGuard(&Inst)) {
1472       if (auto *CondI =
1473               dyn_cast<Instruction>(cast<CallInst>(Inst).getArgOperand(0))) {
1474         if (SimpleValue::canHandle(CondI)) {
1475           // Do we already know the actual value of this condition?
1476           if (auto *KnownCond = AvailableValues.lookup(CondI)) {
1477             // Is the condition known to be true?
1478             if (isa<ConstantInt>(KnownCond) &&
1479                 cast<ConstantInt>(KnownCond)->isOne()) {
1480               LLVM_DEBUG(dbgs()
1481                          << "EarlyCSE removing guard: " << Inst << '\n');
1482               salvageKnowledge(&Inst, &AC);
1483               removeMSSA(Inst);
1484               Inst.eraseFromParent();
1485               Changed = true;
1486               continue;
1487             } else
1488               // Use the known value if it wasn't true.
1489               cast<CallInst>(Inst).setArgOperand(0, KnownCond);
1490           }
1491           // The condition we're guarding on here is true for all dominated
1492           // locations.
1493           AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
1494         }
1495       }
1496 
1497       // Guard intrinsics read all memory, but don't write any memory.
1498       // Accordingly, don't update the generation but consume the last store (to
1499       // avoid an incorrect DSE).
1500       LastStore = nullptr;
1501       continue;
1502     }
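    // Illustrative sketch: if a dominating guard or branch already established
    // that %cond holds, a later
    //   call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
    // is deleted above; if %cond maps to some other known value, the guard's
    // argument is rewritten to that value instead.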
1503 
1504     // If the instruction can be simplified (e.g. X+0 = X) then replace it with
1505     // its simpler value.
1506     if (Value *V = simplifyInstruction(&Inst, SQ)) {
1507       LLVM_DEBUG(dbgs() << "EarlyCSE Simplify: " << Inst << "  to: " << *V
1508                         << '\n');
1509       if (!DebugCounter::shouldExecute(CSECounter)) {
1510         LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1511       } else {
1512         bool Killed = false;
1513         if (!Inst.use_empty()) {
1514           Inst.replaceAllUsesWith(V);
1515           Changed = true;
1516         }
1517         if (isInstructionTriviallyDead(&Inst, &TLI)) {
1518           salvageKnowledge(&Inst, &AC);
1519           removeMSSA(Inst);
1520           Inst.eraseFromParent();
1521           Changed = true;
1522           Killed = true;
1523         }
1524         if (Changed)
1525           ++NumSimplify;
1526         if (Killed)
1527           continue;
1528       }
1529     }
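    // Illustrative sketch: simplifyInstruction() folds patterns such as
    // "add i32 %x, 0" to %x or "and i1 %b, %b" to %b without creating new
    // instructions; above, the simplified instruction is RAUW'd and erased
    // once it becomes trivially dead.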
1530 
1531     // If this is a simple instruction that we can value number, process it.
1532     if (SimpleValue::canHandle(&Inst)) {
1533       if ([[maybe_unused]] auto *CI = dyn_cast<ConstrainedFPIntrinsic>(&Inst)) {
1534         assert(CI->getExceptionBehavior() != fp::ebStrict &&
1535                "Unexpected ebStrict from SimpleValue::canHandle()");
1536         assert((!CI->getRoundingMode() ||
1537                 CI->getRoundingMode() != RoundingMode::Dynamic) &&
1538                "Unexpected dynamic rounding from SimpleValue::canHandle()");
1539       }
1540       // See if the instruction has an available value.  If so, use it.
1541       if (Value *V = AvailableValues.lookup(&Inst)) {
1542         LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << Inst << "  to: " << *V
1543                           << '\n');
1544         if (!DebugCounter::shouldExecute(CSECounter)) {
1545           LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1546           continue;
1547         }
1548         combineIRFlags(Inst, V);
1549         Inst.replaceAllUsesWith(V);
1550         salvageKnowledge(&Inst, &AC);
1551         removeMSSA(Inst);
1552         Inst.eraseFromParent();
1553         Changed = true;
1554         ++NumCSE;
1555         continue;
1556       }
1557 
1558       // Otherwise, just remember that this value is available.
1559       AvailableValues.insert(&Inst, &Inst);
1560       continue;
1561     }
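    // Illustrative sketch: within a dominating scope the SimpleValue table
    // commons expressions by opcode and operands, e.g.
    //   %a = add i32 %x, %y
    //   ...
    //   %b = add i32 %x, %y
    // where %b is replaced by %a above, after intersecting IR flags via
    // combineIRFlags().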
1562 
1563     ParseMemoryInst MemInst(&Inst, TTI);
1564     // If this is a non-volatile load, process it.
1565     if (MemInst.isValid() && MemInst.isLoad()) {
1566       // (Conservatively) we can't peek past the ordering implied by this
1567       // operation, but we can add this load to our set of available values.
1568       if (MemInst.isVolatile() || !MemInst.isUnordered()) {
1569         LastStore = nullptr;
1570         ++CurrentGeneration;
1571       }
1572 
1573       if (MemInst.isInvariantLoad()) {
1574         // If we pass an invariant load, we know that memory location is
1575         // indefinitely constant from the moment of first dereferenceability.
1576         // We conservatively treat the invariant_load as that moment.  If we
1577         // pass an invariant load after already establishing a scope, don't
1578         // restart it since we want to preserve the earliest point seen.
1579         auto MemLoc = MemoryLocation::get(&Inst);
1580         if (!AvailableInvariants.count(MemLoc))
1581           AvailableInvariants.insert(MemLoc, CurrentGeneration);
1582       }
1583 
1584       // If we have an available version of this load, and if it is the right
1585       // generation or the load is known to be from an invariant location,
1586       // replace this instruction.
1587       //
1588       // If either the dominating load or the current load are invariant, then
1589       // we can assume the current load loads the same value as the dominating
1590       // load.
1591       LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
1592       if (Value *Op = getMatchingValue(InVal, MemInst, CurrentGeneration)) {
1593         LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << Inst
1594                           << "  to: " << *InVal.DefInst << '\n');
1595         if (!DebugCounter::shouldExecute(CSECounter)) {
1596           LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1597           continue;
1598         }
1599         if (InVal.IsLoad)
1600           if (auto *I = dyn_cast<Instruction>(Op))
1601             combineMetadataForCSE(I, &Inst, false);
1602         if (!Inst.use_empty())
1603           Inst.replaceAllUsesWith(Op);
1604         salvageKnowledge(&Inst, &AC);
1605         removeMSSA(Inst);
1606         Inst.eraseFromParent();
1607         Changed = true;
1608         ++NumCSELoad;
1609         continue;
1610       }
1611 
1612       // Otherwise, remember that we have this instruction.
1613       AvailableLoads.insert(MemInst.getPointerOperand(),
1614                             LoadValue(&Inst, CurrentGeneration,
1615                                       MemInst.getMatchingId(),
1616                                       MemInst.isAtomic(),
1617                                       MemInst.isLoad()));
1618       LastStore = nullptr;
1619       continue;
1620     }
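    // Illustrative sketch: AvailableLoads drives both load-load CSE and
    // store-to-load forwarding within a memory generation, e.g.
    //   store i32 %v, ptr %p
    //   %l = load i32, ptr %p    ; forwarded from the store, becomes %v
    //   %m = load i32, ptr %p    ; CSE'd against the available value
    // as long as no intervening instruction may clobber %p.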
1621 
1622     // If this instruction may read from memory or throw (and potentially read
1623     // from memory in the exception handler), forget LastStore.  Load/store
1624     // intrinsics will indicate both a read and a write to memory.  The target
1625     // may override this (e.g. so that a store intrinsic does not read from
1626     // memory, and thus will be treated the same as a regular store for
1627     // commoning purposes).
1628     if ((Inst.mayReadFromMemory() || Inst.mayThrow()) &&
1629         !(MemInst.isValid() && !MemInst.mayReadFromMemory()))
1630       LastStore = nullptr;
1631 
1632     // If this is a read-only call, process it.
1633     if (CallValue::canHandle(&Inst)) {
1634       // If we have an available version of this call, and if it is the right
1635       // generation, replace this instruction.
1636       std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(&Inst);
1637       if (InVal.first != nullptr &&
1638           isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first,
1639                               &Inst)) {
1640         LLVM_DEBUG(dbgs() << "EarlyCSE CSE CALL: " << Inst
1641                           << "  to: " << *InVal.first << '\n');
1642         if (!DebugCounter::shouldExecute(CSECounter)) {
1643           LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1644           continue;
1645         }
1646         combineIRFlags(Inst, InVal.first);
1647         if (!Inst.use_empty())
1648           Inst.replaceAllUsesWith(InVal.first);
1649         salvageKnowledge(&Inst, &AC);
1650         removeMSSA(Inst);
1651         Inst.eraseFromParent();
1652         Changed = true;
1653         ++NumCSECall;
1654         continue;
1655       }
1656 
1657       // Otherwise, remember that we have this instruction.
1658       AvailableCalls.insert(&Inst, std::make_pair(&Inst, CurrentGeneration));
1659       continue;
1660     }
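    // Illustrative sketch: read-only call CSE commons two identical calls
    // such as
    //   %a = call i32 @f(i32 %x)
    //   %b = call i32 @f(i32 %x)
    // when @f only reads memory and MemorySSA (or the generation count) shows
    // no clobber of that memory between the two call sites.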
1661 
1662     // Compare GEP instructions based on offset.
1663     if (GEPValue::canHandle(&Inst)) {
1664       auto *GEP = cast<GetElementPtrInst>(&Inst);
1665       APInt Offset = APInt(SQ.DL.getIndexTypeSizeInBits(GEP->getType()), 0);
1666       GEPValue GEPVal(GEP, GEP->accumulateConstantOffset(SQ.DL, Offset)
1667                                ? Offset.trySExtValue()
1668                                : std::nullopt);
1669       if (Value *V = AvailableGEPs.lookup(GEPVal)) {
1670         LLVM_DEBUG(dbgs() << "EarlyCSE CSE GEP: " << Inst << "  to: " << *V
1671                           << '\n');
1672         combineIRFlags(Inst, V);
1673         Inst.replaceAllUsesWith(V);
1674         salvageKnowledge(&Inst, &AC);
1675         removeMSSA(Inst);
1676         Inst.eraseFromParent();
1677         Changed = true;
1678         ++NumCSEGEP;
1679         continue;
1680       }
1681 
1682       // Otherwise, just remember that we have this GEP.
1683       AvailableGEPs.insert(GEPVal, &Inst);
1684       continue;
1685     }
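    // Illustrative sketch (assuming a 4-byte i32): GEPs are keyed either by
    // their exact operands or by their accumulated constant offset, so
    //   %a = getelementptr inbounds i8, ptr %p, i64 4
    //   %b = getelementptr inbounds i32, ptr %p, i64 1
    // compute the same address and %b can be replaced by %a above.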
1686 
1687     // A release fence requires that all stores complete before it, but does
1688     // not prevent the reordering of following loads 'before' the fence.  As a
1689     // result, we don't need to consider it as writing to memory and don't need
1690     // to advance the generation.  We do need to prevent DSE across the fence,
1691     // but that's handled above.
1692     if (auto *FI = dyn_cast<FenceInst>(&Inst))
1693       if (FI->getOrdering() == AtomicOrdering::Release) {
1694         assert(Inst.mayReadFromMemory() && "relied on to prevent DSE above");
1695         continue;
1696       }
1697 
1698     // Write-back DSE - If we write back the same value we just loaded from
1699     // the same location and haven't passed any intervening writes or ordering
1700     // operations, we can remove the write.  The primary benefit is in allowing
1701     // the available-load table to remain valid and value-forward past where
1702     // the store originally was.
1703     if (MemInst.isValid() && MemInst.isStore()) {
1704       LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
1705       if (InVal.DefInst &&
1706           InVal.DefInst == getMatchingValue(InVal, MemInst, CurrentGeneration)) {
1707         // It is okay to have a LastStore to a different pointer here if MemorySSA
1708         // tells us that the load and store are from the same memory generation.
1709         // In that case, LastStore should keep its present value since we're
1710         // removing the current store.
1711         assert((!LastStore ||
1712                 ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
1713                     MemInst.getPointerOperand() ||
1714                 MSSA) &&
1715                "can't have an intervening store if not using MemorySSA!");
1716         LLVM_DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << Inst << '\n');
1717         if (!DebugCounter::shouldExecute(CSECounter)) {
1718           LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1719           continue;
1720         }
1721         salvageKnowledge(&Inst, &AC);
1722         removeMSSA(Inst);
1723         Inst.eraseFromParent();
1724         Changed = true;
1725         ++NumDSE;
1726         // We can avoid incrementing the generation count since we were able
1727         // to eliminate this store.
1728         continue;
1729       }
1730     }
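    // Illustrative sketch: the write-back DSE above removes a store of the
    // value just loaded from the same location, e.g.
    //   %v = load i32, ptr %p
    //   store i32 %v, ptr %p    ; removed; the available load for %p survives
    // provided no intervening write or ordering operation invalidated %p.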
1731 
1732     // Okay, this isn't something we can CSE at all.  Check to see if it is
1733     // something that could modify memory.  If so, our available memory values
1734     // cannot be used so bump the generation count.
1735     if (Inst.mayWriteToMemory()) {
1736       ++CurrentGeneration;
1737 
1738       if (MemInst.isValid() && MemInst.isStore()) {
1739         // We do a trivial form of DSE if there are two stores to the same
1740         // location with no intervening loads.  Delete the earlier store.
1741         if (LastStore) {
1742           if (overridingStores(ParseMemoryInst(LastStore, TTI), MemInst)) {
1743             LLVM_DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
1744                               << "  due to: " << Inst << '\n');
1745             if (!DebugCounter::shouldExecute(CSECounter)) {
1746               LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1747             } else {
1748               salvageKnowledge(&Inst, &AC);
1749               removeMSSA(*LastStore);
1750               LastStore->eraseFromParent();
1751               Changed = true;
1752               ++NumDSE;
1753               LastStore = nullptr;
1754             }
1755           }
1756           // fallthrough - we can exploit information about this store
1757         }
1758 
1759         // Okay, we just invalidated anything we knew about loaded values.  Try
1760         // to salvage *something* by remembering that the stored value is a live
1761         // version of the pointer.  It is safe to forward from volatile stores
1762         // to non-volatile loads, so we don't have to check for volatility of
1763         // the store.
1764         AvailableLoads.insert(MemInst.getPointerOperand(),
1765                               LoadValue(&Inst, CurrentGeneration,
1766                                         MemInst.getMatchingId(),
1767                                         MemInst.isAtomic(),
1768                                         MemInst.isLoad()));
1769 
1770         // Remember that this was the last unordered store we saw for DSE. We
1771         // don't yet handle DSE on ordered or volatile stores since we don't
1772         // have a good way to model the ordering requirement for following
1773         // passes once the store is removed.  We could insert a fence, but
1774         // since fences are slightly stronger than stores in their ordering,
1775         // it's not clear this is a profitable transform. Another option would
1776         // be to merge the ordering with that of the post-dominating store.
1777         if (MemInst.isUnordered() && !MemInst.isVolatile())
1778           LastStore = &Inst;
1779         else
1780           LastStore = nullptr;
1781       }
1782     }
1783   }
1784 
1785   return Changed;
1786 }
1787 
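// Illustrative sketch of the traversal below: run() performs an explicit,
// non-recursive pre-order walk of the dominator tree. Each StackNode opens
// scopes on the scoped hash tables and records the generation on entry, so
// popping a node discards everything made available in its subtree; values
// established in one arm of a diamond are therefore not visible in the
// sibling arm or in the merge block.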
1788 bool EarlyCSE::run() {
1789   // Note: a deque is used here because there are significant performance
1790   // gains over vector when the container becomes very large, due to the
1791   // specific access patterns. For more information see the mailing list
1792   // discussion on this:
1793   // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
1794   std::deque<StackNode *> nodesToProcess;
1795 
1796   bool Changed = false;
1797 
1798   // Process the root node.
1799   nodesToProcess.push_back(new StackNode(
1800       AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
1801       AvailableGEPs, CurrentGeneration, DT.getRootNode(),
1802       DT.getRootNode()->begin(), DT.getRootNode()->end()));
1803 
1804   assert(!CurrentGeneration && "Create a new EarlyCSE instance to rerun it.");
1805 
1806   // Process the stack.
1807   while (!nodesToProcess.empty()) {
1808     // Look at the node on top of the stack: set the current generation,
1809     // then process the node, descend into its next child, or pop it once done.
1810     StackNode *NodeToProcess = nodesToProcess.back();
1811 
1812     // Initialize class members.
1813     CurrentGeneration = NodeToProcess->currentGeneration();
1814 
1815     // Check if the node needs to be processed.
1816     if (!NodeToProcess->isProcessed()) {
1817       // Process the node.
1818       Changed |= processNode(NodeToProcess->node());
1819       NodeToProcess->childGeneration(CurrentGeneration);
1820       NodeToProcess->process();
1821     } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
1822       // Push the next child onto the stack.
1823       DomTreeNode *child = NodeToProcess->nextChild();
1824       nodesToProcess.push_back(new StackNode(
1825           AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
1826           AvailableGEPs, NodeToProcess->childGeneration(), child,
1827           child->begin(), child->end()));
1828     } else {
1829       // It has been processed, and there are no more children to process,
1830       // so delete it and pop it off the stack.
1831       delete NodeToProcess;
1832       nodesToProcess.pop_back();
1833     }
1834   } // while (!nodes...)
1835 
1836   return Changed;
1837 }
1838 
1839 PreservedAnalyses EarlyCSEPass::run(Function &F,
1840                                     FunctionAnalysisManager &AM) {
1841   auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1842   auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1843   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1844   auto &AC = AM.getResult<AssumptionAnalysis>(F);
1845   auto *MSSA =
1846       UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr;
1847 
1848   EarlyCSE CSE(F.getDataLayout(), TLI, TTI, DT, AC, MSSA);
1849 
1850   if (!CSE.run())
1851     return PreservedAnalyses::all();
1852 
1853   PreservedAnalyses PA;
1854   PA.preserveSet<CFGAnalyses>();
1855   if (UseMemorySSA)
1856     PA.preserve<MemorySSAAnalysis>();
1857   return PA;
1858 }
1859 
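// Illustrative usage sketch: with the new pass manager this pass is typically
// requested as "early-cse" or "early-cse<memssa>", e.g.
//   opt -passes='early-cse<memssa>' -S input.ll
// and printPipeline() below emits that parameterized form.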
1860 void EarlyCSEPass::printPipeline(
1861     raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1862   static_cast<PassInfoMixin<EarlyCSEPass> *>(this)->printPipeline(
1863       OS, MapClassName2PassName);
1864   OS << '<';
1865   if (UseMemorySSA)
1866     OS << "memssa";
1867   OS << '>';
1868 }
1869 
1870 namespace {
1871 
1872 /// A simple and fast domtree-based CSE pass.
1873 ///
1874 /// This pass does a simple depth-first walk over the dominator tree,
1875 /// eliminating trivially redundant instructions and using instsimplify to
1876 /// canonicalize things as it goes. It is intended to be fast and catch obvious
1877 /// cases so that instcombine and other passes are more effective. It is
1878 /// expected that a later pass of GVN will catch the interesting/hard cases.
1879 template<bool UseMemorySSA>
1880 class EarlyCSELegacyCommonPass : public FunctionPass {
1881 public:
1882   static char ID;
1883 
1884   EarlyCSELegacyCommonPass() : FunctionPass(ID) {
1885     if (UseMemorySSA)
1886       initializeEarlyCSEMemSSALegacyPassPass(*PassRegistry::getPassRegistry());
1887     else
1888       initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
1889   }
1890 
1891   bool runOnFunction(Function &F) override {
1892     if (skipFunction(F))
1893       return false;
1894 
1895     auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1896     auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1897     auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1898     auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1899     auto *MSSA =
1900         UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr;
1901 
1902     EarlyCSE CSE(F.getDataLayout(), TLI, TTI, DT, AC, MSSA);
1903 
1904     return CSE.run();
1905   }
1906 
1907   void getAnalysisUsage(AnalysisUsage &AU) const override {
1908     AU.addRequired<AssumptionCacheTracker>();
1909     AU.addRequired<DominatorTreeWrapperPass>();
1910     AU.addRequired<TargetLibraryInfoWrapperPass>();
1911     AU.addRequired<TargetTransformInfoWrapperPass>();
1912     if (UseMemorySSA) {
1913       AU.addRequired<AAResultsWrapperPass>();
1914       AU.addRequired<MemorySSAWrapperPass>();
1915       AU.addPreserved<MemorySSAWrapperPass>();
1916     }
1917     AU.addPreserved<GlobalsAAWrapperPass>();
1918     AU.addPreserved<AAResultsWrapperPass>();
1919     AU.setPreservesCFG();
1920   }
1921 };
1922 
1923 } // end anonymous namespace
1924 
1925 using EarlyCSELegacyPass = EarlyCSELegacyCommonPass</*UseMemorySSA=*/false>;
1926 
1927 template<>
1928 char EarlyCSELegacyPass::ID = 0;
1929 
1930 INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false,
1931                       false)
1932 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
1933 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1934 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1935 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1936 INITIALIZE_PASS_END(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false)
1937 
1938 using EarlyCSEMemSSALegacyPass =
1939     EarlyCSELegacyCommonPass</*UseMemorySSA=*/true>;
1940 
1941 template<>
1942 char EarlyCSEMemSSALegacyPass::ID = 0;
1943 
1944 FunctionPass *llvm::createEarlyCSEPass(bool UseMemorySSA) {
1945   if (UseMemorySSA)
1946     return new EarlyCSEMemSSALegacyPass();
1947   else
1948     return new EarlyCSELegacyPass();
1949 }
1950 
1951 INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
1952                       "Early CSE w/ MemorySSA", false, false)
1953 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
1954 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1955 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
1956 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1957 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1958 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
1959 INITIALIZE_PASS_END(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
1960                     "Early CSE w/ MemorySSA", false, false)
1961