1 //===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs a simple dominator tree walk that eliminates trivially
10 // redundant instructions.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "llvm/Transforms/Scalar/EarlyCSE.h"
15 #include "llvm/ADT/DenseMapInfo.h"
16 #include "llvm/ADT/Hashing.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/ScopedHashTable.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/Analysis/AssumptionCache.h"
22 #include "llvm/Analysis/GlobalsModRef.h"
23 #include "llvm/Analysis/GuardUtils.h"
24 #include "llvm/Analysis/InstructionSimplify.h"
25 #include "llvm/Analysis/MemorySSA.h"
26 #include "llvm/Analysis/MemorySSAUpdater.h"
27 #include "llvm/Analysis/TargetLibraryInfo.h"
28 #include "llvm/Analysis/TargetTransformInfo.h"
29 #include "llvm/Analysis/ValueTracking.h"
30 #include "llvm/IR/BasicBlock.h"
31 #include "llvm/IR/Constants.h"
32 #include "llvm/IR/Dominators.h"
33 #include "llvm/IR/Function.h"
34 #include "llvm/IR/InstrTypes.h"
35 #include "llvm/IR/Instruction.h"
36 #include "llvm/IR/Instructions.h"
37 #include "llvm/IR/IntrinsicInst.h"
38 #include "llvm/IR/LLVMContext.h"
39 #include "llvm/IR/PassManager.h"
40 #include "llvm/IR/PatternMatch.h"
41 #include "llvm/IR/Type.h"
42 #include "llvm/IR/Value.h"
43 #include "llvm/InitializePasses.h"
44 #include "llvm/Pass.h"
45 #include "llvm/Support/Allocator.h"
46 #include "llvm/Support/AtomicOrdering.h"
47 #include "llvm/Support/Casting.h"
48 #include "llvm/Support/Debug.h"
49 #include "llvm/Support/DebugCounter.h"
50 #include "llvm/Support/RecyclingAllocator.h"
51 #include "llvm/Support/raw_ostream.h"
52 #include "llvm/Transforms/Scalar.h"
53 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
54 #include "llvm/Transforms/Utils/Local.h"
55 #include <cassert>
56 #include <deque>
57 #include <memory>
58 #include <utility>
59
60 using namespace llvm;
61 using namespace llvm::PatternMatch;
62
63 #define DEBUG_TYPE "early-cse"
64
65 STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
66 STATISTIC(NumCSE, "Number of instructions CSE'd");
67 STATISTIC(NumCSECVP, "Number of compare instructions CVP'd");
68 STATISTIC(NumCSELoad, "Number of load instructions CSE'd");
69 STATISTIC(NumCSECall, "Number of call instructions CSE'd");
70 STATISTIC(NumDSE, "Number of trivial dead stores removed");
71
72 DEBUG_COUNTER(CSECounter, "early-cse",
73 "Controls which instructions are removed");
74
75 static cl::opt<unsigned> EarlyCSEMssaOptCap(
76 "earlycse-mssa-optimization-cap", cl::init(500), cl::Hidden,
77 cl::desc("Enable imprecision in EarlyCSE in pathological cases, in exchange "
78 "for faster compile. Caps the MemorySSA clobbering calls."));
79
80 static cl::opt<bool> EarlyCSEDebugHash(
81 "earlycse-debug-hash", cl::init(false), cl::Hidden,
82 cl::desc("Perform extra assertion checking to verify that SimpleValue's hash "
83 "function is well-behaved w.r.t. its isEqual predicate"));
84
85 //===----------------------------------------------------------------------===//
86 // SimpleValue
87 //===----------------------------------------------------------------------===//
88
89 namespace {
90
91 /// Struct representing the available values in the scoped hash table.
92 struct SimpleValue {
93 Instruction *Inst;
94
95 SimpleValue(Instruction *I) : Inst(I) {
96 assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
97 }
98
99 bool isSentinel() const {
100 return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
101 Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
102 }
103
104 static bool canHandle(Instruction *Inst) {
105 // This can only handle non-void readnone functions.
106 // Also handled are constrained intrinsics that look like the types
107 // of instructions handled below (UnaryOperator, etc.).
108 if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
109 if (Function *F = CI->getCalledFunction()) {
110 switch ((Intrinsic::ID)F->getIntrinsicID()) {
111 case Intrinsic::experimental_constrained_fadd:
112 case Intrinsic::experimental_constrained_fsub:
113 case Intrinsic::experimental_constrained_fmul:
114 case Intrinsic::experimental_constrained_fdiv:
115 case Intrinsic::experimental_constrained_frem:
116 case Intrinsic::experimental_constrained_fptosi:
117 case Intrinsic::experimental_constrained_sitofp:
118 case Intrinsic::experimental_constrained_fptoui:
119 case Intrinsic::experimental_constrained_uitofp:
120 case Intrinsic::experimental_constrained_fcmp:
121 case Intrinsic::experimental_constrained_fcmps: {
122 auto *CFP = cast<ConstrainedFPIntrinsic>(CI);
123 if (CFP->getExceptionBehavior() &&
124 CFP->getExceptionBehavior() == fp::ebStrict)
125 return false;
126 // Since we CSE across function calls we must not allow
127 // the rounding mode to change.
128 if (CFP->getRoundingMode() &&
129 CFP->getRoundingMode() == RoundingMode::Dynamic)
130 return false;
131 return true;
132 }
133 }
134 }
135 return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy() &&
136 // FIXME: Currently, calls that may access the thread ID may
137 // be treated as not accessing memory. But this is
138 // problematic for coroutines, since coroutines may resume in a
139 // different thread. So we disable the optimization here for the
140 // correctness. However, it may block many other correct
141 // optimizations. Revert this one when we detect the memory
142 // accessing kind more precisely.
143 !CI->getFunction()->isPresplitCoroutine();
144 }
145 return isa<CastInst>(Inst) || isa<UnaryOperator>(Inst) ||
146 isa<BinaryOperator>(Inst) || isa<GetElementPtrInst>(Inst) ||
147 isa<CmpInst>(Inst) || isa<SelectInst>(Inst) ||
148 isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
149 isa<ShuffleVectorInst>(Inst) || isa<ExtractValueInst>(Inst) ||
150 isa<InsertValueInst>(Inst) || isa<FreezeInst>(Inst);
151 }
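// For example (illustrative IR), two identical calls such as
//   %r = call double @llvm.experimental.constrained.fadd.f64(
//            double %x, double %y,
//            metadata !"round.tonearest", metadata !"fpexcept.ignore")
// are accepted by canHandle() and may be CSE'd, while the same call with
// !"fpexcept.strict" or !"round.dynamic" metadata is rejected above.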
152 };
153
154 } // end anonymous namespace
155
156 namespace llvm {
157
158 template <> struct DenseMapInfo<SimpleValue> {
159 static inline SimpleValue getEmptyKey() {
160 return DenseMapInfo<Instruction *>::getEmptyKey();
161 }
162
163 static inline SimpleValue getTombstoneKey() {
164 return DenseMapInfo<Instruction *>::getTombstoneKey();
165 }
166
167 static unsigned getHashValue(SimpleValue Val);
168 static bool isEqual(SimpleValue LHS, SimpleValue RHS);
169 };
170
171 } // end namespace llvm
172
173 /// Match a 'select', including an optional 'not' of the condition.
174 static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A,
175 Value *&B,
176 SelectPatternFlavor &Flavor) {
177 // Return false if V is not even a select.
178 if (!match(V, m_Select(m_Value(Cond), m_Value(A), m_Value(B))))
179 return false;
180
181 // Look through a 'not' of the condition operand by swapping A/B.
182 Value *CondNot;
183 if (match(Cond, m_Not(m_Value(CondNot)))) {
184 Cond = CondNot;
185 std::swap(A, B);
186 }
187
188 // Match canonical forms of min/max. We are not using ValueTracking's
189 // more powerful matchSelectPattern() because it may rely on instruction flags
190 // such as "nsw". That would be incompatible with the current hashing
191 // mechanism that may remove flags to increase the likelihood of CSE.
192
193 Flavor = SPF_UNKNOWN;
194 CmpInst::Predicate Pred;
195
196 if (!match(Cond, m_ICmp(Pred, m_Specific(A), m_Specific(B)))) {
197 // Check for commuted variants of min/max by swapping predicate.
198 // If we do not match the standard or commuted patterns, this is not a
199 // recognized form of min/max, but it is still a select, so return true.
200 if (!match(Cond, m_ICmp(Pred, m_Specific(B), m_Specific(A))))
201 return true;
202 Pred = ICmpInst::getSwappedPredicate(Pred);
203 }
204
205 switch (Pred) {
206 case CmpInst::ICMP_UGT: Flavor = SPF_UMAX; break;
207 case CmpInst::ICMP_ULT: Flavor = SPF_UMIN; break;
208 case CmpInst::ICMP_SGT: Flavor = SPF_SMAX; break;
209 case CmpInst::ICMP_SLT: Flavor = SPF_SMIN; break;
210 // Non-strict inequalities.
211 case CmpInst::ICMP_ULE: Flavor = SPF_UMIN; break;
212 case CmpInst::ICMP_UGE: Flavor = SPF_UMAX; break;
213 case CmpInst::ICMP_SLE: Flavor = SPF_SMIN; break;
214 case CmpInst::ICMP_SGE: Flavor = SPF_SMAX; break;
215 default: break;
216 }
217
218 return true;
219 }
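// For example (illustrative IR), both of the following report SPF_SMAX with
// A == %x and B == %y, because the 'not' of the condition is looked through
// and the select arms are swapped:
//   %c = icmp sgt i32 %x, %y
//   %s1 = select i1 %c, i32 %x, i32 %y
//   %n = xor i1 %c, true
//   %s2 = select i1 %n, i32 %y, i32 %x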
220
221 static unsigned getHashValueImpl(SimpleValue Val) {
222 Instruction *Inst = Val.Inst;
223 // Hash in all of the operands as pointers.
224 if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst)) {
225 Value *LHS = BinOp->getOperand(0);
226 Value *RHS = BinOp->getOperand(1);
227 if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
228 std::swap(LHS, RHS);
229
230 return hash_combine(BinOp->getOpcode(), LHS, RHS);
231 }
232
233 if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
234 // Compares can be commuted by swapping the comparands and
235 // updating the predicate. Choose the form that has the
236 // comparands in sorted order, or in the case of a tie, the
237 // one with the lower predicate.
238 Value *LHS = CI->getOperand(0);
239 Value *RHS = CI->getOperand(1);
240 CmpInst::Predicate Pred = CI->getPredicate();
241 CmpInst::Predicate SwappedPred = CI->getSwappedPredicate();
242 if (std::tie(LHS, Pred) > std::tie(RHS, SwappedPred)) {
243 std::swap(LHS, RHS);
244 Pred = SwappedPred;
245 }
246 return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
247 }
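// For example (illustrative IR), 'icmp slt i32 %a, %b' and
// 'icmp sgt i32 %b, %a' normalize to the same (Pred, LHS, RHS) triple here and
// therefore receive the same hash; isEqualImpl() below accepts the commuted
// pair as equal.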
248
249 // Hash general selects to allow matching commuted true/false operands.
250 SelectPatternFlavor SPF;
251 Value *Cond, *A, *B;
252 if (matchSelectWithOptionalNotCond(Inst, Cond, A, B, SPF)) {
253 // Hash min/max (cmp + select) to allow for commuted operands.
254 // Min/max may also have a non-canonical compare predicate (e.g., the compare for
255 // smin may use 'sgt' rather than 'slt'), and non-canonical operands in the
256 // compare.
257 // TODO: We should also detect FP min/max.
258 if (SPF == SPF_SMIN || SPF == SPF_SMAX ||
259 SPF == SPF_UMIN || SPF == SPF_UMAX) {
260 if (A > B)
261 std::swap(A, B);
262 return hash_combine(Inst->getOpcode(), SPF, A, B);
263 }
264
265 // Hash general selects to allow matching commuted true/false operands.
266
267 // If we do not have a compare as the condition, just hash in the condition.
268 CmpInst::Predicate Pred;
269 Value *X, *Y;
270 if (!match(Cond, m_Cmp(Pred, m_Value(X), m_Value(Y))))
271 return hash_combine(Inst->getOpcode(), Cond, A, B);
272
273 // Similar to cmp normalization (above) - canonicalize the predicate value:
274 // select (icmp Pred, X, Y), A, B --> select (icmp InvPred, X, Y), B, A
275 if (CmpInst::getInversePredicate(Pred) < Pred) {
276 Pred = CmpInst::getInversePredicate(Pred);
277 std::swap(A, B);
278 }
279 return hash_combine(Inst->getOpcode(), Pred, X, Y, A, B);
280 }
281
282 if (CastInst *CI = dyn_cast<CastInst>(Inst))
283 return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0));
284
285 if (FreezeInst *FI = dyn_cast<FreezeInst>(Inst))
286 return hash_combine(FI->getOpcode(), FI->getOperand(0));
287
288 if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst))
289 return hash_combine(EVI->getOpcode(), EVI->getOperand(0),
290 hash_combine_range(EVI->idx_begin(), EVI->idx_end()));
291
292 if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst))
293 return hash_combine(IVI->getOpcode(), IVI->getOperand(0),
294 IVI->getOperand(1),
295 hash_combine_range(IVI->idx_begin(), IVI->idx_end()));
296
297 assert((isa<CallInst>(Inst) || isa<GetElementPtrInst>(Inst) ||
298 isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
299 isa<ShuffleVectorInst>(Inst) || isa<UnaryOperator>(Inst) ||
300 isa<FreezeInst>(Inst)) &&
301 "Invalid/unknown instruction");
302
303 // Handle intrinsics with commutative operands.
304 // TODO: Extend this to handle intrinsics with >2 operands where the 1st
305 // 2 operands are commutative.
306 auto *II = dyn_cast<IntrinsicInst>(Inst);
307 if (II && II->isCommutative() && II->arg_size() == 2) {
308 Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
309 if (LHS > RHS)
310 std::swap(LHS, RHS);
311 return hash_combine(II->getOpcode(), LHS, RHS);
312 }
313
314 // gc.relocate is a 'special' call: its second and third operands are
315 // not real values, but indices into statepoint's argument list.
316 // Get values they point to.
317 if (const GCRelocateInst *GCR = dyn_cast<GCRelocateInst>(Inst))
318 return hash_combine(GCR->getOpcode(), GCR->getOperand(0),
319 GCR->getBasePtr(), GCR->getDerivedPtr());
320
321 // Mix in the opcode.
322 return hash_combine(
323 Inst->getOpcode(),
324 hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
325 }
326
327 unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
328 #ifndef NDEBUG
329 // If -earlycse-debug-hash was specified, return a constant -- this
330 // will force all hashing to collide, so we'll exhaustively search
331 // the table for a match, and the assertion in isEqual will fire if
332 // there's a bug causing equal keys to hash differently.
333 if (EarlyCSEDebugHash)
334 return 0;
335 #endif
336 return getHashValueImpl(Val);
337 }
338
339 static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) {
340 Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
341
342 if (LHS.isSentinel() || RHS.isSentinel())
343 return LHSI == RHSI;
344
345 if (LHSI->getOpcode() != RHSI->getOpcode())
346 return false;
347 if (LHSI->isIdenticalToWhenDefined(RHSI))
348 return true;
349
350 // If we're not strictly identical, we still might be a commutable instruction
351 if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
352 if (!LHSBinOp->isCommutative())
353 return false;
354
355 assert(isa<BinaryOperator>(RHSI) &&
356 "same opcode, but different instruction type?");
357 BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);
358
359 // Commuted equality
360 return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
361 LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
362 }
363 if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
364 assert(isa<CmpInst>(RHSI) &&
365 "same opcode, but different instruction type?");
366 CmpInst *RHSCmp = cast<CmpInst>(RHSI);
367 // Commuted equality
368 return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
369 LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
370 LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
371 }
372
373 // TODO: Extend this for >2 args by matching the trailing N-2 args.
374 auto *LII = dyn_cast<IntrinsicInst>(LHSI);
375 auto *RII = dyn_cast<IntrinsicInst>(RHSI);
376 if (LII && RII && LII->getIntrinsicID() == RII->getIntrinsicID() &&
377 LII->isCommutative() && LII->arg_size() == 2) {
378 return LII->getArgOperand(0) == RII->getArgOperand(1) &&
379 LII->getArgOperand(1) == RII->getArgOperand(0);
380 }
381
382 // See comment above in `getHashValue()`.
383 if (const GCRelocateInst *GCR1 = dyn_cast<GCRelocateInst>(LHSI))
384 if (const GCRelocateInst *GCR2 = dyn_cast<GCRelocateInst>(RHSI))
385 return GCR1->getOperand(0) == GCR2->getOperand(0) &&
386 GCR1->getBasePtr() == GCR2->getBasePtr() &&
387 GCR1->getDerivedPtr() == GCR2->getDerivedPtr();
388
389 // Min/max can occur with commuted operands, non-canonical predicates,
390 // and/or non-canonical operands.
391 // Selects can be non-trivially equivalent via inverted conditions and swaps.
392 SelectPatternFlavor LSPF, RSPF;
393 Value *CondL, *CondR, *LHSA, *RHSA, *LHSB, *RHSB;
394 if (matchSelectWithOptionalNotCond(LHSI, CondL, LHSA, LHSB, LSPF) &&
395 matchSelectWithOptionalNotCond(RHSI, CondR, RHSA, RHSB, RSPF)) {
396 if (LSPF == RSPF) {
397 // TODO: We should also detect FP min/max.
398 if (LSPF == SPF_SMIN || LSPF == SPF_SMAX ||
399 LSPF == SPF_UMIN || LSPF == SPF_UMAX)
400 return ((LHSA == RHSA && LHSB == RHSB) ||
401 (LHSA == RHSB && LHSB == RHSA));
402
403 // select Cond, A, B <--> select not(Cond), B, A
404 if (CondL == CondR && LHSA == RHSA && LHSB == RHSB)
405 return true;
406 }
407
408 // If the true/false operands are swapped and the conditions are compares
409 // with inverted predicates, the selects are equal:
410 // select (icmp Pred, X, Y), A, B <--> select (icmp InvPred, X, Y), B, A
411 //
412 // This also handles patterns with a double-negation in the sense of not +
413 // inverse, because we looked through a 'not' in the matching function and
414 // swapped A/B:
415 // select (cmp Pred, X, Y), A, B <--> select (not (cmp InvPred, X, Y)), B, A
416 //
417 // This intentionally does NOT handle patterns with a double-negation in
418 // the sense of not + not, because doing so could result in values
419 // comparing as equal that hash differently in the min/max cases
420 // like:
421 // select (cmp slt, X, Y), X, Y <--> select (not (not (cmp slt, X, Y))), X, Y
422 // ^ hashes as min ^ would not hash as min
423 // In the context of the EarlyCSE pass, however, such cases never reach
424 // this code, as we simplify the double-negation before hashing the second
425 // select (and so still succeed at CSEing them).
426 if (LHSA == RHSB && LHSB == RHSA) {
427 CmpInst::Predicate PredL, PredR;
428 Value *X, *Y;
429 if (match(CondL, m_Cmp(PredL, m_Value(X), m_Value(Y))) &&
430 match(CondR, m_Cmp(PredR, m_Specific(X), m_Specific(Y))) &&
431 CmpInst::getInversePredicate(PredL) == PredR)
432 return true;
433 }
434 }
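  // For example (illustrative IR),
  //   %c1 = icmp ult i32 %x, %y
  //   %s1 = select i1 %c1, i32 %a, i32 %b
  //   %c2 = icmp uge i32 %x, %y
  //   %s2 = select i1 %c2, i32 %b, i32 %a
  // compare equal here (swapped arms with inverted predicates) and, thanks to
  // the canonicalization in getHashValueImpl(), also hash identically.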
435
436 return false;
437 }
438
439 bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
440 // These comparisons are nontrivial, so assert that equality implies
441 // hash equality (DenseMap demands this as an invariant).
442 bool Result = isEqualImpl(LHS, RHS);
443 assert(!Result || (LHS.isSentinel() && LHS.Inst == RHS.Inst) ||
444 getHashValueImpl(LHS) == getHashValueImpl(RHS));
445 return Result;
446 }
447
448 //===----------------------------------------------------------------------===//
449 // CallValue
450 //===----------------------------------------------------------------------===//
451
452 namespace {
453
454 /// Struct representing the available call values in the scoped hash
455 /// table.
456 struct CallValue {
457 Instruction *Inst;
458
459 CallValue(Instruction *I) : Inst(I) {
460 assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
461 }
462
463 bool isSentinel() const {
464 return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
465 Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
466 }
467
468 static bool canHandle(Instruction *Inst) {
469 // Don't value number anything that returns void.
470 if (Inst->getType()->isVoidTy())
471 return false;
472
473 CallInst *CI = dyn_cast<CallInst>(Inst);
474 if (!CI || !CI->onlyReadsMemory() ||
475 // FIXME: Currently, calls that may access the thread ID may
476 // be treated as not accessing memory. But this is
477 // problematic for coroutines, since coroutines may resume in a
478 // different thread. So we disable the optimization here for the
479 // correctness. However, it may block many other correct
480 // optimizations. Revert this one when we detect the memory
481 // accessing kind more precisely.
482 CI->getFunction()->isPresplitCoroutine())
483 return false;
484 return true;
485 }
486 };
487
488 } // end anonymous namespace
489
490 namespace llvm {
491
492 template <> struct DenseMapInfo<CallValue> {
493 static inline CallValue getEmptyKey() {
494 return DenseMapInfo<Instruction *>::getEmptyKey();
495 }
496
497 static inline CallValue getTombstoneKey() {
498 return DenseMapInfo<Instruction *>::getTombstoneKey();
499 }
500
501 static unsigned getHashValue(CallValue Val);
502 static bool isEqual(CallValue LHS, CallValue RHS);
503 };
504
505 } // end namespace llvm
506
507 unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
508 Instruction *Inst = Val.Inst;
509
510 // Hash all of the operands as pointers and mix in the opcode.
511 return hash_combine(
512 Inst->getOpcode(),
513 hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
514 }
515
516 bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
517 Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
518 if (LHS.isSentinel() || RHS.isSentinel())
519 return LHSI == RHSI;
520
521 return LHSI->isIdenticalTo(RHSI);
522 }
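// For example (illustrative IR), two identical calls to a function known to
// only read memory, such as
//   %v1 = call i32 @f(i32 %x)
//   %v2 = call i32 @f(i32 %x)
// hash and compare equal here; whether the later call is actually replaced
// still depends on the memory-generation checks in EarlyCSE::processNode().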
523
524 //===----------------------------------------------------------------------===//
525 // EarlyCSE implementation
526 //===----------------------------------------------------------------------===//
527
528 namespace {
529
530 /// A simple and fast domtree-based CSE pass.
531 ///
532 /// This pass does a simple depth-first walk over the dominator tree,
533 /// eliminating trivially redundant instructions and using instsimplify to
534 /// canonicalize things as it goes. It is intended to be fast and catch obvious
535 /// cases so that instcombine and other passes are more effective. It is
536 /// expected that a later pass of GVN will catch the interesting/hard cases.
537 class EarlyCSE {
538 public:
539 const TargetLibraryInfo &TLI;
540 const TargetTransformInfo &TTI;
541 DominatorTree &DT;
542 AssumptionCache &AC;
543 const SimplifyQuery SQ;
544 MemorySSA *MSSA;
545 std::unique_ptr<MemorySSAUpdater> MSSAUpdater;
546
547 using AllocatorTy =
548 RecyclingAllocator<BumpPtrAllocator,
549 ScopedHashTableVal<SimpleValue, Value *>>;
550 using ScopedHTType =
551 ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,
552 AllocatorTy>;
553
554 /// A scoped hash table of the current values of all of our simple
555 /// scalar expressions.
556 ///
557 /// As we walk down the domtree, we look to see if instructions are in this:
558 /// if so, we replace them with what we find, otherwise we insert them so
559 /// that dominated values can succeed in their lookup.
560 ScopedHTType AvailableValues;
561
562 /// A scoped hash table of the current values of previously encountered
563 /// memory locations.
564 ///
565 /// This allows us to get efficient access to dominating loads or stores when
566 /// we have a fully redundant load. In addition to the most recent load, we
567 /// keep track of a generation count of the read, which is compared against
568 /// the current generation count. The current generation count is incremented
569 /// after every possibly writing memory operation, which ensures that we only
570 /// CSE loads with other loads that have no intervening store. Ordering
571 /// events (such as fences or atomic instructions) increment the generation
572 /// count as well; essentially, we model these as writes to all possible
573 /// locations. Note that atomic and/or volatile loads and stores can be
574 /// present in the table; it is the responsibility of the consumer to inspect
575 /// the atomicity/volatility if needed.
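/// For example (illustrative IR), in
///   %v1 = load i32, ptr %p    ; recorded at generation G
///   call void @may_write()    ; may write memory: generation becomes G+1
///   %v2 = load i32, ptr %p    ; looked up at generation G+1
/// the second load is only CSE'd to %v1 if MemorySSA (isSameMemGeneration)
/// proves that the call does not clobber %p. '@may_write' is a placeholder.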
576 struct LoadValue {
577 Instruction *DefInst = nullptr;
578 unsigned Generation = 0;
579 int MatchingId = -1;
580 bool IsAtomic = false;
581
582 LoadValue() = default;
583 LoadValue(Instruction *Inst, unsigned Generation, unsigned MatchingId,
584 bool IsAtomic)
585 : DefInst(Inst), Generation(Generation), MatchingId(MatchingId),
586 IsAtomic(IsAtomic) {}
587 };
588
589 using LoadMapAllocator =
590 RecyclingAllocator<BumpPtrAllocator,
591 ScopedHashTableVal<Value *, LoadValue>>;
592 using LoadHTType =
593 ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
594 LoadMapAllocator>;
595
596 LoadHTType AvailableLoads;
597
598 // A scoped hash table mapping memory locations (represented as typed
599 // addresses) to generation numbers at which that memory location became
600 // (henceforth indefinitely) invariant.
601 using InvariantMapAllocator =
602 RecyclingAllocator<BumpPtrAllocator,
603 ScopedHashTableVal<MemoryLocation, unsigned>>;
604 using InvariantHTType =
605 ScopedHashTable<MemoryLocation, unsigned, DenseMapInfo<MemoryLocation>,
606 InvariantMapAllocator>;
607 InvariantHTType AvailableInvariants;
608
609 /// A scoped hash table of the current values of read-only call
610 /// values.
611 ///
612 /// It uses the same generation count as loads.
613 using CallHTType =
614 ScopedHashTable<CallValue, std::pair<Instruction *, unsigned>>;
615 CallHTType AvailableCalls;
616
617 /// This is the current generation of the memory value.
618 unsigned CurrentGeneration = 0;
619
620 /// Set up the EarlyCSE runner for a particular function.
621 EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI,
622 const TargetTransformInfo &TTI, DominatorTree &DT,
623 AssumptionCache &AC, MemorySSA *MSSA)
624 : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA),
625 MSSAUpdater(std::make_unique<MemorySSAUpdater>(MSSA)) {}
626
627 bool run();
628
629 private:
630 unsigned ClobberCounter = 0;
631 // Almost a POD, but needs to call the constructors for the scoped hash
632 // tables so that a new scope gets pushed on. These are RAII so that the
633 // scope gets popped when the NodeScope is destroyed.
634 class NodeScope {
635 public:
636 NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
637 InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls)
638 : Scope(AvailableValues), LoadScope(AvailableLoads),
639 InvariantScope(AvailableInvariants), CallScope(AvailableCalls) {}
640 NodeScope(const NodeScope &) = delete;
641 NodeScope &operator=(const NodeScope &) = delete;
642
643 private:
644 ScopedHTType::ScopeTy Scope;
645 LoadHTType::ScopeTy LoadScope;
646 InvariantHTType::ScopeTy InvariantScope;
647 CallHTType::ScopeTy CallScope;
648 };
649
650 // Contains all the needed information to create a stack for doing a depth
651 // first traversal of the tree. This includes scopes for values, loads, and
652 // calls as well as the generation. There is a child iterator so that the
653 // children do not need to be stored separately.
654 class StackNode {
655 public:
656 StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
657 InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
658 unsigned cg, DomTreeNode *n, DomTreeNode::const_iterator child,
659 DomTreeNode::const_iterator end)
660 : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
661 EndIter(end),
662 Scopes(AvailableValues, AvailableLoads, AvailableInvariants,
663 AvailableCalls)
664 {}
665 StackNode(const StackNode &) = delete;
666 StackNode &operator=(const StackNode &) = delete;
667
668 // Accessors.
669 unsigned currentGeneration() const { return CurrentGeneration; }
670 unsigned childGeneration() const { return ChildGeneration; }
671 void childGeneration(unsigned generation) { ChildGeneration = generation; }
672 DomTreeNode *node() { return Node; }
673 DomTreeNode::const_iterator childIter() const { return ChildIter; }
674
675 DomTreeNode *nextChild() {
676 DomTreeNode *child = *ChildIter;
677 ++ChildIter;
678 return child;
679 }
680
681 DomTreeNode::const_iterator end() const { return EndIter; }
682 bool isProcessed() const { return Processed; }
683 void process() { Processed = true; }
684
685 private:
686 unsigned CurrentGeneration;
687 unsigned ChildGeneration;
688 DomTreeNode *Node;
689 DomTreeNode::const_iterator ChildIter;
690 DomTreeNode::const_iterator EndIter;
691 NodeScope Scopes;
692 bool Processed = false;
693 };
694
695 /// Wrapper class to handle memory instructions, including loads,
696 /// stores and intrinsic loads and stores defined by the target.
697 class ParseMemoryInst {
698 public:
699 ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
700 : Inst(Inst) {
701 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
702 IntrID = II->getIntrinsicID();
703 if (TTI.getTgtMemIntrinsic(II, Info))
704 return;
705 if (isHandledNonTargetIntrinsic(IntrID)) {
706 switch (IntrID) {
707 case Intrinsic::masked_load:
708 Info.PtrVal = Inst->getOperand(0);
709 Info.MatchingId = Intrinsic::masked_load;
710 Info.ReadMem = true;
711 Info.WriteMem = false;
712 Info.IsVolatile = false;
713 break;
714 case Intrinsic::masked_store:
715 Info.PtrVal = Inst->getOperand(1);
716 // Use the ID of masked load as the "matching id". This will
717 // prevent matching non-masked loads/stores with masked ones
718 // (which could be done), but at the moment, the code here
719 // does not support matching intrinsics with non-intrinsics,
720 // so keep the MatchingIds specific to masked instructions
721 // for now (TODO).
722 Info.MatchingId = Intrinsic::masked_load;
723 Info.ReadMem = false;
724 Info.WriteMem = true;
725 Info.IsVolatile = false;
726 break;
727 }
728 }
729 }
730 }
731
732 Instruction *get() { return Inst; }
733 const Instruction *get() const { return Inst; }
734
735 bool isLoad() const {
736 if (IntrID != 0)
737 return Info.ReadMem;
738 return isa<LoadInst>(Inst);
739 }
740
741 bool isStore() const {
742 if (IntrID != 0)
743 return Info.WriteMem;
744 return isa<StoreInst>(Inst);
745 }
746
747 bool isAtomic() const {
748 if (IntrID != 0)
749 return Info.Ordering != AtomicOrdering::NotAtomic;
750 return Inst->isAtomic();
751 }
752
753 bool isUnordered() const {
754 if (IntrID != 0)
755 return Info.isUnordered();
756
757 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
758 return LI->isUnordered();
759 } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
760 return SI->isUnordered();
761 }
762 // Conservative answer
763 return !Inst->isAtomic();
764 }
765
766 bool isVolatile() const {
767 if (IntrID != 0)
768 return Info.IsVolatile;
769
770 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
771 return LI->isVolatile();
772 } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
773 return SI->isVolatile();
774 }
775 // Conservative answer
776 return true;
777 }
778
779 bool isInvariantLoad() const {
780 if (auto *LI = dyn_cast<LoadInst>(Inst))
781 return LI->hasMetadata(LLVMContext::MD_invariant_load);
782 return false;
783 }
784
785 bool isValid() const { return getPointerOperand() != nullptr; }
786
787 // For regular (non-intrinsic) loads/stores, this is set to -1. For
788 // intrinsic loads/stores, the id is retrieved from the corresponding
789 // field in the MemIntrinsicInfo structure. That field contains
790 // non-negative values only.
791 int getMatchingId() const {
792 if (IntrID != 0)
793 return Info.MatchingId;
794 return -1;
795 }
796
797 Value *getPointerOperand() const {
798 if (IntrID != 0)
799 return Info.PtrVal;
800 return getLoadStorePointerOperand(Inst);
801 }
802
803 Type *getValueType() const {
804 // TODO: handle target-specific intrinsics.
805 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
806 switch (II->getIntrinsicID()) {
807 case Intrinsic::masked_load:
808 return II->getType();
809 case Intrinsic::masked_store:
810 return II->getArgOperand(0)->getType();
811 default:
812 return nullptr;
813 }
814 }
815 return getLoadStoreType(Inst);
816 }
817
818 bool mayReadFromMemory() const {
819 if (IntrID != 0)
820 return Info.ReadMem;
821 return Inst->mayReadFromMemory();
822 }
823
824 bool mayWriteToMemory() const {
825 if (IntrID != 0)
826 return Info.WriteMem;
827 return Inst->mayWriteToMemory();
828 }
829
830 private:
831 Intrinsic::ID IntrID = 0;
832 MemIntrinsicInfo Info;
833 Instruction *Inst;
834 };
835
836 // This function is to prevent accidentally passing a non-target
837 // intrinsic ID to TargetTransformInfo.
838 static bool isHandledNonTargetIntrinsic(Intrinsic::ID ID) {
839 switch (ID) {
840 case Intrinsic::masked_load:
841 case Intrinsic::masked_store:
842 return true;
843 }
844 return false;
845 }
846 static bool isHandledNonTargetIntrinsic(const Value *V) {
847 if (auto *II = dyn_cast<IntrinsicInst>(V))
848 return isHandledNonTargetIntrinsic(II->getIntrinsicID());
849 return false;
850 }
851
852 bool processNode(DomTreeNode *Node);
853
854 bool handleBranchCondition(Instruction *CondInst, const BranchInst *BI,
855 const BasicBlock *BB, const BasicBlock *Pred);
856
857 Value *getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
858 unsigned CurrentGeneration);
859
860 bool overridingStores(const ParseMemoryInst &Earlier,
861 const ParseMemoryInst &Later);
862
863 Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
864 // TODO: We could insert relevant casts on type mismatch here.
865 if (auto *LI = dyn_cast<LoadInst>(Inst))
866 return LI->getType() == ExpectedType ? LI : nullptr;
867 if (auto *SI = dyn_cast<StoreInst>(Inst)) {
868 Value *V = SI->getValueOperand();
869 return V->getType() == ExpectedType ? V : nullptr;
870 }
871 assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
872 auto *II = cast<IntrinsicInst>(Inst);
873 if (isHandledNonTargetIntrinsic(II->getIntrinsicID()))
874 return getOrCreateResultNonTargetMemIntrinsic(II, ExpectedType);
875 return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType);
876 }
877
878 Value *getOrCreateResultNonTargetMemIntrinsic(IntrinsicInst *II,
879 Type *ExpectedType) const {
880 // TODO: We could insert relevant casts on type mismatch here.
881 switch (II->getIntrinsicID()) {
882 case Intrinsic::masked_load:
883 return II->getType() == ExpectedType ? II : nullptr;
884 case Intrinsic::masked_store: {
885 Value *V = II->getOperand(0);
886 return V->getType() == ExpectedType ? V : nullptr;
887 }
888 }
889 return nullptr;
890 }
891
892 /// Return true if the instruction is known to only operate on memory
893 /// provably invariant in the given "generation".
894 bool isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt);
895
896 bool isSameMemGeneration(unsigned EarlierGeneration, unsigned LaterGeneration,
897 Instruction *EarlierInst, Instruction *LaterInst);
898
899 bool isNonTargetIntrinsicMatch(const IntrinsicInst *Earlier,
900 const IntrinsicInst *Later) {
901 auto IsSubmask = [](const Value *Mask0, const Value *Mask1) {
902 // Is Mask0 a submask of Mask1?
903 if (Mask0 == Mask1)
904 return true;
905 if (isa<UndefValue>(Mask0) || isa<UndefValue>(Mask1))
906 return false;
907 auto *Vec0 = dyn_cast<ConstantVector>(Mask0);
908 auto *Vec1 = dyn_cast<ConstantVector>(Mask1);
909 if (!Vec0 || !Vec1)
910 return false;
911 if (Vec0->getType() != Vec1->getType())
912 return false;
913 for (int i = 0, e = Vec0->getNumOperands(); i != e; ++i) {
914 Constant *Elem0 = Vec0->getOperand(i);
915 Constant *Elem1 = Vec1->getOperand(i);
916 auto *Int0 = dyn_cast<ConstantInt>(Elem0);
917 if (Int0 && Int0->isZero())
918 continue;
919 auto *Int1 = dyn_cast<ConstantInt>(Elem1);
920 if (Int1 && !Int1->isZero())
921 continue;
922 if (isa<UndefValue>(Elem0) || isa<UndefValue>(Elem1))
923 return false;
924 if (Elem0 == Elem1)
925 continue;
926 return false;
927 }
928 return true;
929 };
930 auto PtrOp = [](const IntrinsicInst *II) {
931 if (II->getIntrinsicID() == Intrinsic::masked_load)
932 return II->getOperand(0);
933 if (II->getIntrinsicID() == Intrinsic::masked_store)
934 return II->getOperand(1);
935 llvm_unreachable("Unexpected IntrinsicInst");
936 };
937 auto MaskOp = [](const IntrinsicInst *II) {
938 if (II->getIntrinsicID() == Intrinsic::masked_load)
939 return II->getOperand(2);
940 if (II->getIntrinsicID() == Intrinsic::masked_store)
941 return II->getOperand(3);
942 llvm_unreachable("Unexpected IntrinsicInst");
943 };
944 auto ThruOp = [](const IntrinsicInst *II) {
945 if (II->getIntrinsicID() == Intrinsic::masked_load)
946 return II->getOperand(3);
947 llvm_unreachable("Unexpected IntrinsicInst");
948 };
949
950 if (PtrOp(Earlier) != PtrOp(Later))
951 return false;
952
953 Intrinsic::ID IDE = Earlier->getIntrinsicID();
954 Intrinsic::ID IDL = Later->getIntrinsicID();
955 // We could really use specific intrinsic classes for masked loads
956 // and stores in IntrinsicInst.h.
957 if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_load) {
958 // Trying to replace later masked load with the earlier one.
959 // Check that the pointers are the same, and
960 // - masks and pass-throughs are the same, or
961 // - replacee's pass-through is "undef" and replacer's mask is a
962 // super-set of the replacee's mask.
963 if (MaskOp(Earlier) == MaskOp(Later) && ThruOp(Earlier) == ThruOp(Later))
964 return true;
965 if (!isa<UndefValue>(ThruOp(Later)))
966 return false;
967 return IsSubmask(MaskOp(Later), MaskOp(Earlier));
968 }
969 if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_load) {
970 // Trying to replace a load of a stored value with the store's value.
971 // Check that the pointers are the same, and
972 // - load's mask is a subset of store's mask, and
973 // - load's pass-through is "undef".
974 if (!IsSubmask(MaskOp(Later), MaskOp(Earlier)))
975 return false;
976 return isa<UndefValue>(ThruOp(Later));
977 }
978 if (IDE == Intrinsic::masked_load && IDL == Intrinsic::masked_store) {
979 // Trying to remove a store of the loaded value.
980 // Check that the pointers are the same, and
981 // - store's mask is a subset of the load's mask.
982 return IsSubmask(MaskOp(Later), MaskOp(Earlier));
983 }
984 if (IDE == Intrinsic::masked_store && IDL == Intrinsic::masked_store) {
985 // Trying to remove a dead store (earlier).
986 // Check that the pointers are the same,
987 // - the to-be-removed store's mask is a subset of the other store's
988 // mask.
989 return IsSubmask(MaskOp(Earlier), MaskOp(Later));
990 }
991 return false;
992 }
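// For example (illustrative IR), a later masked load
//   %v = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %p, i32 4,
//            <4 x i1> <i1 true, i1 true, i1 false, i1 false>,
//            <4 x i32> undef)
// can be forwarded from an earlier masked store to %p with mask
//   <4 x i1> <i1 true, i1 true, i1 true, i1 false>
// because the load's mask is a submask of the store's mask and the load's
// pass-through is undef.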
993
994 void removeMSSA(Instruction &Inst) {
995 if (!MSSA)
996 return;
997 if (VerifyMemorySSA)
998 MSSA->verifyMemorySSA();
999 // Removing a store here can leave MemorySSA in an unoptimized state by
1000 // creating MemoryPhis that have identical arguments and by creating
1001 // MemoryUses whose defining access is not an actual clobber. The phi case
1002 // is handled by MemorySSA when passing OptimizePhis = true to
1003 // removeMemoryAccess. The non-optimized MemoryUse case is lazily updated
1004 // by MemorySSA's getClobberingMemoryAccess.
1005 MSSAUpdater->removeMemoryAccess(&Inst, true);
1006 }
1007 };
1008
1009 } // end anonymous namespace
1010
1011 /// Determine if the memory referenced by LaterInst is from the same heap
1012 /// version as EarlierInst.
1013 /// This is currently called in two scenarios:
1014 ///
1015 /// load p
1016 /// ...
1017 /// load p
1018 ///
1019 /// and
1020 ///
1021 /// x = load p
1022 /// ...
1023 /// store x, p
1024 ///
1025 /// in both cases we want to verify that there are no possible writes to the
1026 /// memory referenced by p between the earlier and later instruction.
1027 bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
1028 unsigned LaterGeneration,
1029 Instruction *EarlierInst,
1030 Instruction *LaterInst) {
1031 // Check the simple memory generation tracking first.
1032 if (EarlierGeneration == LaterGeneration)
1033 return true;
1034
1035 if (!MSSA)
1036 return false;
1037
1038 // If MemorySSA has determined that one of EarlierInst or LaterInst does not
1039 // read/write memory, then we can safely return true here.
1040 // FIXME: We could be more aggressive when checking doesNotAccessMemory(),
1041 // onlyReadsMemory(), mayReadFromMemory(), and mayWriteToMemory() in this pass
1042 // by also checking the MemorySSA MemoryAccess on the instruction. Initial
1043 // experiments suggest this isn't worthwhile, at least for C/C++ code compiled
1044 // with the default optimization pipeline.
1045 auto *EarlierMA = MSSA->getMemoryAccess(EarlierInst);
1046 if (!EarlierMA)
1047 return true;
1048 auto *LaterMA = MSSA->getMemoryAccess(LaterInst);
1049 if (!LaterMA)
1050 return true;
1051
1052 // Since we know LaterDef dominates LaterInst and EarlierInst dominates
1053 // LaterInst, if LaterDef dominates EarlierInst then it can't occur between
1054 // EarlierInst and LaterInst and neither can any other write that potentially
1055 // clobbers LaterInst.
1056 MemoryAccess *LaterDef;
1057 if (ClobberCounter < EarlyCSEMssaOptCap) {
1058 LaterDef = MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
1059 ClobberCounter++;
1060 } else
1061 LaterDef = LaterMA->getDefiningAccess();
1062
1063 return MSSA->dominates(LaterDef, EarlierMA);
1064 }
1065
1066 bool EarlyCSE::isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt) {
1067 // A location loaded from with an invariant_load is assumed to *never* change
1068 // within the visible scope of the compilation.
1069 if (auto *LI = dyn_cast<LoadInst>(I))
1070 if (LI->hasMetadata(LLVMContext::MD_invariant_load))
1071 return true;
1072
1073 auto MemLocOpt = MemoryLocation::getOrNone(I);
1074 if (!MemLocOpt)
1075 // "target" intrinsic forms of loads aren't currently known to
1076 // MemoryLocation::get. TODO
1077 return false;
1078 MemoryLocation MemLoc = *MemLocOpt;
1079 if (!AvailableInvariants.count(MemLoc))
1080 return false;
1081
1082 // Is the generation at which this became invariant older than the
1083 // current one?
1084 return AvailableInvariants.lookup(MemLoc) <= GenAt;
1085 }
1086
1087 bool EarlyCSE::handleBranchCondition(Instruction *CondInst,
1088 const BranchInst *BI, const BasicBlock *BB,
1089 const BasicBlock *Pred) {
1090 assert(BI->isConditional() && "Should be a conditional branch!");
1091 assert(BI->getCondition() == CondInst && "Wrong condition?");
1092 assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
1093 auto *TorF = (BI->getSuccessor(0) == BB)
1094 ? ConstantInt::getTrue(BB->getContext())
1095 : ConstantInt::getFalse(BB->getContext());
1096 auto MatchBinOp = [](Instruction *I, unsigned Opcode, Value *&LHS,
1097 Value *&RHS) {
1098 if (Opcode == Instruction::And &&
1099 match(I, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
1100 return true;
1101 else if (Opcode == Instruction::Or &&
1102 match(I, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
1103 return true;
1104 return false;
1105 };
1106 // If the condition is an AND operation, we can propagate its operands into
1107 // the true branch. If it is an OR operation, we can propagate them into the
1108 // false branch.
1109 unsigned PropagateOpcode =
1110 (BI->getSuccessor(0) == BB) ? Instruction::And : Instruction::Or;
1111
1112 bool MadeChanges = false;
1113 SmallVector<Instruction *, 4> WorkList;
1114 SmallPtrSet<Instruction *, 4> Visited;
1115 WorkList.push_back(CondInst);
1116 while (!WorkList.empty()) {
1117 Instruction *Curr = WorkList.pop_back_val();
1118
1119 AvailableValues.insert(Curr, TorF);
1120 LLVM_DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
1121 << Curr->getName() << "' as " << *TorF << " in "
1122 << BB->getName() << "\n");
1123 if (!DebugCounter::shouldExecute(CSECounter)) {
1124 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1125 } else {
1126 // Replace all dominated uses with the known value.
1127 if (unsigned Count = replaceDominatedUsesWith(Curr, TorF, DT,
1128 BasicBlockEdge(Pred, BB))) {
1129 NumCSECVP += Count;
1130 MadeChanges = true;
1131 }
1132 }
1133
1134 Value *LHS, *RHS;
1135 if (MatchBinOp(Curr, PropagateOpcode, LHS, RHS))
1136 for (auto *Op : { LHS, RHS })
1137 if (Instruction *OPI = dyn_cast<Instruction>(Op))
1138 if (SimpleValue::canHandle(OPI) && Visited.insert(OPI).second)
1139 WorkList.push_back(OPI);
1140 }
1141
1142 return MadeChanges;
1143 }
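// For example (illustrative IR), given a predecessor ending in
//   %c = and i1 %a, %b
//   br i1 %c, label %then, label %else
// the values %c, %a, and %b are all recorded as 'true' inside %then (and
// dominated uses are rewritten), whereas on the %else edge only %c is known
// to be false, since the operands of an 'and' tell us nothing when it is
// false.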
1144
1145 Value *EarlyCSE::getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
1146 unsigned CurrentGeneration) {
1147 if (InVal.DefInst == nullptr)
1148 return nullptr;
1149 if (InVal.MatchingId != MemInst.getMatchingId())
1150 return nullptr;
1151 // We don't yet handle removing loads with ordering of any kind.
1152 if (MemInst.isVolatile() || !MemInst.isUnordered())
1153 return nullptr;
1154 // We can't replace an atomic load with one which isn't also atomic.
1155 if (MemInst.isLoad() && !InVal.IsAtomic && MemInst.isAtomic())
1156 return nullptr;
1157 // The value V returned from this function is used differently depending
1158 // on whether MemInst is a load or a store. If it's a load, we will replace
1159 // MemInst with V, if it's a store, we will check if V is the same as the
1160 // available value.
1161 bool MemInstMatching = !MemInst.isLoad();
1162 Instruction *Matching = MemInstMatching ? MemInst.get() : InVal.DefInst;
1163 Instruction *Other = MemInstMatching ? InVal.DefInst : MemInst.get();
1164
1165 // For stores check the result values before checking memory generation
1166 // (otherwise isSameMemGeneration may crash).
1167 Value *Result = MemInst.isStore()
1168 ? getOrCreateResult(Matching, Other->getType())
1169 : nullptr;
1170 if (MemInst.isStore() && InVal.DefInst != Result)
1171 return nullptr;
1172
1173 // Deal with non-target memory intrinsics.
1174 bool MatchingNTI = isHandledNonTargetIntrinsic(Matching);
1175 bool OtherNTI = isHandledNonTargetIntrinsic(Other);
1176 if (OtherNTI != MatchingNTI)
1177 return nullptr;
1178 if (OtherNTI && MatchingNTI) {
1179 if (!isNonTargetIntrinsicMatch(cast<IntrinsicInst>(InVal.DefInst),
1180 cast<IntrinsicInst>(MemInst.get())))
1181 return nullptr;
1182 }
1183
1184 if (!isOperatingOnInvariantMemAt(MemInst.get(), InVal.Generation) &&
1185 !isSameMemGeneration(InVal.Generation, CurrentGeneration, InVal.DefInst,
1186 MemInst.get()))
1187 return nullptr;
1188
1189 if (!Result)
1190 Result = getOrCreateResult(Matching, Other->getType());
1191 return Result;
1192 }
1193
1194 bool EarlyCSE::overridingStores(const ParseMemoryInst &Earlier,
1195 const ParseMemoryInst &Later) {
1196 // Can we remove the Earlier store because of the Later store?
1197
1198 assert(Earlier.isUnordered() && !Earlier.isVolatile() &&
1199 "Violated invariant");
1200 if (Earlier.getPointerOperand() != Later.getPointerOperand())
1201 return false;
1202 if (!Earlier.getValueType() || !Later.getValueType() ||
1203 Earlier.getValueType() != Later.getValueType())
1204 return false;
1205 if (Earlier.getMatchingId() != Later.getMatchingId())
1206 return false;
1207 // At the moment, we don't remove ordered stores, but do remove
1208 // unordered atomic stores. There's no special requirement (for
1209 // unordered atomics) about removing atomic stores only in favor of
1210 // other atomic stores since we were going to execute the non-atomic
1211 // one anyway and the atomic one might never have become visible.
1212 if (!Earlier.isUnordered() || !Later.isUnordered())
1213 return false;
1214
1215 // Deal with non-target memory intrinsics.
1216 bool ENTI = isHandledNonTargetIntrinsic(Earlier.get());
1217 bool LNTI = isHandledNonTargetIntrinsic(Later.get());
1218 if (ENTI && LNTI)
1219 return isNonTargetIntrinsicMatch(cast<IntrinsicInst>(Earlier.get()),
1220 cast<IntrinsicInst>(Later.get()));
1221
1222 // Because of the check above, at least one of them is false.
1223 // For now disallow matching intrinsics with non-intrinsics,
1224 // so assume that the stores match if neither is an intrinsic.
1225 return ENTI == LNTI;
1226 }
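// For example (illustrative IR), in
//   store i32 1, ptr %p
//   store i32 2, ptr %p
// the first store is overridden by the second: same pointer, same value type,
// both unordered, so processNode() can delete it as a trivially dead store
// provided no intervening instruction may read %p.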
1227
1228 bool EarlyCSE::processNode(DomTreeNode *Node) {
1229 bool Changed = false;
1230 BasicBlock *BB = Node->getBlock();
1231
1232 // If this block has a single predecessor, then the predecessor is the parent
1233 // of the domtree node and all of the live out memory values are still current
1234 // in this block. If this block has multiple predecessors, then they could
1235 // have invalidated the live-out memory values of our parent value. For now,
1236 // just be conservative and invalidate memory if this block has multiple
1237 // predecessors.
1238 if (!BB->getSinglePredecessor())
1239 ++CurrentGeneration;
1240
1241 // If this node has a single predecessor which ends in a conditional branch,
1242 // we can infer the value of the branch condition given that we took this
1243 // path. We need the single predecessor to ensure there's not another path
1244 // which reaches this block where the condition might hold a different
1245 // value. Since we're adding this to the scoped hash table (like any other
1246 // def), it will have been popped if we encounter a future merge block.
1247 if (BasicBlock *Pred = BB->getSinglePredecessor()) {
1248 auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
1249 if (BI && BI->isConditional()) {
1250 auto *CondInst = dyn_cast<Instruction>(BI->getCondition());
1251 if (CondInst && SimpleValue::canHandle(CondInst))
1252 Changed |= handleBranchCondition(CondInst, BI, BB, Pred);
1253 }
1254 }
1255
1256 /// LastStore - Keep track of the last non-volatile store that we saw... for
1257 /// as long as there is no instruction that reads memory. If we see a store
1258 /// to the same location, we delete the dead store. This zaps trivial dead
1259 /// stores which can occur in bitfield code among other things.
1260 Instruction *LastStore = nullptr;
1261
1262 // See if any instructions in the block can be eliminated. If so, do it. If
1263 // not, add them to AvailableValues.
1264 for (Instruction &Inst : make_early_inc_range(*BB)) {
1265 // Dead instructions should just be removed.
1266 if (isInstructionTriviallyDead(&Inst, &TLI)) {
1267 LLVM_DEBUG(dbgs() << "EarlyCSE DCE: " << Inst << '\n');
1268 if (!DebugCounter::shouldExecute(CSECounter)) {
1269 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1270 continue;
1271 }
1272
1273 salvageKnowledge(&Inst, &AC);
1274 salvageDebugInfo(Inst);
1275 removeMSSA(Inst);
1276 Inst.eraseFromParent();
1277 Changed = true;
1278 ++NumSimplify;
1279 continue;
1280 }
1281
1282 // Skip assume intrinsics, they don't really have side effects (although
1283 // they're marked as such to ensure preservation of control dependencies),
1284 // and this pass will not bother with its removal. However, we should mark
1285 // its condition as true for all dominated blocks.
1286 if (auto *Assume = dyn_cast<AssumeInst>(&Inst)) {
1287 auto *CondI = dyn_cast<Instruction>(Assume->getArgOperand(0));
1288 if (CondI && SimpleValue::canHandle(CondI)) {
1289 LLVM_DEBUG(dbgs() << "EarlyCSE considering assumption: " << Inst
1290 << '\n');
1291 AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
1292 } else
1293 LLVM_DEBUG(dbgs() << "EarlyCSE skipping assumption: " << Inst << '\n');
1294 continue;
1295 }
1296
1297 // Likewise, noalias intrinsics don't actually write.
1298 if (match(&Inst,
1299 m_Intrinsic<Intrinsic::experimental_noalias_scope_decl>())) {
1300 LLVM_DEBUG(dbgs() << "EarlyCSE skipping noalias intrinsic: " << Inst
1301 << '\n');
1302 continue;
1303 }
1304
1305 // Skip sideeffect intrinsics, for the same reason as assume intrinsics.
1306 if (match(&Inst, m_Intrinsic<Intrinsic::sideeffect>())) {
1307 LLVM_DEBUG(dbgs() << "EarlyCSE skipping sideeffect: " << Inst << '\n');
1308 continue;
1309 }
1310
1311 // Skip pseudoprobe intrinsics, for the same reason as assume intrinsics.
1312 if (match(&Inst, m_Intrinsic<Intrinsic::pseudoprobe>())) {
1313 LLVM_DEBUG(dbgs() << "EarlyCSE skipping pseudoprobe: " << Inst << '\n');
1314 continue;
1315 }
1316
1317 // We can skip all invariant.start intrinsics since they only read memory,
1318 // and we can forward values across it. For invariant starts without
1319 // invariant ends, we can use the fact that the invariantness never ends to
1320 // start a scope in the current generation which is true for all future
1321 // generations. Also, we don't need to consume the last store since the
1322 // semantics of invariant.start allow us to perform DSE of the last
1323 // store, if there was a store following invariant.start. Consider:
1324 //
1325 // store 30, i8* p
1326 // invariant.start(p)
1327 // store 40, i8* p
1328 // We can DSE the store to 30, since the store 40 to invariant location p
1329 // causes undefined behaviour.
1330 if (match(&Inst, m_Intrinsic<Intrinsic::invariant_start>())) {
1331 // If there are any uses, the scope might end.
1332 if (!Inst.use_empty())
1333 continue;
1334 MemoryLocation MemLoc =
1335 MemoryLocation::getForArgument(&cast<CallInst>(Inst), 1, TLI);
1336 // Don't start a scope if we already have a better one pushed
1337 if (!AvailableInvariants.count(MemLoc))
1338 AvailableInvariants.insert(MemLoc, CurrentGeneration);
1339 continue;
1340 }
1341
1342 if (isGuard(&Inst)) {
1343 if (auto *CondI =
1344 dyn_cast<Instruction>(cast<CallInst>(Inst).getArgOperand(0))) {
1345 if (SimpleValue::canHandle(CondI)) {
1346 // Do we already know the actual value of this condition?
1347 if (auto *KnownCond = AvailableValues.lookup(CondI)) {
1348 // Is the condition known to be true?
1349 if (isa<ConstantInt>(KnownCond) &&
1350 cast<ConstantInt>(KnownCond)->isOne()) {
1351 LLVM_DEBUG(dbgs()
1352 << "EarlyCSE removing guard: " << Inst << '\n');
1353 salvageKnowledge(&Inst, &AC);
1354 removeMSSA(Inst);
1355 Inst.eraseFromParent();
1356 Changed = true;
1357 continue;
1358 } else
1359 // Use the known value if it wasn't true.
1360 cast<CallInst>(Inst).setArgOperand(0, KnownCond);
1361 }
1362 // The condition we're guarding on here is true for all dominated
1363 // locations.
1364 AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
1365 }
1366 }
1367
1368 // Guard intrinsics read all memory, but don't write any memory.
1369 // Accordingly, don't update the generation but consume the last store (to
1370 // avoid an incorrect DSE).
1371 LastStore = nullptr;
1372 continue;
1373 }
1374
1375 // If the instruction can be simplified (e.g. X+0 = X) then replace it with
1376 // its simpler value.
1377 if (Value *V = simplifyInstruction(&Inst, SQ)) {
1378 LLVM_DEBUG(dbgs() << "EarlyCSE Simplify: " << Inst << " to: " << *V
1379 << '\n');
1380 if (!DebugCounter::shouldExecute(CSECounter)) {
1381 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1382 } else {
1383 bool Killed = false;
1384 if (!Inst.use_empty()) {
1385 Inst.replaceAllUsesWith(V);
1386 Changed = true;
1387 }
1388 if (isInstructionTriviallyDead(&Inst, &TLI)) {
1389 salvageKnowledge(&Inst, &AC);
1390 removeMSSA(Inst);
1391 Inst.eraseFromParent();
1392 Changed = true;
1393 Killed = true;
1394 }
1395 if (Changed)
1396 ++NumSimplify;
1397 if (Killed)
1398 continue;
1399 }
1400 }
1401
1402 // If this is a simple instruction that we can value number, process it.
1403 if (SimpleValue::canHandle(&Inst)) {
1404 if (auto *CI = dyn_cast<ConstrainedFPIntrinsic>(&Inst)) {
1405 assert(CI->getExceptionBehavior() != fp::ebStrict &&
1406 "Unexpected ebStrict from SimpleValue::canHandle()");
1407 assert((!CI->getRoundingMode() ||
1408 CI->getRoundingMode() != RoundingMode::Dynamic) &&
1409 "Unexpected dynamic rounding from SimpleValue::canHandle()");
1410 }
1411 // See if the instruction has an available value. If so, use it.
1412 if (Value *V = AvailableValues.lookup(&Inst)) {
1413 LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << Inst << " to: " << *V
1414 << '\n');
1415 if (!DebugCounter::shouldExecute(CSECounter)) {
1416 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1417 continue;
1418 }
1419 if (auto *I = dyn_cast<Instruction>(V)) {
1420 // If I being poison triggers UB, there is no need to drop those
1421 // flags. Otherwise, only retain flags present on both I and Inst.
1422 // TODO: Currently some fast-math flags are not treated as
1423 // poison-generating even though they should. Until this is fixed,
1424 // always retain flags present on both I and Inst for floating point
1425 // instructions.
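// Illustrative sketch (hypothetical IR, not taken from this file):
//   %a = add nuw nsw i32 %x, %y   ; dominating instruction I
//   %b = add nuw i32 %x, %y       ; Inst, about to be CSE'd into %a
// Unless %a being poison would already trigger UB, %a keeps only the
// flags common to both, i.e. it is narrowed to "add nuw".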
1426 if (isa<FPMathOperator>(I) ||
    (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I)))
1427 I->andIRFlags(&Inst);
1428 }
1429 Inst.replaceAllUsesWith(V);
1430 salvageKnowledge(&Inst, &AC);
1431 removeMSSA(Inst);
1432 Inst.eraseFromParent();
1433 Changed = true;
1434 ++NumCSE;
1435 continue;
1436 }
1437
1438 // Otherwise, just remember that this value is available.
1439 AvailableValues.insert(&Inst, &Inst);
1440 continue;
1441 }
1442
1443 ParseMemoryInst MemInst(&Inst, TTI);
1444 // If this is a non-volatile load, process it.
1445 if (MemInst.isValid() && MemInst.isLoad()) {
1446 // (conservatively) we can't peek past the ordering implied by this
1447 // operation, but we can add this load to our set of available values
1448 if (MemInst.isVolatile() || !MemInst.isUnordered()) {
1449 LastStore = nullptr;
1450 ++CurrentGeneration;
1451 }
1452
1453 if (MemInst.isInvariantLoad()) {
1454 // If we pass an invariant load, we know that memory location is
1455 // indefinitely constant from the moment of first dereferenceability.
1456 // We conservatively treat the invariant_load as that moment. If we
1457 // pass an invariant load after already establishing a scope, don't
1458 // restart it since we want to preserve the earliest point seen.
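// Illustrative sketch (hypothetical IR, not taken from this file):
//   %v = load i32, ptr %p, !invariant.load !0
// opens an invariant scope for the location %p at CurrentGeneration, so
// later loads of %p can be forwarded even across generation bumps.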
1459 auto MemLoc = MemoryLocation::get(&Inst);
1460 if (!AvailableInvariants.count(MemLoc))
1461 AvailableInvariants.insert(MemLoc, CurrentGeneration);
1462 }
1463
1464 // If we have an available version of this load, and if it is the right
1465 // generation or the load is known to be from an invariant location,
1466 // replace this instruction.
1467 //
1468 // If either the dominating load or the current load are invariant, then
1469 // we can assume the current load loads the same value as the dominating
1470 // load.
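// Illustrative sketch (hypothetical IR, not taken from this file):
//   %v1 = load i32, ptr %p, !invariant.load !0
//   store i32 0, ptr %q            ; bumps the generation
//   %v2 = load i32, ptr %p
// Although the generations differ, the invariant scope opened for %p at
// %v1 allows %v2 to be replaced by %v1.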
1471 LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
1472 if (Value *Op = getMatchingValue(InVal, MemInst, CurrentGeneration)) {
1473 LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << Inst
1474 << " to: " << *InVal.DefInst << '\n');
1475 if (!DebugCounter::shouldExecute(CSECounter)) {
1476 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1477 continue;
1478 }
1479 if (!Inst.use_empty())
1480 Inst.replaceAllUsesWith(Op);
1481 salvageKnowledge(&Inst, &AC);
1482 removeMSSA(Inst);
1483 Inst.eraseFromParent();
1484 Changed = true;
1485 ++NumCSELoad;
1486 continue;
1487 }
1488
1489 // Otherwise, remember that we have this instruction.
1490 AvailableLoads.insert(MemInst.getPointerOperand(),
1491 LoadValue(&Inst, CurrentGeneration,
1492 MemInst.getMatchingId(),
1493 MemInst.isAtomic()));
1494 LastStore = nullptr;
1495 continue;
1496 }
1497
1498 // If this instruction may read from memory or throw (and potentially read
1499 // from memory in the exception handler), forget LastStore. Load/store
1500 // intrinsics will indicate both a read and a write to memory. The target
1501 // may override this (e.g. so that a store intrinsic does not read from
1502 // memory, and thus will be treated the same as a regular store for
1503 // commoning purposes).
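// Illustrative sketch (hypothetical IR, not taken from this file):
//   store i32 1, ptr %p
//   call void @may_read()          ; hypothetical callee that may read memory
//   store i32 2, ptr %p
// The call may observe the first store, so clearing LastStore here keeps
// the trivial DSE below from deleting it.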
1504 if ((Inst.mayReadFromMemory() || Inst.mayThrow()) &&
1505 !(MemInst.isValid() && !MemInst.mayReadFromMemory()))
1506 LastStore = nullptr;
1507
1508 // If this is a read-only call, process it.
1509 if (CallValue::canHandle(&Inst)) {
1510 // If we have an available version of this call, and if it is the right
1511 // generation, replace this instruction.
1512 std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(&Inst);
1513 if (InVal.first != nullptr &&
1514 isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first,
1515 &Inst)) {
1516 LLVM_DEBUG(dbgs() << "EarlyCSE CSE CALL: " << Inst
1517 << " to: " << *InVal.first << '\n');
1518 if (!DebugCounter::shouldExecute(CSECounter)) {
1519 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1520 continue;
1521 }
1522 if (!Inst.use_empty())
1523 Inst.replaceAllUsesWith(InVal.first);
1524 salvageKnowledge(&Inst, &AC);
1525 removeMSSA(Inst);
1526 Inst.eraseFromParent();
1527 Changed = true;
1528 ++NumCSECall;
1529 continue;
1530 }
1531
1532 // Otherwise, remember that we have this instruction.
1533 AvailableCalls.insert(&Inst, std::make_pair(&Inst, CurrentGeneration));
1534 continue;
1535 }
1536
1537 // A release fence requires that all stores complete before it, but does
1538 // not prevent the reordering of following loads 'before' the fence. As a
1539 // result, we don't need to consider it as writing to memory and don't need
1540 // to advance the generation. We do need to prevent DSE across the fence,
1541 // but that's handled above.
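// Illustrative sketch (hypothetical IR, not taken from this file):
//   %v = load i32, ptr %p
//   fence release
//   %w = load i32, ptr %p
// The generation is not bumped for the fence, so %w can still be CSE'd to
// %v; only store elimination across the fence is inhibited.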
1542 if (auto *FI = dyn_cast<FenceInst>(&Inst))
1543 if (FI->getOrdering() == AtomicOrdering::Release) {
1544 assert(Inst.mayReadFromMemory() && "relied on to prevent DSE above");
1545 continue;
1546 }
1547
1548 // write back DSE - If we write back the same value we just loaded from
1549 // the same location and haven't passed any intervening writes or ordering
1550 // operations, we can remove the write. The primary benefit is in allowing
1551 // the available load table to remain valid and value forward past where
1552 // the store originally was.
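// Illustrative sketch (hypothetical IR, not taken from this file):
//   %v = load i32, ptr %p
//   store i32 %v, ptr %p           ; writes back the value just loaded
// With no intervening write or ordering operation, the store is a no-op
// and is removed, keeping the available-load entry for %p usable.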
1553 if (MemInst.isValid() && MemInst.isStore()) {
1554 LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
1555 if (InVal.DefInst &&
1556 InVal.DefInst == getMatchingValue(InVal, MemInst, CurrentGeneration)) {
1557 // It is okay to have a LastStore to a different pointer here if MemorySSA
1558 // tells us that the load and store are from the same memory generation.
1559 // In that case, LastStore should keep its present value since we're
1560 // removing the current store.
1561 assert((!LastStore ||
1562 ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
1563 MemInst.getPointerOperand() ||
1564 MSSA) &&
1565 "can't have an intervening store if not using MemorySSA!");
1566 LLVM_DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << Inst << '\n');
1567 if (!DebugCounter::shouldExecute(CSECounter)) {
1568 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1569 continue;
1570 }
1571 salvageKnowledge(&Inst, &AC);
1572 removeMSSA(Inst);
1573 Inst.eraseFromParent();
1574 Changed = true;
1575 ++NumDSE;
1576 // We can avoid incrementing the generation count since we were able
1577 // to eliminate this store.
1578 continue;
1579 }
1580 }
1581
1582 // Okay, this isn't something we can CSE at all. Check to see if it is
1583 // something that could modify memory. If so, our available memory values
1584 // cannot be used so bump the generation count.
1585 if (Inst.mayWriteToMemory()) {
1586 ++CurrentGeneration;
1587
1588 if (MemInst.isValid() && MemInst.isStore()) {
1589 // We do a trivial form of DSE if there are two stores to the same
1590 // location with no intervening loads. Delete the earlier store.
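// Illustrative sketch (hypothetical IR, not taken from this file):
//   store i32 1, ptr %p            ; LastStore
//   store i32 2, ptr %p            ; Inst, no intervening load
// The earlier store is dead and is erased here.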
1591 if (LastStore) {
1592 if (overridingStores(ParseMemoryInst(LastStore, TTI), MemInst)) {
1593 LLVM_DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
1594 << " due to: " << Inst << '\n');
1595 if (!DebugCounter::shouldExecute(CSECounter)) {
1596 LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
1597 } else {
1598 salvageKnowledge(&Inst, &AC);
1599 removeMSSA(*LastStore);
1600 LastStore->eraseFromParent();
1601 Changed = true;
1602 ++NumDSE;
1603 LastStore = nullptr;
1604 }
1605 }
1606 // fallthrough - we can exploit information about this store
1607 }
1608
1609 // Okay, we just invalidated anything we knew about loaded values. Try
1610 // to salvage *something* by remembering that the stored value is a live
1611 // version of the pointer. It is safe to forward from volatile stores
1612 // to non-volatile loads, so we don't have to check for volatility of
1613 // the store.
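// Illustrative sketch (hypothetical IR, not taken from this file):
//   store i32 %v, ptr %p
//   %r = load i32, ptr %p          ; same generation, no clobber between
// The load is CSE'd to %v through the entry inserted below.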
1614 AvailableLoads.insert(MemInst.getPointerOperand(),
1615 LoadValue(&Inst, CurrentGeneration,
1616 MemInst.getMatchingId(),
1617 MemInst.isAtomic()));
1618
1619 // Remember that this was the last unordered store we saw for DSE. We
1620 // don't yet handle DSE on ordered or volatile stores since we don't
1621 // have a good way to model the ordering requirement for following
1622 // passes once the store is removed. We could insert a fence, but
1623 // since fences are slightly stronger than stores in their ordering,
1624 // it's not clear this is a profitable transform. Another option would
1625 // be to merge the ordering with that of the post dominating store.
1626 if (MemInst.isUnordered() && !MemInst.isVolatile())
1627 LastStore = &Inst;
1628 else
1629 LastStore = nullptr;
1630 }
1631 }
1632 }
1633
1634 return Changed;
1635 }
1636
1637 bool EarlyCSE::run() {
1638 // Note, deque is being used here because there are significant performance
1639 // gains over vector when the container becomes very large due to the
1640 // specific access patterns. For more information see the mailing list
1641 // discussion on this:
1642 // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
1643 std::deque<StackNode *> nodesToProcess;
1644
1645 bool Changed = false;
1646
1647 // Process the root node.
1648 nodesToProcess.push_back(new StackNode(
1649 AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
1650 CurrentGeneration, DT.getRootNode(),
1651 DT.getRootNode()->begin(), DT.getRootNode()->end()));
1652
1653 assert(!CurrentGeneration && "Create a new EarlyCSE instance to rerun it.");
1654
1655 // Process the stack.
1656 while (!nodesToProcess.empty()) {
1657 // Grab the first item off the stack. Set the current generation, remove
1658 // the node from the stack, and process it.
1659 StackNode *NodeToProcess = nodesToProcess.back();
1660
1661 // Initialize class members.
1662 CurrentGeneration = NodeToProcess->currentGeneration();
1663
1664 // Check if the node needs to be processed.
1665 if (!NodeToProcess->isProcessed()) {
1666 // Process the node.
1667 Changed |= processNode(NodeToProcess->node());
1668 NodeToProcess->childGeneration(CurrentGeneration);
1669 NodeToProcess->process();
1670 } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
1671 // Push the next child onto the stack.
1672 DomTreeNode *child = NodeToProcess->nextChild();
1673 nodesToProcess.push_back(
1674 new StackNode(AvailableValues, AvailableLoads, AvailableInvariants,
1675 AvailableCalls, NodeToProcess->childGeneration(),
1676 child, child->begin(), child->end()));
1677 } else {
1678 // It has been processed, and there are no more children to process,
1679 // so delete it and pop it off the stack.
1680 delete NodeToProcess;
1681 nodesToProcess.pop_back();
1682 }
1683 } // while (!nodes...)
1684
1685 return Changed;
1686 }
1687
1688 PreservedAnalyses EarlyCSEPass::run(Function &F,
1689 FunctionAnalysisManager &AM) {
1690 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1691 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1692 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1693 auto &AC = AM.getResult<AssumptionAnalysis>(F);
1694 auto *MSSA =
1695 UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr;
1696
1697 EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);
1698
1699 if (!CSE.run())
1700 return PreservedAnalyses::all();
1701
1702 PreservedAnalyses PA;
1703 PA.preserveSet<CFGAnalyses>();
1704 if (UseMemorySSA)
1705 PA.preserve<MemorySSAAnalysis>();
1706 return PA;
1707 }
1708
1709 void EarlyCSEPass::printPipeline(
1710 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1711 static_cast<PassInfoMixin<EarlyCSEPass> *>(this)->printPipeline(
1712 OS, MapClassName2PassName);
1713 OS << "<";
1714 if (UseMemorySSA)
1715 OS << "memssa";
1716 OS << ">";
1717 }
1718
1719 namespace {
1720
1721 /// A simple and fast domtree-based CSE pass.
1722 ///
1723 /// This pass does a simple depth-first walk over the dominator tree,
1724 /// eliminating trivially redundant instructions and using instsimplify to
1725 /// canonicalize things as it goes. It is intended to be fast and catch obvious
1726 /// cases so that instcombine and other passes are more effective. It is
1727 /// expected that a later pass of GVN will catch the interesting/hard cases.
1728 template<bool UseMemorySSA>
1729 class EarlyCSELegacyCommonPass : public FunctionPass {
1730 public:
1731 static char ID;
1732
1733   EarlyCSELegacyCommonPass() : FunctionPass(ID) {
1734 if (UseMemorySSA)
1735 initializeEarlyCSEMemSSALegacyPassPass(*PassRegistry::getPassRegistry());
1736 else
1737 initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
1738 }
1739
1740   bool runOnFunction(Function &F) override {
1741 if (skipFunction(F))
1742 return false;
1743
1744 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1745 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1746 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1747 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1748 auto *MSSA =
1749 UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr;
1750
1751 EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);
1752
1753 return CSE.run();
1754 }
1755
1756   void getAnalysisUsage(AnalysisUsage &AU) const override {
1757 AU.addRequired<AssumptionCacheTracker>();
1758 AU.addRequired<DominatorTreeWrapperPass>();
1759 AU.addRequired<TargetLibraryInfoWrapperPass>();
1760 AU.addRequired<TargetTransformInfoWrapperPass>();
1761 if (UseMemorySSA) {
1762 AU.addRequired<AAResultsWrapperPass>();
1763 AU.addRequired<MemorySSAWrapperPass>();
1764 AU.addPreserved<MemorySSAWrapperPass>();
1765 }
1766 AU.addPreserved<GlobalsAAWrapperPass>();
1767 AU.addPreserved<AAResultsWrapperPass>();
1768 AU.setPreservesCFG();
1769 }
1770 };
1771
1772 } // end anonymous namespace
1773
1774 using EarlyCSELegacyPass = EarlyCSELegacyCommonPass</*UseMemorySSA=*/false>;
1775
1776 template<>
1777 char EarlyCSELegacyPass::ID = 0;
1778
1779 INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false,
1780 false)
1781 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
1782 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1783 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1784 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1785 INITIALIZE_PASS_END(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false)
1786
1787 using EarlyCSEMemSSALegacyPass =
1788 EarlyCSELegacyCommonPass</*UseMemorySSA=*/true>;
1789
1790 template<>
1791 char EarlyCSEMemSSALegacyPass::ID = 0;
1792
1793 FunctionPass *llvm::createEarlyCSEPass(bool UseMemorySSA) {
1794 if (UseMemorySSA)
1795 return new EarlyCSEMemSSALegacyPass();
1796 else
1797 return new EarlyCSELegacyPass();
1798 }
1799
1800 INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
1801 "Early CSE w/ MemorySSA", false, false)
1802 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
1803 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1804 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
1805 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1806 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1807 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
1808 INITIALIZE_PASS_END(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
1809 "Early CSE w/ MemorySSA", false, false)
1810