1 //===- GVN.cpp - Eliminate redundant values and loads ---------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass performs global value numbering to eliminate fully redundant 10 // instructions. It also performs simple dead load elimination. 11 // 12 // Note that this pass does the value numbering itself; it does not use the 13 // ValueNumbering analysis passes. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "llvm/Transforms/Scalar/GVN.h" 18 #include "llvm/ADT/DenseMap.h" 19 #include "llvm/ADT/DepthFirstIterator.h" 20 #include "llvm/ADT/Hashing.h" 21 #include "llvm/ADT/MapVector.h" 22 #include "llvm/ADT/PostOrderIterator.h" 23 #include "llvm/ADT/STLExtras.h" 24 #include "llvm/ADT/SetVector.h" 25 #include "llvm/ADT/SmallPtrSet.h" 26 #include "llvm/ADT/SmallVector.h" 27 #include "llvm/ADT/Statistic.h" 28 #include "llvm/Analysis/AliasAnalysis.h" 29 #include "llvm/Analysis/AssumeBundleQueries.h" 30 #include "llvm/Analysis/AssumptionCache.h" 31 #include "llvm/Analysis/CFG.h" 32 #include "llvm/Analysis/DomTreeUpdater.h" 33 #include "llvm/Analysis/GlobalsModRef.h" 34 #include "llvm/Analysis/InstructionPrecedenceTracking.h" 35 #include "llvm/Analysis/InstructionSimplify.h" 36 #include "llvm/Analysis/Loads.h" 37 #include "llvm/Analysis/LoopInfo.h" 38 #include "llvm/Analysis/MemoryBuiltins.h" 39 #include "llvm/Analysis/MemoryDependenceAnalysis.h" 40 #include "llvm/Analysis/MemorySSA.h" 41 #include "llvm/Analysis/MemorySSAUpdater.h" 42 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 43 #include "llvm/Analysis/PHITransAddr.h" 44 #include "llvm/Analysis/TargetLibraryInfo.h" 45 #include "llvm/Analysis/ValueTracking.h" 46 #include "llvm/IR/Attributes.h" 47 #include "llvm/IR/BasicBlock.h" 48 #include "llvm/IR/Constant.h" 49 #include "llvm/IR/Constants.h" 50 #include "llvm/IR/DebugLoc.h" 51 #include "llvm/IR/Dominators.h" 52 #include "llvm/IR/Function.h" 53 #include "llvm/IR/InstrTypes.h" 54 #include "llvm/IR/Instruction.h" 55 #include "llvm/IR/Instructions.h" 56 #include "llvm/IR/IntrinsicInst.h" 57 #include "llvm/IR/LLVMContext.h" 58 #include "llvm/IR/Metadata.h" 59 #include "llvm/IR/Module.h" 60 #include "llvm/IR/PassManager.h" 61 #include "llvm/IR/PatternMatch.h" 62 #include "llvm/IR/Type.h" 63 #include "llvm/IR/Use.h" 64 #include "llvm/IR/Value.h" 65 #include "llvm/InitializePasses.h" 66 #include "llvm/Pass.h" 67 #include "llvm/Support/Casting.h" 68 #include "llvm/Support/CommandLine.h" 69 #include "llvm/Support/Compiler.h" 70 #include "llvm/Support/Debug.h" 71 #include "llvm/Support/raw_ostream.h" 72 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" 73 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 74 #include "llvm/Transforms/Utils/Local.h" 75 #include "llvm/Transforms/Utils/SSAUpdater.h" 76 #include "llvm/Transforms/Utils/VNCoercion.h" 77 #include <algorithm> 78 #include <cassert> 79 #include <cstdint> 80 #include <optional> 81 #include <utility> 82 83 using namespace llvm; 84 using namespace llvm::gvn; 85 using namespace llvm::VNCoercion; 86 using namespace PatternMatch; 87 88 #define DEBUG_TYPE "gvn" 89 90 STATISTIC(NumGVNInstr, "Number of instructions deleted"); 91 STATISTIC(NumGVNLoad, "Number of loads deleted"); 92 STATISTIC(NumGVNPRE, 
"Number of instructions PRE'd"); 93 STATISTIC(NumGVNBlocks, "Number of blocks merged"); 94 STATISTIC(NumGVNSimpl, "Number of instructions simplified"); 95 STATISTIC(NumGVNEqProp, "Number of equalities propagated"); 96 STATISTIC(NumPRELoad, "Number of loads PRE'd"); 97 STATISTIC(NumPRELoopLoad, "Number of loop loads PRE'd"); 98 STATISTIC(NumPRELoadMoved2CEPred, 99 "Number of loads moved to predecessor of a critical edge in PRE"); 100 101 STATISTIC(IsValueFullyAvailableInBlockNumSpeculationsMax, 102 "Number of blocks speculated as available in " 103 "IsValueFullyAvailableInBlock(), max"); 104 STATISTIC(MaxBBSpeculationCutoffReachedTimes, 105 "Number of times we we reached gvn-max-block-speculations cut-off " 106 "preventing further exploration"); 107 108 static cl::opt<bool> GVNEnablePRE("enable-pre", cl::init(true), cl::Hidden); 109 static cl::opt<bool> GVNEnableLoadPRE("enable-load-pre", cl::init(true)); 110 static cl::opt<bool> GVNEnableLoadInLoopPRE("enable-load-in-loop-pre", 111 cl::init(true)); 112 static cl::opt<bool> 113 GVNEnableSplitBackedgeInLoadPRE("enable-split-backedge-in-load-pre", 114 cl::init(false)); 115 static cl::opt<bool> GVNEnableMemDep("enable-gvn-memdep", cl::init(true)); 116 static cl::opt<bool> GVNEnableMemorySSA("enable-gvn-memoryssa", 117 cl::init(false)); 118 119 static cl::opt<uint32_t> MaxNumDeps( 120 "gvn-max-num-deps", cl::Hidden, cl::init(100), 121 cl::desc("Max number of dependences to attempt Load PRE (default = 100)")); 122 123 // This is based on IsValueFullyAvailableInBlockNumSpeculationsMax stat. 124 static cl::opt<uint32_t> MaxBBSpeculations( 125 "gvn-max-block-speculations", cl::Hidden, cl::init(600), 126 cl::desc("Max number of blocks we're willing to speculate on (and recurse " 127 "into) when deducing if a value is fully available or not in GVN " 128 "(default = 600)")); 129 130 static cl::opt<uint32_t> MaxNumVisitedInsts( 131 "gvn-max-num-visited-insts", cl::Hidden, cl::init(100), 132 cl::desc("Max number of visited instructions when trying to find " 133 "dominating value of select dependency (default = 100)")); 134 135 static cl::opt<uint32_t> MaxNumInsnsPerBlock( 136 "gvn-max-num-insns", cl::Hidden, cl::init(100), 137 cl::desc("Max number of instructions to scan in each basic block in GVN " 138 "(default = 100)")); 139 140 struct llvm::GVNPass::Expression { 141 uint32_t opcode; 142 bool commutative = false; 143 // The type is not necessarily the result type of the expression, it may be 144 // any additional type needed to disambiguate the expression. 
145 Type *type = nullptr; 146 SmallVector<uint32_t, 4> varargs; 147 148 AttributeList attrs; 149 150 Expression(uint32_t o = ~2U) : opcode(o) {} 151 152 bool operator==(const Expression &other) const { 153 if (opcode != other.opcode) 154 return false; 155 if (opcode == ~0U || opcode == ~1U) 156 return true; 157 if (type != other.type) 158 return false; 159 if (varargs != other.varargs) 160 return false; 161 if ((!attrs.isEmpty() || !other.attrs.isEmpty()) && 162 !attrs.intersectWith(type->getContext(), other.attrs).has_value()) 163 return false; 164 return true; 165 } 166 167 friend hash_code hash_value(const Expression &Value) { 168 return hash_combine( 169 Value.opcode, Value.type, 170 hash_combine_range(Value.varargs.begin(), Value.varargs.end())); 171 } 172 }; 173 174 namespace llvm { 175 176 template <> struct DenseMapInfo<GVNPass::Expression> { 177 static inline GVNPass::Expression getEmptyKey() { return ~0U; } 178 static inline GVNPass::Expression getTombstoneKey() { return ~1U; } 179 180 static unsigned getHashValue(const GVNPass::Expression &e) { 181 using llvm::hash_value; 182 183 return static_cast<unsigned>(hash_value(e)); 184 } 185 186 static bool isEqual(const GVNPass::Expression &LHS, 187 const GVNPass::Expression &RHS) { 188 return LHS == RHS; 189 } 190 }; 191 192 } // end namespace llvm 193 194 /// Represents a particular available value that we know how to materialize. 195 /// Materialization of an AvailableValue never fails. An AvailableValue is 196 /// implicitly associated with a rematerialization point which is the 197 /// location of the instruction from which it was formed. 198 struct llvm::gvn::AvailableValue { 199 enum class ValType { 200 SimpleVal, // A simple offsetted value that is accessed. 201 LoadVal, // A value produced by a load. 202 MemIntrin, // A memory intrinsic which is loaded from. 203 UndefVal, // A UndefValue representing a value from dead block (which 204 // is not yet physically removed from the CFG). 205 SelectVal, // A pointer select which is loaded from and for which the load 206 // can be replace by a value select. 207 }; 208 209 /// Val - The value that is live out of the block. 210 Value *Val; 211 /// Kind of the live-out value. 212 ValType Kind; 213 214 /// Offset - The byte offset in Val that is interesting for the load query. 215 unsigned Offset = 0; 216 /// V1, V2 - The dominating non-clobbered values of SelectVal. 
217 Value *V1 = nullptr, *V2 = nullptr; 218 219 static AvailableValue get(Value *V, unsigned Offset = 0) { 220 AvailableValue Res; 221 Res.Val = V; 222 Res.Kind = ValType::SimpleVal; 223 Res.Offset = Offset; 224 return Res; 225 } 226 227 static AvailableValue getMI(MemIntrinsic *MI, unsigned Offset = 0) { 228 AvailableValue Res; 229 Res.Val = MI; 230 Res.Kind = ValType::MemIntrin; 231 Res.Offset = Offset; 232 return Res; 233 } 234 235 static AvailableValue getLoad(LoadInst *Load, unsigned Offset = 0) { 236 AvailableValue Res; 237 Res.Val = Load; 238 Res.Kind = ValType::LoadVal; 239 Res.Offset = Offset; 240 return Res; 241 } 242 243 static AvailableValue getUndef() { 244 AvailableValue Res; 245 Res.Val = nullptr; 246 Res.Kind = ValType::UndefVal; 247 Res.Offset = 0; 248 return Res; 249 } 250 251 static AvailableValue getSelect(SelectInst *Sel, Value *V1, Value *V2) { 252 AvailableValue Res; 253 Res.Val = Sel; 254 Res.Kind = ValType::SelectVal; 255 Res.Offset = 0; 256 Res.V1 = V1; 257 Res.V2 = V2; 258 return Res; 259 } 260 261 bool isSimpleValue() const { return Kind == ValType::SimpleVal; } 262 bool isCoercedLoadValue() const { return Kind == ValType::LoadVal; } 263 bool isMemIntrinValue() const { return Kind == ValType::MemIntrin; } 264 bool isUndefValue() const { return Kind == ValType::UndefVal; } 265 bool isSelectValue() const { return Kind == ValType::SelectVal; } 266 267 Value *getSimpleValue() const { 268 assert(isSimpleValue() && "Wrong accessor"); 269 return Val; 270 } 271 272 LoadInst *getCoercedLoadValue() const { 273 assert(isCoercedLoadValue() && "Wrong accessor"); 274 return cast<LoadInst>(Val); 275 } 276 277 MemIntrinsic *getMemIntrinValue() const { 278 assert(isMemIntrinValue() && "Wrong accessor"); 279 return cast<MemIntrinsic>(Val); 280 } 281 282 SelectInst *getSelectValue() const { 283 assert(isSelectValue() && "Wrong accessor"); 284 return cast<SelectInst>(Val); 285 } 286 287 /// Emit code at the specified insertion point to adjust the value defined 288 /// here to the specified type. This handles various coercion cases. 289 Value *MaterializeAdjustedValue(LoadInst *Load, Instruction *InsertPt, 290 GVNPass &gvn) const; 291 }; 292 293 /// Represents an AvailableValue which can be rematerialized at the end of 294 /// the associated BasicBlock. 295 struct llvm::gvn::AvailableValueInBlock { 296 /// BB - The basic block in question. 297 BasicBlock *BB = nullptr; 298 299 /// AV - The actual available value 300 AvailableValue AV; 301 302 static AvailableValueInBlock get(BasicBlock *BB, AvailableValue &&AV) { 303 AvailableValueInBlock Res; 304 Res.BB = BB; 305 Res.AV = std::move(AV); 306 return Res; 307 } 308 309 static AvailableValueInBlock get(BasicBlock *BB, Value *V, 310 unsigned Offset = 0) { 311 return get(BB, AvailableValue::get(V, Offset)); 312 } 313 314 static AvailableValueInBlock getUndef(BasicBlock *BB) { 315 return get(BB, AvailableValue::getUndef()); 316 } 317 318 static AvailableValueInBlock getSelect(BasicBlock *BB, SelectInst *Sel, 319 Value *V1, Value *V2) { 320 return get(BB, AvailableValue::getSelect(Sel, V1, V2)); 321 } 322 323 /// Emit code at the end of this block to adjust the value defined here to 324 /// the specified type. This handles various coercion cases. 
325 Value *MaterializeAdjustedValue(LoadInst *Load, GVNPass &gvn) const { 326 return AV.MaterializeAdjustedValue(Load, BB->getTerminator(), gvn); 327 } 328 }; 329 330 //===----------------------------------------------------------------------===// 331 // ValueTable Internal Functions 332 //===----------------------------------------------------------------------===// 333 334 GVNPass::Expression GVNPass::ValueTable::createExpr(Instruction *I) { 335 Expression e; 336 e.type = I->getType(); 337 e.opcode = I->getOpcode(); 338 if (const GCRelocateInst *GCR = dyn_cast<GCRelocateInst>(I)) { 339 // gc.relocate is 'special' call: its second and third operands are 340 // not real values, but indices into statepoint's argument list. 341 // Use the refered to values for purposes of identity. 342 e.varargs.push_back(lookupOrAdd(GCR->getOperand(0))); 343 e.varargs.push_back(lookupOrAdd(GCR->getBasePtr())); 344 e.varargs.push_back(lookupOrAdd(GCR->getDerivedPtr())); 345 } else { 346 for (Use &Op : I->operands()) 347 e.varargs.push_back(lookupOrAdd(Op)); 348 } 349 if (I->isCommutative()) { 350 // Ensure that commutative instructions that only differ by a permutation 351 // of their operands get the same value number by sorting the operand value 352 // numbers. Since commutative operands are the 1st two operands it is more 353 // efficient to sort by hand rather than using, say, std::sort. 354 assert(I->getNumOperands() >= 2 && "Unsupported commutative instruction!"); 355 if (e.varargs[0] > e.varargs[1]) 356 std::swap(e.varargs[0], e.varargs[1]); 357 e.commutative = true; 358 } 359 360 if (auto *C = dyn_cast<CmpInst>(I)) { 361 // Sort the operand value numbers so x<y and y>x get the same value number. 362 CmpInst::Predicate Predicate = C->getPredicate(); 363 if (e.varargs[0] > e.varargs[1]) { 364 std::swap(e.varargs[0], e.varargs[1]); 365 Predicate = CmpInst::getSwappedPredicate(Predicate); 366 } 367 e.opcode = (C->getOpcode() << 8) | Predicate; 368 e.commutative = true; 369 } else if (auto *E = dyn_cast<InsertValueInst>(I)) { 370 e.varargs.append(E->idx_begin(), E->idx_end()); 371 } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) { 372 ArrayRef<int> ShuffleMask = SVI->getShuffleMask(); 373 e.varargs.append(ShuffleMask.begin(), ShuffleMask.end()); 374 } else if (auto *CB = dyn_cast<CallBase>(I)) { 375 e.attrs = CB->getAttributes(); 376 } 377 378 return e; 379 } 380 381 GVNPass::Expression GVNPass::ValueTable::createCmpExpr( 382 unsigned Opcode, CmpInst::Predicate Predicate, Value *LHS, Value *RHS) { 383 assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) && 384 "Not a comparison!"); 385 Expression e; 386 e.type = CmpInst::makeCmpResultType(LHS->getType()); 387 e.varargs.push_back(lookupOrAdd(LHS)); 388 e.varargs.push_back(lookupOrAdd(RHS)); 389 390 // Sort the operand value numbers so x<y and y>x get the same value number. 391 if (e.varargs[0] > e.varargs[1]) { 392 std::swap(e.varargs[0], e.varargs[1]); 393 Predicate = CmpInst::getSwappedPredicate(Predicate); 394 } 395 e.opcode = (Opcode << 8) | Predicate; 396 e.commutative = true; 397 return e; 398 } 399 400 GVNPass::Expression 401 GVNPass::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) { 402 assert(EI && "Not an ExtractValueInst?"); 403 Expression e; 404 e.type = EI->getType(); 405 e.opcode = 0; 406 407 WithOverflowInst *WO = dyn_cast<WithOverflowInst>(EI->getAggregateOperand()); 408 if (WO != nullptr && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) { 409 // EI is an extract from one of our with.overflow intrinsics. 
Synthesize 410 // a semantically equivalent expression instead of an extract value 411 // expression. 412 e.opcode = WO->getBinaryOp(); 413 e.varargs.push_back(lookupOrAdd(WO->getLHS())); 414 e.varargs.push_back(lookupOrAdd(WO->getRHS())); 415 return e; 416 } 417 418 // Not a recognised intrinsic. Fall back to producing an extract value 419 // expression. 420 e.opcode = EI->getOpcode(); 421 for (Use &Op : EI->operands()) 422 e.varargs.push_back(lookupOrAdd(Op)); 423 424 append_range(e.varargs, EI->indices()); 425 426 return e; 427 } 428 429 GVNPass::Expression GVNPass::ValueTable::createGEPExpr(GetElementPtrInst *GEP) { 430 Expression E; 431 Type *PtrTy = GEP->getType()->getScalarType(); 432 const DataLayout &DL = GEP->getDataLayout(); 433 unsigned BitWidth = DL.getIndexTypeSizeInBits(PtrTy); 434 SmallMapVector<Value *, APInt, 4> VariableOffsets; 435 APInt ConstantOffset(BitWidth, 0); 436 if (GEP->collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset)) { 437 // Convert into offset representation, to recognize equivalent address 438 // calculations that use different type encoding. 439 LLVMContext &Context = GEP->getContext(); 440 E.opcode = GEP->getOpcode(); 441 E.type = nullptr; 442 E.varargs.push_back(lookupOrAdd(GEP->getPointerOperand())); 443 for (const auto &Pair : VariableOffsets) { 444 E.varargs.push_back(lookupOrAdd(Pair.first)); 445 E.varargs.push_back(lookupOrAdd(ConstantInt::get(Context, Pair.second))); 446 } 447 if (!ConstantOffset.isZero()) 448 E.varargs.push_back( 449 lookupOrAdd(ConstantInt::get(Context, ConstantOffset))); 450 } else { 451 // If converting to offset representation fails (for scalable vectors), 452 // fall back to type-based implementation: 453 E.opcode = GEP->getOpcode(); 454 E.type = GEP->getSourceElementType(); 455 for (Use &Op : GEP->operands()) 456 E.varargs.push_back(lookupOrAdd(Op)); 457 } 458 return E; 459 } 460 461 //===----------------------------------------------------------------------===// 462 // ValueTable External Functions 463 //===----------------------------------------------------------------------===// 464 465 GVNPass::ValueTable::ValueTable() = default; 466 GVNPass::ValueTable::ValueTable(const ValueTable &) = default; 467 GVNPass::ValueTable::ValueTable(ValueTable &&) = default; 468 GVNPass::ValueTable::~ValueTable() = default; 469 GVNPass::ValueTable & 470 GVNPass::ValueTable::operator=(const GVNPass::ValueTable &Arg) = default; 471 472 /// add - Insert a value into the table with a specified value number. 473 void GVNPass::ValueTable::add(Value *V, uint32_t num) { 474 valueNumbering.insert(std::make_pair(V, num)); 475 if (PHINode *PN = dyn_cast<PHINode>(V)) 476 NumberingPhi[num] = PN; 477 } 478 479 uint32_t GVNPass::ValueTable::lookupOrAddCall(CallInst *C) { 480 // FIXME: Currently the calls which may access the thread id may 481 // be considered as not accessing the memory. But this is 482 // problematic for coroutines, since coroutines may resume in a 483 // different thread. So we disable the optimization here for the 484 // correctness. However, it may block many other correct 485 // optimizations. Revert this one when we detect the memory 486 // accessing kind more precisely. 487 if (C->getFunction()->isPresplitCoroutine()) { 488 valueNumbering[C] = nextValueNumber; 489 return nextValueNumber++; 490 } 491 492 // Do not combine convergent calls since they implicitly depend on the set of 493 // threads that is currently executing, and they might be in different basic 494 // blocks. 
495 if (C->isConvergent()) { 496 valueNumbering[C] = nextValueNumber; 497 return nextValueNumber++; 498 } 499 500 if (AA->doesNotAccessMemory(C)) { 501 Expression exp = createExpr(C); 502 uint32_t e = assignExpNewValueNum(exp).first; 503 valueNumbering[C] = e; 504 return e; 505 } 506 507 if (MD && AA->onlyReadsMemory(C)) { 508 Expression exp = createExpr(C); 509 auto ValNum = assignExpNewValueNum(exp); 510 if (ValNum.second) { 511 valueNumbering[C] = ValNum.first; 512 return ValNum.first; 513 } 514 515 MemDepResult local_dep = MD->getDependency(C); 516 517 if (!local_dep.isDef() && !local_dep.isNonLocal()) { 518 valueNumbering[C] = nextValueNumber; 519 return nextValueNumber++; 520 } 521 522 if (local_dep.isDef()) { 523 // For masked load/store intrinsics, the local_dep may actually be 524 // a normal load or store instruction. 525 CallInst *local_cdep = dyn_cast<CallInst>(local_dep.getInst()); 526 527 if (!local_cdep || local_cdep->arg_size() != C->arg_size()) { 528 valueNumbering[C] = nextValueNumber; 529 return nextValueNumber++; 530 } 531 532 for (unsigned i = 0, e = C->arg_size(); i < e; ++i) { 533 uint32_t c_vn = lookupOrAdd(C->getArgOperand(i)); 534 uint32_t cd_vn = lookupOrAdd(local_cdep->getArgOperand(i)); 535 if (c_vn != cd_vn) { 536 valueNumbering[C] = nextValueNumber; 537 return nextValueNumber++; 538 } 539 } 540 541 uint32_t v = lookupOrAdd(local_cdep); 542 valueNumbering[C] = v; 543 return v; 544 } 545 546 // Non-local case. 547 const MemoryDependenceResults::NonLocalDepInfo &deps = 548 MD->getNonLocalCallDependency(C); 549 // FIXME: Move the checking logic to MemDep! 550 CallInst* cdep = nullptr; 551 552 // Check to see if we have a single dominating call instruction that is 553 // identical to C. 554 for (const NonLocalDepEntry &I : deps) { 555 if (I.getResult().isNonLocal()) 556 continue; 557 558 // We don't handle non-definitions. If we already have a call, reject 559 // instruction dependencies. 560 if (!I.getResult().isDef() || cdep != nullptr) { 561 cdep = nullptr; 562 break; 563 } 564 565 CallInst *NonLocalDepCall = dyn_cast<CallInst>(I.getResult().getInst()); 566 // FIXME: All duplicated with non-local case. 567 if (NonLocalDepCall && DT->properlyDominates(I.getBB(), C->getParent())) { 568 cdep = NonLocalDepCall; 569 continue; 570 } 571 572 cdep = nullptr; 573 break; 574 } 575 576 if (!cdep) { 577 valueNumbering[C] = nextValueNumber; 578 return nextValueNumber++; 579 } 580 581 if (cdep->arg_size() != C->arg_size()) { 582 valueNumbering[C] = nextValueNumber; 583 return nextValueNumber++; 584 } 585 for (unsigned i = 0, e = C->arg_size(); i < e; ++i) { 586 uint32_t c_vn = lookupOrAdd(C->getArgOperand(i)); 587 uint32_t cd_vn = lookupOrAdd(cdep->getArgOperand(i)); 588 if (c_vn != cd_vn) { 589 valueNumbering[C] = nextValueNumber; 590 return nextValueNumber++; 591 } 592 } 593 594 uint32_t v = lookupOrAdd(cdep); 595 valueNumbering[C] = v; 596 return v; 597 } 598 599 valueNumbering[C] = nextValueNumber; 600 return nextValueNumber++; 601 } 602 603 /// Returns true if a value number exists for the specified value. 604 bool GVNPass::ValueTable::exists(Value *V) const { 605 return valueNumbering.contains(V); 606 } 607 608 /// lookup_or_add - Returns the value number for the specified value, assigning 609 /// it a new number if it did not have one before. 
610 uint32_t GVNPass::ValueTable::lookupOrAdd(Value *V) { 611 DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V); 612 if (VI != valueNumbering.end()) 613 return VI->second; 614 615 auto *I = dyn_cast<Instruction>(V); 616 if (!I) { 617 valueNumbering[V] = nextValueNumber; 618 return nextValueNumber++; 619 } 620 621 Expression exp; 622 switch (I->getOpcode()) { 623 case Instruction::Call: 624 return lookupOrAddCall(cast<CallInst>(I)); 625 case Instruction::FNeg: 626 case Instruction::Add: 627 case Instruction::FAdd: 628 case Instruction::Sub: 629 case Instruction::FSub: 630 case Instruction::Mul: 631 case Instruction::FMul: 632 case Instruction::UDiv: 633 case Instruction::SDiv: 634 case Instruction::FDiv: 635 case Instruction::URem: 636 case Instruction::SRem: 637 case Instruction::FRem: 638 case Instruction::Shl: 639 case Instruction::LShr: 640 case Instruction::AShr: 641 case Instruction::And: 642 case Instruction::Or: 643 case Instruction::Xor: 644 case Instruction::ICmp: 645 case Instruction::FCmp: 646 case Instruction::Trunc: 647 case Instruction::ZExt: 648 case Instruction::SExt: 649 case Instruction::FPToUI: 650 case Instruction::FPToSI: 651 case Instruction::UIToFP: 652 case Instruction::SIToFP: 653 case Instruction::FPTrunc: 654 case Instruction::FPExt: 655 case Instruction::PtrToInt: 656 case Instruction::IntToPtr: 657 case Instruction::AddrSpaceCast: 658 case Instruction::BitCast: 659 case Instruction::Select: 660 case Instruction::Freeze: 661 case Instruction::ExtractElement: 662 case Instruction::InsertElement: 663 case Instruction::ShuffleVector: 664 case Instruction::InsertValue: 665 exp = createExpr(I); 666 break; 667 case Instruction::GetElementPtr: 668 exp = createGEPExpr(cast<GetElementPtrInst>(I)); 669 break; 670 case Instruction::ExtractValue: 671 exp = createExtractvalueExpr(cast<ExtractValueInst>(I)); 672 break; 673 case Instruction::PHI: 674 valueNumbering[V] = nextValueNumber; 675 NumberingPhi[nextValueNumber] = cast<PHINode>(V); 676 return nextValueNumber++; 677 default: 678 valueNumbering[V] = nextValueNumber; 679 return nextValueNumber++; 680 } 681 682 uint32_t e = assignExpNewValueNum(exp).first; 683 valueNumbering[V] = e; 684 return e; 685 } 686 687 /// Returns the value number of the specified value. Fails if 688 /// the value has not yet been numbered. 689 uint32_t GVNPass::ValueTable::lookup(Value *V, bool Verify) const { 690 DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V); 691 if (Verify) { 692 assert(VI != valueNumbering.end() && "Value not numbered?"); 693 return VI->second; 694 } 695 return (VI != valueNumbering.end()) ? VI->second : 0; 696 } 697 698 /// Returns the value number of the given comparison, 699 /// assigning it a new number if it did not have one before. Useful when 700 /// we deduced the result of a comparison, but don't immediately have an 701 /// instruction realizing that comparison to hand. 702 uint32_t GVNPass::ValueTable::lookupOrAddCmp(unsigned Opcode, 703 CmpInst::Predicate Predicate, 704 Value *LHS, Value *RHS) { 705 Expression exp = createCmpExpr(Opcode, Predicate, LHS, RHS); 706 return assignExpNewValueNum(exp).first; 707 } 708 709 /// Remove all entries from the ValueTable. 710 void GVNPass::ValueTable::clear() { 711 valueNumbering.clear(); 712 expressionNumbering.clear(); 713 NumberingPhi.clear(); 714 PhiTranslateTable.clear(); 715 nextValueNumber = 1; 716 Expressions.clear(); 717 ExprIdx.clear(); 718 nextExprNumber = 0; 719 } 720 721 /// Remove a value from the value numbering. 
722 void GVNPass::ValueTable::erase(Value *V) { 723 uint32_t Num = valueNumbering.lookup(V); 724 valueNumbering.erase(V); 725 // If V is PHINode, V <--> value number is an one-to-one mapping. 726 if (isa<PHINode>(V)) 727 NumberingPhi.erase(Num); 728 } 729 730 /// verifyRemoved - Verify that the value is removed from all internal data 731 /// structures. 732 void GVNPass::ValueTable::verifyRemoved(const Value *V) const { 733 assert(!valueNumbering.contains(V) && 734 "Inst still occurs in value numbering map!"); 735 } 736 737 //===----------------------------------------------------------------------===// 738 // LeaderMap External Functions 739 //===----------------------------------------------------------------------===// 740 741 /// Push a new Value to the LeaderTable onto the list for its value number. 742 void GVNPass::LeaderMap::insert(uint32_t N, Value *V, const BasicBlock *BB) { 743 LeaderListNode &Curr = NumToLeaders[N]; 744 if (!Curr.Entry.Val) { 745 Curr.Entry.Val = V; 746 Curr.Entry.BB = BB; 747 return; 748 } 749 750 LeaderListNode *Node = TableAllocator.Allocate<LeaderListNode>(); 751 Node->Entry.Val = V; 752 Node->Entry.BB = BB; 753 Node->Next = Curr.Next; 754 Curr.Next = Node; 755 } 756 757 /// Scan the list of values corresponding to a given 758 /// value number, and remove the given instruction if encountered. 759 void GVNPass::LeaderMap::erase(uint32_t N, Instruction *I, 760 const BasicBlock *BB) { 761 LeaderListNode *Prev = nullptr; 762 LeaderListNode *Curr = &NumToLeaders[N]; 763 764 while (Curr && (Curr->Entry.Val != I || Curr->Entry.BB != BB)) { 765 Prev = Curr; 766 Curr = Curr->Next; 767 } 768 769 if (!Curr) 770 return; 771 772 if (Prev) { 773 Prev->Next = Curr->Next; 774 } else { 775 if (!Curr->Next) { 776 Curr->Entry.Val = nullptr; 777 Curr->Entry.BB = nullptr; 778 } else { 779 LeaderListNode *Next = Curr->Next; 780 Curr->Entry.Val = Next->Entry.Val; 781 Curr->Entry.BB = Next->Entry.BB; 782 Curr->Next = Next->Next; 783 } 784 } 785 } 786 787 void GVNPass::LeaderMap::verifyRemoved(const Value *V) const { 788 // Walk through the value number scope to make sure the instruction isn't 789 // ferreted away in it. 
790 for (const auto &I : NumToLeaders) { 791 (void)I; 792 assert(I.second.Entry.Val != V && "Inst still in value numbering scope!"); 793 assert( 794 std::none_of(leader_iterator(&I.second), leader_iterator(nullptr), 795 [=](const LeaderTableEntry &E) { return E.Val == V; }) && 796 "Inst still in value numbering scope!"); 797 } 798 } 799 800 //===----------------------------------------------------------------------===// 801 // GVN Pass 802 //===----------------------------------------------------------------------===// 803 804 bool GVNPass::isPREEnabled() const { 805 return Options.AllowPRE.value_or(GVNEnablePRE); 806 } 807 808 bool GVNPass::isLoadPREEnabled() const { 809 return Options.AllowLoadPRE.value_or(GVNEnableLoadPRE); 810 } 811 812 bool GVNPass::isLoadInLoopPREEnabled() const { 813 return Options.AllowLoadInLoopPRE.value_or(GVNEnableLoadInLoopPRE); 814 } 815 816 bool GVNPass::isLoadPRESplitBackedgeEnabled() const { 817 return Options.AllowLoadPRESplitBackedge.value_or( 818 GVNEnableSplitBackedgeInLoadPRE); 819 } 820 821 bool GVNPass::isMemDepEnabled() const { 822 return Options.AllowMemDep.value_or(GVNEnableMemDep); 823 } 824 825 bool GVNPass::isMemorySSAEnabled() const { 826 return Options.AllowMemorySSA.value_or(GVNEnableMemorySSA); 827 } 828 829 PreservedAnalyses GVNPass::run(Function &F, FunctionAnalysisManager &AM) { 830 // FIXME: The order of evaluation of these 'getResult' calls is very 831 // significant! Re-ordering these variables will cause GVN when run alone to 832 // be less effective! We should fix memdep and basic-aa to not exhibit this 833 // behavior, but until then don't change the order here. 834 auto &AC = AM.getResult<AssumptionAnalysis>(F); 835 auto &DT = AM.getResult<DominatorTreeAnalysis>(F); 836 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); 837 auto &AA = AM.getResult<AAManager>(F); 838 auto *MemDep = 839 isMemDepEnabled() ? &AM.getResult<MemoryDependenceAnalysis>(F) : nullptr; 840 auto &LI = AM.getResult<LoopAnalysis>(F); 841 auto *MSSA = AM.getCachedResult<MemorySSAAnalysis>(F); 842 if (isMemorySSAEnabled() && !MSSA) { 843 assert(!MemDep && 844 "On-demand computation of MemSSA implies that MemDep is disabled!"); 845 MSSA = &AM.getResult<MemorySSAAnalysis>(F); 846 } 847 auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F); 848 bool Changed = runImpl(F, AC, DT, TLI, AA, MemDep, LI, &ORE, 849 MSSA ? &MSSA->getMSSA() : nullptr); 850 if (!Changed) 851 return PreservedAnalyses::all(); 852 PreservedAnalyses PA; 853 PA.preserve<DominatorTreeAnalysis>(); 854 PA.preserve<TargetLibraryAnalysis>(); 855 if (MSSA) 856 PA.preserve<MemorySSAAnalysis>(); 857 PA.preserve<LoopAnalysis>(); 858 return PA; 859 } 860 861 void GVNPass::printPipeline( 862 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { 863 static_cast<PassInfoMixin<GVNPass> *>(this)->printPipeline( 864 OS, MapClassName2PassName); 865 866 OS << '<'; 867 if (Options.AllowPRE != std::nullopt) 868 OS << (*Options.AllowPRE ? "" : "no-") << "pre;"; 869 if (Options.AllowLoadPRE != std::nullopt) 870 OS << (*Options.AllowLoadPRE ? "" : "no-") << "load-pre;"; 871 if (Options.AllowLoadPRESplitBackedge != std::nullopt) 872 OS << (*Options.AllowLoadPRESplitBackedge ? "" : "no-") 873 << "split-backedge-load-pre;"; 874 if (Options.AllowMemDep != std::nullopt) 875 OS << (*Options.AllowMemDep ? "" : "no-") << "memdep;"; 876 if (Options.AllowMemorySSA != std::nullopt) 877 OS << (*Options.AllowMemorySSA ? 
"" : "no-") << "memoryssa"; 878 OS << '>'; 879 } 880 881 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 882 LLVM_DUMP_METHOD void GVNPass::dump(DenseMap<uint32_t, Value *> &d) const { 883 errs() << "{\n"; 884 for (auto &I : d) { 885 errs() << I.first << "\n"; 886 I.second->dump(); 887 } 888 errs() << "}\n"; 889 } 890 #endif 891 892 enum class AvailabilityState : char { 893 /// We know the block *is not* fully available. This is a fixpoint. 894 Unavailable = 0, 895 /// We know the block *is* fully available. This is a fixpoint. 896 Available = 1, 897 /// We do not know whether the block is fully available or not, 898 /// but we are currently speculating that it will be. 899 /// If it would have turned out that the block was, in fact, not fully 900 /// available, this would have been cleaned up into an Unavailable. 901 SpeculativelyAvailable = 2, 902 }; 903 904 /// Return true if we can prove that the value 905 /// we're analyzing is fully available in the specified block. As we go, keep 906 /// track of which blocks we know are fully alive in FullyAvailableBlocks. This 907 /// map is actually a tri-state map with the following values: 908 /// 0) we know the block *is not* fully available. 909 /// 1) we know the block *is* fully available. 910 /// 2) we do not know whether the block is fully available or not, but we are 911 /// currently speculating that it will be. 912 static bool IsValueFullyAvailableInBlock( 913 BasicBlock *BB, 914 DenseMap<BasicBlock *, AvailabilityState> &FullyAvailableBlocks) { 915 SmallVector<BasicBlock *, 32> Worklist; 916 std::optional<BasicBlock *> UnavailableBB; 917 918 // The number of times we didn't find an entry for a block in a map and 919 // optimistically inserted an entry marking block as speculatively available. 920 unsigned NumNewNewSpeculativelyAvailableBBs = 0; 921 922 #ifndef NDEBUG 923 SmallSet<BasicBlock *, 32> NewSpeculativelyAvailableBBs; 924 SmallVector<BasicBlock *, 32> AvailableBBs; 925 #endif 926 927 Worklist.emplace_back(BB); 928 while (!Worklist.empty()) { 929 BasicBlock *CurrBB = Worklist.pop_back_val(); // LoadFO - depth-first! 930 // Optimistically assume that the block is Speculatively Available and check 931 // to see if we already know about this block in one lookup. 932 std::pair<DenseMap<BasicBlock *, AvailabilityState>::iterator, bool> IV = 933 FullyAvailableBlocks.try_emplace( 934 CurrBB, AvailabilityState::SpeculativelyAvailable); 935 AvailabilityState &State = IV.first->second; 936 937 // Did the entry already exist for this block? 938 if (!IV.second) { 939 if (State == AvailabilityState::Unavailable) { 940 UnavailableBB = CurrBB; 941 break; // Backpropagate unavailability info. 942 } 943 944 #ifndef NDEBUG 945 AvailableBBs.emplace_back(CurrBB); 946 #endif 947 continue; // Don't recurse further, but continue processing worklist. 948 } 949 950 // No entry found for block. 951 ++NumNewNewSpeculativelyAvailableBBs; 952 bool OutOfBudget = NumNewNewSpeculativelyAvailableBBs > MaxBBSpeculations; 953 954 // If we have exhausted our budget, mark this block as unavailable. 955 // Also, if this block has no predecessors, the value isn't live-in here. 956 if (OutOfBudget || pred_empty(CurrBB)) { 957 MaxBBSpeculationCutoffReachedTimes += (int)OutOfBudget; 958 State = AvailabilityState::Unavailable; 959 UnavailableBB = CurrBB; 960 break; // Backpropagate unavailability info. 961 } 962 963 // Tentatively consider this block as speculatively available. 
964 #ifndef NDEBUG 965 NewSpeculativelyAvailableBBs.insert(CurrBB); 966 #endif 967 // And further recurse into block's predecessors, in depth-first order! 968 Worklist.append(pred_begin(CurrBB), pred_end(CurrBB)); 969 } 970 971 #if LLVM_ENABLE_STATS 972 IsValueFullyAvailableInBlockNumSpeculationsMax.updateMax( 973 NumNewNewSpeculativelyAvailableBBs); 974 #endif 975 976 // If the block isn't marked as fixpoint yet 977 // (the Unavailable and Available states are fixpoints) 978 auto MarkAsFixpointAndEnqueueSuccessors = 979 [&](BasicBlock *BB, AvailabilityState FixpointState) { 980 auto It = FullyAvailableBlocks.find(BB); 981 if (It == FullyAvailableBlocks.end()) 982 return; // Never queried this block, leave as-is. 983 switch (AvailabilityState &State = It->second) { 984 case AvailabilityState::Unavailable: 985 case AvailabilityState::Available: 986 return; // Don't backpropagate further, continue processing worklist. 987 case AvailabilityState::SpeculativelyAvailable: // Fix it! 988 State = FixpointState; 989 #ifndef NDEBUG 990 assert(NewSpeculativelyAvailableBBs.erase(BB) && 991 "Found a speculatively available successor leftover?"); 992 #endif 993 // Queue successors for further processing. 994 Worklist.append(succ_begin(BB), succ_end(BB)); 995 return; 996 } 997 }; 998 999 if (UnavailableBB) { 1000 // Okay, we have encountered an unavailable block. 1001 // Mark speculatively available blocks reachable from UnavailableBB as 1002 // unavailable as well. Paths are terminated when they reach blocks not in 1003 // FullyAvailableBlocks or they are not marked as speculatively available. 1004 Worklist.clear(); 1005 Worklist.append(succ_begin(*UnavailableBB), succ_end(*UnavailableBB)); 1006 while (!Worklist.empty()) 1007 MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(), 1008 AvailabilityState::Unavailable); 1009 } 1010 1011 #ifndef NDEBUG 1012 Worklist.clear(); 1013 for (BasicBlock *AvailableBB : AvailableBBs) 1014 Worklist.append(succ_begin(AvailableBB), succ_end(AvailableBB)); 1015 while (!Worklist.empty()) 1016 MarkAsFixpointAndEnqueueSuccessors(Worklist.pop_back_val(), 1017 AvailabilityState::Available); 1018 1019 assert(NewSpeculativelyAvailableBBs.empty() && 1020 "Must have fixed all the new speculatively available blocks."); 1021 #endif 1022 1023 return !UnavailableBB; 1024 } 1025 1026 /// If the specified OldValue exists in ValuesPerBlock, replace its value with 1027 /// NewValue. 1028 static void replaceValuesPerBlockEntry( 1029 SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock, Value *OldValue, 1030 Value *NewValue) { 1031 for (AvailableValueInBlock &V : ValuesPerBlock) { 1032 if (V.AV.Val == OldValue) 1033 V.AV.Val = NewValue; 1034 if (V.AV.isSelectValue()) { 1035 if (V.AV.V1 == OldValue) 1036 V.AV.V1 = NewValue; 1037 if (V.AV.V2 == OldValue) 1038 V.AV.V2 = NewValue; 1039 } 1040 } 1041 } 1042 1043 /// Given a set of loads specified by ValuesPerBlock, 1044 /// construct SSA form, allowing us to eliminate Load. This returns the value 1045 /// that should be used at Load's definition site. 1046 static Value * 1047 ConstructSSAForLoadSet(LoadInst *Load, 1048 SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock, 1049 GVNPass &gvn) { 1050 // Check for the fully redundant, dominating load case. In this case, we can 1051 // just use the dominating value directly. 
1052 if (ValuesPerBlock.size() == 1 && 1053 gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB, 1054 Load->getParent())) { 1055 assert(!ValuesPerBlock[0].AV.isUndefValue() && 1056 "Dead BB dominate this block"); 1057 return ValuesPerBlock[0].MaterializeAdjustedValue(Load, gvn); 1058 } 1059 1060 // Otherwise, we have to construct SSA form. 1061 SmallVector<PHINode*, 8> NewPHIs; 1062 SSAUpdater SSAUpdate(&NewPHIs); 1063 SSAUpdate.Initialize(Load->getType(), Load->getName()); 1064 1065 for (const AvailableValueInBlock &AV : ValuesPerBlock) { 1066 BasicBlock *BB = AV.BB; 1067 1068 if (AV.AV.isUndefValue()) 1069 continue; 1070 1071 if (SSAUpdate.HasValueForBlock(BB)) 1072 continue; 1073 1074 // If the value is the load that we will be eliminating, and the block it's 1075 // available in is the block that the load is in, then don't add it as 1076 // SSAUpdater will resolve the value to the relevant phi which may let it 1077 // avoid phi construction entirely if there's actually only one value. 1078 if (BB == Load->getParent() && 1079 ((AV.AV.isSimpleValue() && AV.AV.getSimpleValue() == Load) || 1080 (AV.AV.isCoercedLoadValue() && AV.AV.getCoercedLoadValue() == Load))) 1081 continue; 1082 1083 SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(Load, gvn)); 1084 } 1085 1086 // Perform PHI construction. 1087 return SSAUpdate.GetValueInMiddleOfBlock(Load->getParent()); 1088 } 1089 1090 Value *AvailableValue::MaterializeAdjustedValue(LoadInst *Load, 1091 Instruction *InsertPt, 1092 GVNPass &gvn) const { 1093 Value *Res; 1094 Type *LoadTy = Load->getType(); 1095 const DataLayout &DL = Load->getDataLayout(); 1096 if (isSimpleValue()) { 1097 Res = getSimpleValue(); 1098 if (Res->getType() != LoadTy) { 1099 Res = getValueForLoad(Res, Offset, LoadTy, InsertPt, DL); 1100 1101 LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset 1102 << " " << *getSimpleValue() << '\n' 1103 << *Res << '\n' 1104 << "\n\n\n"); 1105 } 1106 } else if (isCoercedLoadValue()) { 1107 LoadInst *CoercedLoad = getCoercedLoadValue(); 1108 if (CoercedLoad->getType() == LoadTy && Offset == 0) { 1109 Res = CoercedLoad; 1110 combineMetadataForCSE(CoercedLoad, Load, false); 1111 } else { 1112 Res = getValueForLoad(CoercedLoad, Offset, LoadTy, InsertPt, DL); 1113 // We are adding a new user for this load, for which the original 1114 // metadata may not hold. Additionally, the new load may have a different 1115 // size and type, so their metadata cannot be combined in any 1116 // straightforward way. 1117 // Drop all metadata that is not known to cause immediate UB on violation, 1118 // unless the load has !noundef, in which case all metadata violations 1119 // will be promoted to UB. 1120 // TODO: We can combine noalias/alias.scope metadata here, because it is 1121 // independent of the load type. 
1122 if (!CoercedLoad->hasMetadata(LLVMContext::MD_noundef)) 1123 CoercedLoad->dropUnknownNonDebugMetadata( 1124 {LLVMContext::MD_dereferenceable, 1125 LLVMContext::MD_dereferenceable_or_null, 1126 LLVMContext::MD_invariant_load, LLVMContext::MD_invariant_group}); 1127 LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset 1128 << " " << *getCoercedLoadValue() << '\n' 1129 << *Res << '\n' 1130 << "\n\n\n"); 1131 } 1132 } else if (isMemIntrinValue()) { 1133 Res = getMemInstValueForLoad(getMemIntrinValue(), Offset, LoadTy, 1134 InsertPt, DL); 1135 LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset 1136 << " " << *getMemIntrinValue() << '\n' 1137 << *Res << '\n' 1138 << "\n\n\n"); 1139 } else if (isSelectValue()) { 1140 // Introduce a new value select for a load from an eligible pointer select. 1141 SelectInst *Sel = getSelectValue(); 1142 assert(V1 && V2 && "both value operands of the select must be present"); 1143 Res = 1144 SelectInst::Create(Sel->getCondition(), V1, V2, "", Sel->getIterator()); 1145 // We use the DebugLoc from the original load here, as this instruction 1146 // materializes the value that would previously have been loaded. 1147 cast<SelectInst>(Res)->setDebugLoc(Load->getDebugLoc()); 1148 } else { 1149 llvm_unreachable("Should not materialize value from dead block"); 1150 } 1151 assert(Res && "failed to materialize?"); 1152 return Res; 1153 } 1154 1155 static bool isLifetimeStart(const Instruction *Inst) { 1156 if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst)) 1157 return II->getIntrinsicID() == Intrinsic::lifetime_start; 1158 return false; 1159 } 1160 1161 /// Assuming To can be reached from both From and Between, does Between lie on 1162 /// every path from From to To? 1163 static bool liesBetween(const Instruction *From, Instruction *Between, 1164 const Instruction *To, DominatorTree *DT) { 1165 if (From->getParent() == Between->getParent()) 1166 return DT->dominates(From, Between); 1167 SmallSet<BasicBlock *, 1> Exclusion; 1168 Exclusion.insert(Between->getParent()); 1169 return !isPotentiallyReachable(From, To, &Exclusion, DT); 1170 } 1171 1172 /// Try to locate the three instruction involved in a missed 1173 /// load-elimination case that is due to an intervening store. 1174 static void reportMayClobberedLoad(LoadInst *Load, MemDepResult DepInfo, 1175 DominatorTree *DT, 1176 OptimizationRemarkEmitter *ORE) { 1177 using namespace ore; 1178 1179 Instruction *OtherAccess = nullptr; 1180 1181 OptimizationRemarkMissed R(DEBUG_TYPE, "LoadClobbered", Load); 1182 R << "load of type " << NV("Type", Load->getType()) << " not eliminated" 1183 << setExtraArgs(); 1184 1185 for (auto *U : Load->getPointerOperand()->users()) { 1186 if (U != Load && (isa<LoadInst>(U) || isa<StoreInst>(U))) { 1187 auto *I = cast<Instruction>(U); 1188 if (I->getFunction() == Load->getFunction() && DT->dominates(I, Load)) { 1189 // Use the most immediately dominating value 1190 if (OtherAccess) { 1191 if (DT->dominates(OtherAccess, I)) 1192 OtherAccess = I; 1193 else 1194 assert(U == OtherAccess || DT->dominates(I, OtherAccess)); 1195 } else 1196 OtherAccess = I; 1197 } 1198 } 1199 } 1200 1201 if (!OtherAccess) { 1202 // There is no dominating use, check if we can find a closest non-dominating 1203 // use that lies between any other potentially available use and Load. 
1204 for (auto *U : Load->getPointerOperand()->users()) { 1205 if (U != Load && (isa<LoadInst>(U) || isa<StoreInst>(U))) { 1206 auto *I = cast<Instruction>(U); 1207 if (I->getFunction() == Load->getFunction() && 1208 isPotentiallyReachable(I, Load, nullptr, DT)) { 1209 if (OtherAccess) { 1210 if (liesBetween(OtherAccess, I, Load, DT)) { 1211 OtherAccess = I; 1212 } else if (!liesBetween(I, OtherAccess, Load, DT)) { 1213 // These uses are both partially available at Load were it not for 1214 // the clobber, but neither lies strictly after the other. 1215 OtherAccess = nullptr; 1216 break; 1217 } // else: keep current OtherAccess since it lies between U and Load 1218 } else { 1219 OtherAccess = I; 1220 } 1221 } 1222 } 1223 } 1224 } 1225 1226 if (OtherAccess) 1227 R << " in favor of " << NV("OtherAccess", OtherAccess); 1228 1229 R << " because it is clobbered by " << NV("ClobberedBy", DepInfo.getInst()); 1230 1231 ORE->emit(R); 1232 } 1233 1234 // Find non-clobbered value for Loc memory location in extended basic block 1235 // (chain of basic blocks with single predecessors) starting From instruction. 1236 static Value *findDominatingValue(const MemoryLocation &Loc, Type *LoadTy, 1237 Instruction *From, AAResults *AA) { 1238 uint32_t NumVisitedInsts = 0; 1239 BasicBlock *FromBB = From->getParent(); 1240 BatchAAResults BatchAA(*AA); 1241 for (BasicBlock *BB = FromBB; BB; BB = BB->getSinglePredecessor()) 1242 for (auto *Inst = BB == FromBB ? From : BB->getTerminator(); 1243 Inst != nullptr; Inst = Inst->getPrevNonDebugInstruction()) { 1244 // Stop the search if limit is reached. 1245 if (++NumVisitedInsts > MaxNumVisitedInsts) 1246 return nullptr; 1247 if (isModSet(BatchAA.getModRefInfo(Inst, Loc))) 1248 return nullptr; 1249 if (auto *LI = dyn_cast<LoadInst>(Inst)) 1250 if (LI->getPointerOperand() == Loc.Ptr && LI->getType() == LoadTy) 1251 return LI; 1252 } 1253 return nullptr; 1254 } 1255 1256 std::optional<AvailableValue> 1257 GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo, 1258 Value *Address) { 1259 assert(Load->isUnordered() && "rules below are incorrect for ordered access"); 1260 assert(DepInfo.isLocal() && "expected a local dependence"); 1261 1262 Instruction *DepInst = DepInfo.getInst(); 1263 1264 const DataLayout &DL = Load->getDataLayout(); 1265 if (DepInfo.isClobber()) { 1266 // If the dependence is to a store that writes to a superset of the bits 1267 // read by the load, we can extract the bits we need for the load from the 1268 // stored value. 1269 if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) { 1270 // Can't forward from non-atomic to atomic without violating memory model. 1271 if (Address && Load->isAtomic() <= DepSI->isAtomic()) { 1272 int Offset = 1273 analyzeLoadFromClobberingStore(Load->getType(), Address, DepSI, DL); 1274 if (Offset != -1) 1275 return AvailableValue::get(DepSI->getValueOperand(), Offset); 1276 } 1277 } 1278 1279 // Check to see if we have something like this: 1280 // load i32* P 1281 // load i8* (P+1) 1282 // if we have this, replace the later with an extraction from the former. 1283 if (LoadInst *DepLoad = dyn_cast<LoadInst>(DepInst)) { 1284 // If this is a clobber and L is the first instruction in its block, then 1285 // we have the first instruction in the entry block. 1286 // Can't forward from non-atomic to atomic without violating memory model. 
      if (DepLoad != Load && Address &&
          Load->isAtomic() <= DepLoad->isAtomic()) {
        Type *LoadType = Load->getType();
        int Offset = -1;

        // If MD reported clobber, check it was nested.
        if (DepInfo.isClobber() &&
            canCoerceMustAliasedValueToLoad(DepLoad, LoadType, DL)) {
          const auto ClobberOff = MD->getClobberOffset(DepLoad);
          // GVN cannot handle a negative offset.
          Offset = (ClobberOff == std::nullopt || *ClobberOff < 0)
                       ? -1
                       : *ClobberOff;
        }
        if (Offset == -1)
          Offset =
              analyzeLoadFromClobberingLoad(LoadType, Address, DepLoad, DL);
        if (Offset != -1)
          return AvailableValue::getLoad(DepLoad, Offset);
      }
    }

    // If the clobbering value is a memset/memcpy/memmove, see if we can
    // forward a value on from it.
    if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInst)) {
      if (Address && !Load->isAtomic()) {
        int Offset = analyzeLoadFromClobberingMemInst(Load->getType(), Address,
                                                      DepMI, DL);
        if (Offset != -1)
          return AvailableValue::getMI(DepMI, Offset);
      }
    }

    // Nothing known about this clobber; have to be conservative.
    LLVM_DEBUG(
        // fast print dep, using operator<< on instruction is too slow.
        dbgs() << "GVN: load "; Load->printAsOperand(dbgs());
        dbgs() << " is clobbered by " << *DepInst << '\n';);
    if (ORE->allowExtraAnalysis(DEBUG_TYPE))
      reportMayClobberedLoad(Load, DepInfo, DT, ORE);

    return std::nullopt;
  }
  assert(DepInfo.isDef() && "follows from above");

  // Loading the alloca -> undef.
  // Loading immediately after lifetime begin -> undef.
  if (isa<AllocaInst>(DepInst) || isLifetimeStart(DepInst))
    return AvailableValue::get(UndefValue::get(Load->getType()));

  if (Constant *InitVal =
          getInitialValueOfAllocation(DepInst, TLI, Load->getType()))
    return AvailableValue::get(InitVal);

  if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
    // Reject loads and stores that are to the same address but are of
    // different types if we have to. If the stored value is convertible to
    // the loaded value, we can reuse it.
    if (!canCoerceMustAliasedValueToLoad(S->getValueOperand(), Load->getType(),
                                         DL))
      return std::nullopt;

    // Can't forward from non-atomic to atomic without violating memory model.
    if (S->isAtomic() < Load->isAtomic())
      return std::nullopt;

    return AvailableValue::get(S->getValueOperand());
  }

  if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
    // If the types mismatch and we can't handle it, reject reuse of the load.
    // If the stored value is larger than or equal to the loaded value, we can
    // reuse it.
    if (!canCoerceMustAliasedValueToLoad(LD, Load->getType(), DL))
      return std::nullopt;

    // Can't forward from non-atomic to atomic without violating memory model.
    if (LD->isAtomic() < Load->isAtomic())
      return std::nullopt;

    return AvailableValue::getLoad(LD);
  }

  // Check if a load whose address depends on a select can be converted to a
  // select between the loaded values. There must be no instructions between
  // the found loads and DepInst that may clobber the loads.
  if (auto *Sel = dyn_cast<SelectInst>(DepInst)) {
    assert(Sel->getType() == Load->getPointerOperandType());
    auto Loc = MemoryLocation::get(Load);
    Value *V1 =
        findDominatingValue(Loc.getWithNewPtr(Sel->getTrueValue()),
                            Load->getType(), DepInst, getAliasAnalysis());
    if (!V1)
      return std::nullopt;
    Value *V2 =
        findDominatingValue(Loc.getWithNewPtr(Sel->getFalseValue()),
                            Load->getType(), DepInst, getAliasAnalysis());
    if (!V2)
      return std::nullopt;
    return AvailableValue::getSelect(Sel, V1, V2);
  }

  // Unknown def - must be conservative.
  LLVM_DEBUG(
      // fast print dep, using operator<< on instruction is too slow.
      dbgs() << "GVN: load "; Load->printAsOperand(dbgs());
      dbgs() << " has unknown def " << *DepInst << '\n';);
  return std::nullopt;
}

void GVNPass::AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps,
                                      AvailValInBlkVect &ValuesPerBlock,
                                      UnavailBlkVect &UnavailableBlocks) {
  // Filter out useless results (non-locals, etc). Keep track of the blocks
  // where we have a value available (ValuesPerBlock), and also keep track of
  // whether we see dependencies that produce an unknown value for the load
  // (such as a call that could potentially clobber the load).
  for (const auto &Dep : Deps) {
    BasicBlock *DepBB = Dep.getBB();
    MemDepResult DepInfo = Dep.getResult();

    if (DeadBlocks.count(DepBB)) {
      // A dead dependent mem-op is disguised as a load evaluating the same
      // value as the load in question.
      ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(DepBB));
      continue;
    }

    if (!DepInfo.isLocal()) {
      UnavailableBlocks.push_back(DepBB);
      continue;
    }

    // The address being loaded in this non-local block may not be the same as
    // the pointer operand of the load if PHI translation occurs. Make sure
    // to consider the right address.
    if (auto AV = AnalyzeLoadAvailability(Load, DepInfo, Dep.getAddress())) {
      // Subtlety: because we know this was a non-local dependency, we know
      // it's safe to materialize anywhere between the instruction within
      // DepInfo and the end of its block.
      ValuesPerBlock.push_back(
          AvailableValueInBlock::get(DepBB, std::move(*AV)));
    } else {
      UnavailableBlocks.push_back(DepBB);
    }
  }

  assert(Deps.size() == ValuesPerBlock.size() + UnavailableBlocks.size() &&
         "post condition violation");
}

/// Given the following code, v1 is partially available on some edges, but not
/// available on the edge from PredBB. This function tries to find if there is
/// another identical load in the other successor of PredBB.
///
///      v0 = load %addr
///      br %LoadBB
///
///   LoadBB:
///      v1 = load %addr
///      ...
///
///   PredBB:
///      ...
///      br %cond, label %LoadBB, label %SuccBB
///
///   SuccBB:
///      v2 = load %addr
///      ...
///
LoadInst *GVNPass::findLoadToHoistIntoPred(BasicBlock *Pred, BasicBlock *LoadBB,
                                           LoadInst *Load) {
  // For simplicity we only handle the case where Pred has two successors.
1460 auto *Term = Pred->getTerminator(); 1461 if (Term->getNumSuccessors() != 2 || Term->isSpecialTerminator()) 1462 return nullptr; 1463 auto *SuccBB = Term->getSuccessor(0); 1464 if (SuccBB == LoadBB) 1465 SuccBB = Term->getSuccessor(1); 1466 if (!SuccBB->getSinglePredecessor()) 1467 return nullptr; 1468 1469 unsigned int NumInsts = MaxNumInsnsPerBlock; 1470 for (Instruction &Inst : *SuccBB) { 1471 if (Inst.isDebugOrPseudoInst()) 1472 continue; 1473 if (--NumInsts == 0) 1474 return nullptr; 1475 1476 if (!Inst.isIdenticalTo(Load)) 1477 continue; 1478 1479 MemDepResult Dep = MD->getDependency(&Inst); 1480 // If an identical load doesn't depends on any local instructions, it can 1481 // be safely moved to PredBB. 1482 // Also check for the implicit control flow instructions. See the comments 1483 // in PerformLoadPRE for details. 1484 if (Dep.isNonLocal() && !ICF->isDominatedByICFIFromSameBlock(&Inst)) 1485 return cast<LoadInst>(&Inst); 1486 1487 // Otherwise there is something in the same BB clobbers the memory, we can't 1488 // move this and later load to PredBB. 1489 return nullptr; 1490 } 1491 1492 return nullptr; 1493 } 1494 1495 void GVNPass::eliminatePartiallyRedundantLoad( 1496 LoadInst *Load, AvailValInBlkVect &ValuesPerBlock, 1497 MapVector<BasicBlock *, Value *> &AvailableLoads, 1498 MapVector<BasicBlock *, LoadInst *> *CriticalEdgePredAndLoad) { 1499 for (const auto &AvailableLoad : AvailableLoads) { 1500 BasicBlock *UnavailableBlock = AvailableLoad.first; 1501 Value *LoadPtr = AvailableLoad.second; 1502 1503 auto *NewLoad = new LoadInst( 1504 Load->getType(), LoadPtr, Load->getName() + ".pre", Load->isVolatile(), 1505 Load->getAlign(), Load->getOrdering(), Load->getSyncScopeID(), 1506 UnavailableBlock->getTerminator()->getIterator()); 1507 NewLoad->setDebugLoc(Load->getDebugLoc()); 1508 if (MSSAU) { 1509 auto *NewAccess = MSSAU->createMemoryAccessInBB( 1510 NewLoad, nullptr, NewLoad->getParent(), MemorySSA::BeforeTerminator); 1511 if (auto *NewDef = dyn_cast<MemoryDef>(NewAccess)) 1512 MSSAU->insertDef(NewDef, /*RenameUses=*/true); 1513 else 1514 MSSAU->insertUse(cast<MemoryUse>(NewAccess), /*RenameUses=*/true); 1515 } 1516 1517 // Transfer the old load's AA tags to the new load. 1518 AAMDNodes Tags = Load->getAAMetadata(); 1519 if (Tags) 1520 NewLoad->setAAMetadata(Tags); 1521 1522 if (auto *MD = Load->getMetadata(LLVMContext::MD_invariant_load)) 1523 NewLoad->setMetadata(LLVMContext::MD_invariant_load, MD); 1524 if (auto *InvGroupMD = Load->getMetadata(LLVMContext::MD_invariant_group)) 1525 NewLoad->setMetadata(LLVMContext::MD_invariant_group, InvGroupMD); 1526 if (auto *RangeMD = Load->getMetadata(LLVMContext::MD_range)) 1527 NewLoad->setMetadata(LLVMContext::MD_range, RangeMD); 1528 if (auto *AccessMD = Load->getMetadata(LLVMContext::MD_access_group)) 1529 if (LI->getLoopFor(Load->getParent()) == LI->getLoopFor(UnavailableBlock)) 1530 NewLoad->setMetadata(LLVMContext::MD_access_group, AccessMD); 1531 1532 // We do not propagate the old load's debug location, because the new 1533 // load now lives in a different BB, and we want to avoid a jumpy line 1534 // table. 1535 // FIXME: How do we retain source locations without causing poor debugging 1536 // behavior? 1537 1538 // Add the newly created load. 
1539 ValuesPerBlock.push_back( 1540 AvailableValueInBlock::get(UnavailableBlock, NewLoad)); 1541 MD->invalidateCachedPointerInfo(LoadPtr); 1542 LLVM_DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n'); 1543 1544 // For PredBB in CriticalEdgePredAndLoad we need to replace the uses of old 1545 // load instruction with the new created load instruction. 1546 if (CriticalEdgePredAndLoad) { 1547 auto I = CriticalEdgePredAndLoad->find(UnavailableBlock); 1548 if (I != CriticalEdgePredAndLoad->end()) { 1549 ++NumPRELoadMoved2CEPred; 1550 ICF->insertInstructionTo(NewLoad, UnavailableBlock); 1551 LoadInst *OldLoad = I->second; 1552 combineMetadataForCSE(NewLoad, OldLoad, false); 1553 OldLoad->replaceAllUsesWith(NewLoad); 1554 replaceValuesPerBlockEntry(ValuesPerBlock, OldLoad, NewLoad); 1555 if (uint32_t ValNo = VN.lookup(OldLoad, false)) 1556 LeaderTable.erase(ValNo, OldLoad, OldLoad->getParent()); 1557 VN.erase(OldLoad); 1558 removeInstruction(OldLoad); 1559 } 1560 } 1561 } 1562 1563 // Perform PHI construction. 1564 Value *V = ConstructSSAForLoadSet(Load, ValuesPerBlock, *this); 1565 // ConstructSSAForLoadSet is responsible for combining metadata. 1566 ICF->removeUsersOf(Load); 1567 Load->replaceAllUsesWith(V); 1568 if (isa<PHINode>(V)) 1569 V->takeName(Load); 1570 if (Instruction *I = dyn_cast<Instruction>(V)) 1571 I->setDebugLoc(Load->getDebugLoc()); 1572 if (V->getType()->isPtrOrPtrVectorTy()) 1573 MD->invalidateCachedPointerInfo(V); 1574 markInstructionForDeletion(Load); 1575 ORE->emit([&]() { 1576 return OptimizationRemark(DEBUG_TYPE, "LoadPRE", Load) 1577 << "load eliminated by PRE"; 1578 }); 1579 } 1580 1581 bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock, 1582 UnavailBlkVect &UnavailableBlocks) { 1583 // Okay, we have *some* definitions of the value. This means that the value 1584 // is available in some of our (transitive) predecessors. Lets think about 1585 // doing PRE of this load. This will involve inserting a new load into the 1586 // predecessor when it's not available. We could do this in general, but 1587 // prefer to not increase code size. As such, we only do this when we know 1588 // that we only have to insert *one* load (which means we're basically moving 1589 // the load, not inserting a new one). 1590 1591 SmallPtrSet<BasicBlock *, 4> Blockers(UnavailableBlocks.begin(), 1592 UnavailableBlocks.end()); 1593 1594 // Let's find the first basic block with more than one predecessor. Walk 1595 // backwards through predecessors if needed. 1596 BasicBlock *LoadBB = Load->getParent(); 1597 BasicBlock *TmpBB = LoadBB; 1598 1599 // Check that there is no implicit control flow instructions above our load in 1600 // its block. If there is an instruction that doesn't always pass the 1601 // execution to the following instruction, then moving through it may become 1602 // invalid. For example: 1603 // 1604 // int arr[LEN]; 1605 // int index = ???; 1606 // ... 1607 // guard(0 <= index && index < LEN); 1608 // use(arr[index]); 1609 // 1610 // It is illegal to move the array access to any point above the guard, 1611 // because if the index is out of bounds we should deoptimize rather than 1612 // access the array. 1613 // Check that there is no guard in this block above our instruction. 1614 bool MustEnsureSafetyOfSpeculativeExecution = 1615 ICF->isDominatedByICFIFromSameBlock(Load); 1616 1617 while (TmpBB->getSinglePredecessor()) { 1618 TmpBB = TmpBB->getSinglePredecessor(); 1619 if (TmpBB == LoadBB) // Infinite (unreachable) loop. 
      return false;
    if (Blockers.count(TmpBB))
      return false;

    // If any of these blocks has more than one successor (i.e. if the edge we
    // just traversed was critical), then there are other paths through this
    // block along which the load may not be anticipated.  Hoisting the load
    // above this block would be adding the load to execution paths along
    // which it was not previously executed.
    if (TmpBB->getTerminator()->getNumSuccessors() != 1)
      return false;

    // Check that there is no implicit control flow in a block above.
    MustEnsureSafetyOfSpeculativeExecution =
        MustEnsureSafetyOfSpeculativeExecution || ICF->hasICF(TmpBB);
  }

  assert(TmpBB);
  LoadBB = TmpBB;

  // Check to see how many predecessors have the loaded value fully
  // available.
  MapVector<BasicBlock *, Value *> PredLoads;
  DenseMap<BasicBlock *, AvailabilityState> FullyAvailableBlocks;
  for (const AvailableValueInBlock &AV : ValuesPerBlock)
    FullyAvailableBlocks[AV.BB] = AvailabilityState::Available;
  for (BasicBlock *UnavailableBB : UnavailableBlocks)
    FullyAvailableBlocks[UnavailableBB] = AvailabilityState::Unavailable;

  // The edge from Pred to LoadBB is a critical edge that will be split.
  SmallVector<BasicBlock *, 4> CriticalEdgePredSplit;
  // The edge from Pred to LoadBB is a critical edge, but another successor of
  // Pred contains a load that can be moved to Pred. This data structure maps
  // such a Pred to the movable load.
  MapVector<BasicBlock *, LoadInst *> CriticalEdgePredAndLoad;
  for (BasicBlock *Pred : predecessors(LoadBB)) {
    // If any predecessor block is an EH pad that does not allow non-PHI
    // instructions before the terminator, we can't PRE the load.
    if (Pred->getTerminator()->isEHPad()) {
      LLVM_DEBUG(
          dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD PREDECESSOR '"
                 << Pred->getName() << "': " << *Load << '\n');
      return false;
    }

    if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) {
      continue;
    }

    if (Pred->getTerminator()->getNumSuccessors() != 1) {
      if (isa<IndirectBrInst>(Pred->getTerminator())) {
        LLVM_DEBUG(
            dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
                   << Pred->getName() << "': " << *Load << '\n');
        return false;
      }

      if (LoadBB->isEHPad()) {
        LLVM_DEBUG(
            dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD CRITICAL EDGE '"
                   << Pred->getName() << "': " << *Load << '\n');
        return false;
      }

      // Do not split a backedge, as it will break the canonical loop form.
      if (!isLoadPRESplitBackedgeEnabled())
        if (DT->dominates(LoadBB, Pred)) {
          LLVM_DEBUG(
              dbgs()
              << "COULD NOT PRE LOAD BECAUSE OF A BACKEDGE CRITICAL EDGE '"
              << Pred->getName() << "': " << *Load << '\n');
          return false;
        }

      if (LoadInst *LI = findLoadToHoistIntoPred(Pred, LoadBB, Load))
        CriticalEdgePredAndLoad[Pred] = LI;
      else
        CriticalEdgePredSplit.push_back(Pred);
    } else {
      // Only add the predecessors that will not be split for now.
      PredLoads[Pred] = nullptr;
    }
  }

  // Decide whether PRE is profitable for this load.
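  // Illustrative sketch only (hypothetical IR): in a diamond where the loaded
  // value is available in one predecessor but not the other,
  //
  //   then:                          else:
  //     %v0 = load i32, ptr %p         ...no load of %p...
  //     br label %join                 br label %join
  //   join:
  //     %v1 = load i32, ptr %p
  //
  // PRE inserts a single "%v0.pre = load i32, ptr %p" at the end of %else and
  // merges %v0/%v0.pre with a phi in %join, so %v1 becomes fully redundant.
  // We only proceed when at most one such reload has to be inserted, as
  // checked below.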
1705 unsigned NumInsertPreds = PredLoads.size() + CriticalEdgePredSplit.size(); 1706 unsigned NumUnavailablePreds = NumInsertPreds + 1707 CriticalEdgePredAndLoad.size(); 1708 assert(NumUnavailablePreds != 0 && 1709 "Fully available value should already be eliminated!"); 1710 (void)NumUnavailablePreds; 1711 1712 // If we need to insert new load in multiple predecessors, reject it. 1713 // FIXME: If we could restructure the CFG, we could make a common pred with 1714 // all the preds that don't have an available Load and insert a new load into 1715 // that one block. 1716 if (NumInsertPreds > 1) 1717 return false; 1718 1719 // Now we know where we will insert load. We must ensure that it is safe 1720 // to speculatively execute the load at that points. 1721 if (MustEnsureSafetyOfSpeculativeExecution) { 1722 if (CriticalEdgePredSplit.size()) 1723 if (!isSafeToSpeculativelyExecute(Load, &*LoadBB->getFirstNonPHIIt(), AC, 1724 DT)) 1725 return false; 1726 for (auto &PL : PredLoads) 1727 if (!isSafeToSpeculativelyExecute(Load, PL.first->getTerminator(), AC, 1728 DT)) 1729 return false; 1730 for (auto &CEP : CriticalEdgePredAndLoad) 1731 if (!isSafeToSpeculativelyExecute(Load, CEP.first->getTerminator(), AC, 1732 DT)) 1733 return false; 1734 } 1735 1736 // Split critical edges, and update the unavailable predecessors accordingly. 1737 for (BasicBlock *OrigPred : CriticalEdgePredSplit) { 1738 BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB); 1739 assert(!PredLoads.count(OrigPred) && "Split edges shouldn't be in map!"); 1740 PredLoads[NewPred] = nullptr; 1741 LLVM_DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->" 1742 << LoadBB->getName() << '\n'); 1743 } 1744 1745 for (auto &CEP : CriticalEdgePredAndLoad) 1746 PredLoads[CEP.first] = nullptr; 1747 1748 // Check if the load can safely be moved to all the unavailable predecessors. 1749 bool CanDoPRE = true; 1750 const DataLayout &DL = Load->getDataLayout(); 1751 SmallVector<Instruction*, 8> NewInsts; 1752 for (auto &PredLoad : PredLoads) { 1753 BasicBlock *UnavailablePred = PredLoad.first; 1754 1755 // Do PHI translation to get its value in the predecessor if necessary. The 1756 // returned pointer (if non-null) is guaranteed to dominate UnavailablePred. 1757 // We do the translation for each edge we skipped by going from Load's block 1758 // to LoadBB, otherwise we might miss pieces needing translation. 1759 1760 // If all preds have a single successor, then we know it is safe to insert 1761 // the load on the pred (?!?), so we can insert code to materialize the 1762 // pointer if it is not available. 1763 Value *LoadPtr = Load->getPointerOperand(); 1764 BasicBlock *Cur = Load->getParent(); 1765 while (Cur != LoadBB) { 1766 PHITransAddr Address(LoadPtr, DL, AC); 1767 LoadPtr = Address.translateWithInsertion(Cur, Cur->getSinglePredecessor(), 1768 *DT, NewInsts); 1769 if (!LoadPtr) { 1770 CanDoPRE = false; 1771 break; 1772 } 1773 Cur = Cur->getSinglePredecessor(); 1774 } 1775 1776 if (LoadPtr) { 1777 PHITransAddr Address(LoadPtr, DL, AC); 1778 LoadPtr = Address.translateWithInsertion(LoadBB, UnavailablePred, *DT, 1779 NewInsts); 1780 } 1781 // If we couldn't find or insert a computation of this phi translated value, 1782 // we fail PRE. 
1783 if (!LoadPtr) { 1784 LLVM_DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: " 1785 << *Load->getPointerOperand() << "\n"); 1786 CanDoPRE = false; 1787 break; 1788 } 1789 1790 PredLoad.second = LoadPtr; 1791 } 1792 1793 if (!CanDoPRE) { 1794 while (!NewInsts.empty()) { 1795 // Erase instructions generated by the failed PHI translation before 1796 // trying to number them. PHI translation might insert instructions 1797 // in basic blocks other than the current one, and we delete them 1798 // directly, as markInstructionForDeletion only allows removing from the 1799 // current basic block. 1800 NewInsts.pop_back_val()->eraseFromParent(); 1801 } 1802 // HINT: Don't revert the edge-splitting as following transformation may 1803 // also need to split these critical edges. 1804 return !CriticalEdgePredSplit.empty(); 1805 } 1806 1807 // Okay, we can eliminate this load by inserting a reload in the predecessor 1808 // and using PHI construction to get the value in the other predecessors, do 1809 // it. 1810 LLVM_DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *Load << '\n'); 1811 LLVM_DEBUG(if (!NewInsts.empty()) dbgs() << "INSERTED " << NewInsts.size() 1812 << " INSTS: " << *NewInsts.back() 1813 << '\n'); 1814 1815 // Assign value numbers to the new instructions. 1816 for (Instruction *I : NewInsts) { 1817 // Instructions that have been inserted in predecessor(s) to materialize 1818 // the load address do not retain their original debug locations. Doing 1819 // so could lead to confusing (but correct) source attributions. 1820 I->updateLocationAfterHoist(); 1821 1822 // FIXME: We really _ought_ to insert these value numbers into their 1823 // parent's availability map. However, in doing so, we risk getting into 1824 // ordering issues. If a block hasn't been processed yet, we would be 1825 // marking a value as AVAIL-IN, which isn't what we intend. 1826 VN.lookupOrAdd(I); 1827 } 1828 1829 eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, PredLoads, 1830 &CriticalEdgePredAndLoad); 1831 ++NumPRELoad; 1832 return true; 1833 } 1834 1835 bool GVNPass::performLoopLoadPRE(LoadInst *Load, 1836 AvailValInBlkVect &ValuesPerBlock, 1837 UnavailBlkVect &UnavailableBlocks) { 1838 const Loop *L = LI->getLoopFor(Load->getParent()); 1839 // TODO: Generalize to other loop blocks that dominate the latch. 1840 if (!L || L->getHeader() != Load->getParent()) 1841 return false; 1842 1843 BasicBlock *Preheader = L->getLoopPreheader(); 1844 BasicBlock *Latch = L->getLoopLatch(); 1845 if (!Preheader || !Latch) 1846 return false; 1847 1848 Value *LoadPtr = Load->getPointerOperand(); 1849 // Must be available in preheader. 1850 if (!L->isLoopInvariant(LoadPtr)) 1851 return false; 1852 1853 // We plan to hoist the load to preheader without introducing a new fault. 1854 // In order to do it, we need to prove that we cannot side-exit the loop 1855 // once loop header is first entered before execution of the load. 1856 if (ICF->isDominatedByICFIFromSameBlock(Load)) 1857 return false; 1858 1859 BasicBlock *LoopBlock = nullptr; 1860 for (auto *Blocker : UnavailableBlocks) { 1861 // Blockers from outside the loop are handled in preheader. 1862 if (!L->contains(Blocker)) 1863 continue; 1864 1865 // Only allow one loop block. Loop header is not less frequently executed 1866 // than each loop block, and likely it is much more frequently executed. 
But 1867 // in case of multiple loop blocks, we need extra information (such as block 1868 // frequency info) to understand whether it is profitable to PRE into 1869 // multiple loop blocks. 1870 if (LoopBlock) 1871 return false; 1872 1873 // Do not sink into inner loops. This may be non-profitable. 1874 if (L != LI->getLoopFor(Blocker)) 1875 return false; 1876 1877 // Blocks that dominate the latch execute on every single iteration, maybe 1878 // except the last one. So PREing into these blocks doesn't make much sense 1879 // in most cases. But the blocks that do not necessarily execute on each 1880 // iteration are sometimes much colder than the header, and this is when 1881 // PRE is potentially profitable. 1882 if (DT->dominates(Blocker, Latch)) 1883 return false; 1884 1885 // Make sure that the terminator itself doesn't clobber. 1886 if (Blocker->getTerminator()->mayWriteToMemory()) 1887 return false; 1888 1889 LoopBlock = Blocker; 1890 } 1891 1892 if (!LoopBlock) 1893 return false; 1894 1895 // Make sure the memory at this pointer cannot be freed, therefore we can 1896 // safely reload from it after clobber. 1897 if (LoadPtr->canBeFreed()) 1898 return false; 1899 1900 // TODO: Support critical edge splitting if blocker has more than 1 successor. 1901 MapVector<BasicBlock *, Value *> AvailableLoads; 1902 AvailableLoads[LoopBlock] = LoadPtr; 1903 AvailableLoads[Preheader] = LoadPtr; 1904 1905 LLVM_DEBUG(dbgs() << "GVN REMOVING PRE LOOP LOAD: " << *Load << '\n'); 1906 eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, AvailableLoads, 1907 /*CriticalEdgePredAndLoad*/ nullptr); 1908 ++NumPRELoopLoad; 1909 return true; 1910 } 1911 1912 static void reportLoadElim(LoadInst *Load, Value *AvailableValue, 1913 OptimizationRemarkEmitter *ORE) { 1914 using namespace ore; 1915 1916 ORE->emit([&]() { 1917 return OptimizationRemark(DEBUG_TYPE, "LoadElim", Load) 1918 << "load of type " << NV("Type", Load->getType()) << " eliminated" 1919 << setExtraArgs() << " in favor of " 1920 << NV("InfavorOfValue", AvailableValue); 1921 }); 1922 } 1923 1924 /// Attempt to eliminate a load whose dependencies are 1925 /// non-local by performing PHI construction. 1926 bool GVNPass::processNonLocalLoad(LoadInst *Load) { 1927 // non-local speculations are not allowed under asan. 1928 if (Load->getParent()->getParent()->hasFnAttribute( 1929 Attribute::SanitizeAddress) || 1930 Load->getParent()->getParent()->hasFnAttribute( 1931 Attribute::SanitizeHWAddress)) 1932 return false; 1933 1934 // Step 1: Find the non-local dependencies of the load. 1935 LoadDepVect Deps; 1936 MD->getNonLocalPointerDependency(Load, Deps); 1937 1938 // If we had to process more than one hundred blocks to find the 1939 // dependencies, this load isn't worth worrying about. Optimizing 1940 // it will be too expensive. 1941 unsigned NumDeps = Deps.size(); 1942 if (NumDeps > MaxNumDeps) 1943 return false; 1944 1945 // If we had a phi translation failure, we'll have a single entry which is a 1946 // clobber in the current block. Reject this early. 1947 if (NumDeps == 1 && 1948 !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) { 1949 LLVM_DEBUG(dbgs() << "GVN: non-local load "; Load->printAsOperand(dbgs()); 1950 dbgs() << " has unknown dependencies\n";); 1951 return false; 1952 } 1953 1954 bool Changed = false; 1955 // If this load follows a GEP, see if we can PRE the indices before analyzing. 
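  // A hedged illustration (names are hypothetical): if the load's pointer is
  //   %p = getelementptr inbounds i32, ptr %base, i64 %idx
  // and %idx is computed by a partially redundant instruction, PRE'ing %idx
  // first makes the address easier to phi-translate into predecessors during
  // the availability analysis below.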
1956 if (GetElementPtrInst *GEP = 1957 dyn_cast<GetElementPtrInst>(Load->getOperand(0))) { 1958 for (Use &U : GEP->indices()) 1959 if (Instruction *I = dyn_cast<Instruction>(U.get())) 1960 Changed |= performScalarPRE(I); 1961 } 1962 1963 // Step 2: Analyze the availability of the load 1964 AvailValInBlkVect ValuesPerBlock; 1965 UnavailBlkVect UnavailableBlocks; 1966 AnalyzeLoadAvailability(Load, Deps, ValuesPerBlock, UnavailableBlocks); 1967 1968 // If we have no predecessors that produce a known value for this load, exit 1969 // early. 1970 if (ValuesPerBlock.empty()) 1971 return Changed; 1972 1973 // Step 3: Eliminate fully redundancy. 1974 // 1975 // If all of the instructions we depend on produce a known value for this 1976 // load, then it is fully redundant and we can use PHI insertion to compute 1977 // its value. Insert PHIs and remove the fully redundant value now. 1978 if (UnavailableBlocks.empty()) { 1979 LLVM_DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *Load << '\n'); 1980 1981 // Perform PHI construction. 1982 Value *V = ConstructSSAForLoadSet(Load, ValuesPerBlock, *this); 1983 // ConstructSSAForLoadSet is responsible for combining metadata. 1984 ICF->removeUsersOf(Load); 1985 Load->replaceAllUsesWith(V); 1986 1987 if (isa<PHINode>(V)) 1988 V->takeName(Load); 1989 if (Instruction *I = dyn_cast<Instruction>(V)) 1990 // If instruction I has debug info, then we should not update it. 1991 // Also, if I has a null DebugLoc, then it is still potentially incorrect 1992 // to propagate Load's DebugLoc because Load may not post-dominate I. 1993 if (Load->getDebugLoc() && Load->getParent() == I->getParent()) 1994 I->setDebugLoc(Load->getDebugLoc()); 1995 if (V->getType()->isPtrOrPtrVectorTy()) 1996 MD->invalidateCachedPointerInfo(V); 1997 markInstructionForDeletion(Load); 1998 ++NumGVNLoad; 1999 reportLoadElim(Load, V, ORE); 2000 return true; 2001 } 2002 2003 // Step 4: Eliminate partial redundancy. 2004 if (!isPREEnabled() || !isLoadPREEnabled()) 2005 return Changed; 2006 if (!isLoadInLoopPREEnabled() && LI->getLoopFor(Load->getParent())) 2007 return Changed; 2008 2009 if (performLoopLoadPRE(Load, ValuesPerBlock, UnavailableBlocks) || 2010 PerformLoadPRE(Load, ValuesPerBlock, UnavailableBlocks)) 2011 return true; 2012 2013 return Changed; 2014 } 2015 2016 static bool hasUsersIn(Value *V, BasicBlock *BB) { 2017 return llvm::any_of(V->users(), [BB](User *U) { 2018 auto *I = dyn_cast<Instruction>(U); 2019 return I && I->getParent() == BB; 2020 }); 2021 } 2022 2023 bool GVNPass::processAssumeIntrinsic(AssumeInst *IntrinsicI) { 2024 Value *V = IntrinsicI->getArgOperand(0); 2025 2026 if (ConstantInt *Cond = dyn_cast<ConstantInt>(V)) { 2027 if (Cond->isZero()) { 2028 Type *Int8Ty = Type::getInt8Ty(V->getContext()); 2029 Type *PtrTy = PointerType::get(V->getContext(), 0); 2030 // Insert a new store to null instruction before the load to indicate that 2031 // this code is not reachable. FIXME: We could insert unreachable 2032 // instruction directly because we can modify the CFG. 2033 auto *NewS = 2034 new StoreInst(PoisonValue::get(Int8Ty), Constant::getNullValue(PtrTy), 2035 IntrinsicI->getIterator()); 2036 if (MSSAU) { 2037 const MemoryUseOrDef *FirstNonDom = nullptr; 2038 const auto *AL = 2039 MSSAU->getMemorySSA()->getBlockAccesses(IntrinsicI->getParent()); 2040 2041 // If there are accesses in the current basic block, find the first one 2042 // that does not come before NewS. 
        // The new memory access is inserted after the found access, or before
        // the terminator if no such access is found.
        if (AL) {
          for (const auto &Acc : *AL) {
            if (auto *Current = dyn_cast<MemoryUseOrDef>(&Acc))
              if (!Current->getMemoryInst()->comesBefore(NewS)) {
                FirstNonDom = Current;
                break;
              }
          }
        }

        auto *NewDef =
            FirstNonDom ? MSSAU->createMemoryAccessBefore(
                              NewS, nullptr,
                              const_cast<MemoryUseOrDef *>(FirstNonDom))
                        : MSSAU->createMemoryAccessInBB(
                              NewS, nullptr,
                              NewS->getParent(), MemorySSA::BeforeTerminator);

        MSSAU->insertDef(cast<MemoryDef>(NewDef), /*RenameUses=*/false);
      }
    }
    if (isAssumeWithEmptyBundle(*IntrinsicI)) {
      markInstructionForDeletion(IntrinsicI);
      return true;
    }
    return false;
  }

  if (isa<Constant>(V)) {
    // If it's a constant and not false, it must evaluate to true. This means
    // our assume is assume(true) and is thus pointless, so we don't want to do
    // anything more here.
    return false;
  }

  Constant *True = ConstantInt::getTrue(V->getContext());
  bool Changed = false;

  for (BasicBlock *Successor : successors(IntrinsicI->getParent())) {
    BasicBlockEdge Edge(IntrinsicI->getParent(), Successor);

    // This property only holds in dominated successors; propagateEquality
    // will check dominance for us.
    Changed |= propagateEquality(V, True, Edge, false);
  }

  // We can replace the assume's value with true, which covers cases like this:
  // call void @llvm.assume(i1 %cmp)
  // br i1 %cmp, label %bb1, label %bb2 ; will change %cmp to true
  ReplaceOperandsWithMap[V] = True;

  // Similarly, after assume(!NotV) we know that NotV == false.
  Value *NotV;
  if (match(V, m_Not(m_Value(NotV))))
    ReplaceOperandsWithMap[NotV] = ConstantInt::getFalse(V->getContext());

  // If we find an equality fact, canonicalize all dominated uses in this block
  // to one of the two values.  We heuristically choose the "oldest" of the
  // two, where age is determined by value number. (Note that propagateEquality
  // above handles the cross block case.)
  //
  // Key cases to cover are:
  // 1)
  // %cmp = fcmp oeq float 3.000000e+00, %0 ; const on lhs could happen
  // call void @llvm.assume(i1 %cmp)
  // ret float %0 ; will change it to ret float 3.000000e+00
  // 2)
  // %load = load float, float* %addr
  // %cmp = fcmp oeq float %load, %0
  // call void @llvm.assume(i1 %cmp)
  // ret float %load ; will change it to ret float %0
  if (auto *CmpI = dyn_cast<CmpInst>(V)) {
    if (CmpI->isEquivalence()) {
      Value *CmpLHS = CmpI->getOperand(0);
      Value *CmpRHS = CmpI->getOperand(1);
      // Heuristically pick the better replacement -- the choice of heuristic
      // isn't terribly important here, but the fact that we canonicalize on
      // some replacement is for exposing other simplifications.
      // TODO: pull this out as a helper function and reuse w/existing
      // (slightly different) logic.
      if (isa<Constant>(CmpLHS) && !isa<Constant>(CmpRHS))
        std::swap(CmpLHS, CmpRHS);
      if (!isa<Instruction>(CmpLHS) && isa<Instruction>(CmpRHS))
        std::swap(CmpLHS, CmpRHS);
      if ((isa<Argument>(CmpLHS) && isa<Argument>(CmpRHS)) ||
          (isa<Instruction>(CmpLHS) && isa<Instruction>(CmpRHS))) {
        // Move the 'oldest' value to the right-hand side, using the value
        // number as a proxy for age.
        uint32_t LVN = VN.lookupOrAdd(CmpLHS);
        uint32_t RVN = VN.lookupOrAdd(CmpRHS);
        if (LVN < RVN)
          std::swap(CmpLHS, CmpRHS);
      }

      // Handle the degenerate case where we either haven't pruned a dead path
      // or removed a trivial assume yet.
      if (isa<Constant>(CmpLHS) && isa<Constant>(CmpRHS))
        return Changed;

      LLVM_DEBUG(dbgs() << "Replacing dominated uses of "
                        << *CmpLHS << " with "
                        << *CmpRHS << " in block "
                        << IntrinsicI->getParent()->getName() << "\n");


      // Set up the replacement map - this handles uses within the same block.
      if (hasUsersIn(CmpLHS, IntrinsicI->getParent()))
        ReplaceOperandsWithMap[CmpLHS] = CmpRHS;

      // NOTE: The non-block-local cases are handled by the call to
      // propagateEquality above; this block is just about handling the
      // block-local cases. TODO: There's a bunch of logic in propagateEquality
      // which isn't duplicated for the block-local case, can we share it
      // somehow?
    }
  }
  return Changed;
}

static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
  patchReplacementInstruction(I, Repl);
  I->replaceAllUsesWith(Repl);
}

/// Attempt to eliminate a load, first by eliminating it
/// locally, and then attempting non-local elimination if that fails.
bool GVNPass::processLoad(LoadInst *L) {
  if (!MD)
    return false;

  // This code hasn't been audited for ordered or volatile memory access.
  if (!L->isUnordered())
    return false;

  if (L->use_empty()) {
    markInstructionForDeletion(L);
    return true;
  }

  // ... to a pointer that has been loaded from before...
  MemDepResult Dep = MD->getDependency(L);

  // If it is defined in another block, try harder.
  if (Dep.isNonLocal())
    return processNonLocalLoad(L);

  // Only handle the local case below.
  if (!Dep.isLocal()) {
    // This might be a NonFuncLocal or an Unknown.
    LLVM_DEBUG(
        // fast print dep, using operator<< on instruction is too slow.
        dbgs() << "GVN: load "; L->printAsOperand(dbgs());
        dbgs() << " has unknown dependence\n";);
    return false;
  }

  auto AV = AnalyzeLoadAvailability(L, Dep, L->getPointerOperand());
  if (!AV)
    return false;

  Value *AvailableValue = AV->MaterializeAdjustedValue(L, L, *this);

  // MaterializeAdjustedValue is responsible for combining metadata.
  ICF->removeUsersOf(L);
  L->replaceAllUsesWith(AvailableValue);
  markInstructionForDeletion(L);
  if (MSSAU)
    MSSAU->removeMemoryAccess(L);
  ++NumGVNLoad;
  reportLoadElim(L, AvailableValue, ORE);
  // Tell MDA to reexamine the reused pointer since we might have more
  // information after forwarding it.
  if (MD && AvailableValue->getType()->isPtrOrPtrVectorTy())
    MD->invalidateCachedPointerInfo(AvailableValue);
  return true;
}

/// Return a pair whose first field is the value number of \p Exp and whose
/// second field indicates whether that value number is newly created.
2223 std::pair<uint32_t, bool> 2224 GVNPass::ValueTable::assignExpNewValueNum(Expression &Exp) { 2225 uint32_t &e = expressionNumbering[Exp]; 2226 bool CreateNewValNum = !e; 2227 if (CreateNewValNum) { 2228 Expressions.push_back(Exp); 2229 if (ExprIdx.size() < nextValueNumber + 1) 2230 ExprIdx.resize(nextValueNumber * 2); 2231 e = nextValueNumber; 2232 ExprIdx[nextValueNumber++] = nextExprNumber++; 2233 } 2234 return {e, CreateNewValNum}; 2235 } 2236 2237 /// Return whether all the values related with the same \p num are 2238 /// defined in \p BB. 2239 bool GVNPass::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB, 2240 GVNPass &Gvn) { 2241 return all_of( 2242 Gvn.LeaderTable.getLeaders(Num), 2243 [=](const LeaderMap::LeaderTableEntry &L) { return L.BB == BB; }); 2244 } 2245 2246 /// Wrap phiTranslateImpl to provide caching functionality. 2247 uint32_t GVNPass::ValueTable::phiTranslate(const BasicBlock *Pred, 2248 const BasicBlock *PhiBlock, 2249 uint32_t Num, GVNPass &Gvn) { 2250 auto FindRes = PhiTranslateTable.find({Num, Pred}); 2251 if (FindRes != PhiTranslateTable.end()) 2252 return FindRes->second; 2253 uint32_t NewNum = phiTranslateImpl(Pred, PhiBlock, Num, Gvn); 2254 PhiTranslateTable.insert({{Num, Pred}, NewNum}); 2255 return NewNum; 2256 } 2257 2258 // Return true if the value number \p Num and NewNum have equal value. 2259 // Return false if the result is unknown. 2260 bool GVNPass::ValueTable::areCallValsEqual(uint32_t Num, uint32_t NewNum, 2261 const BasicBlock *Pred, 2262 const BasicBlock *PhiBlock, 2263 GVNPass &Gvn) { 2264 CallInst *Call = nullptr; 2265 auto Leaders = Gvn.LeaderTable.getLeaders(Num); 2266 for (const auto &Entry : Leaders) { 2267 Call = dyn_cast<CallInst>(Entry.Val); 2268 if (Call && Call->getParent() == PhiBlock) 2269 break; 2270 } 2271 2272 if (AA->doesNotAccessMemory(Call)) 2273 return true; 2274 2275 if (!MD || !AA->onlyReadsMemory(Call)) 2276 return false; 2277 2278 MemDepResult local_dep = MD->getDependency(Call); 2279 if (!local_dep.isNonLocal()) 2280 return false; 2281 2282 const MemoryDependenceResults::NonLocalDepInfo &deps = 2283 MD->getNonLocalCallDependency(Call); 2284 2285 // Check to see if the Call has no function local clobber. 2286 for (const NonLocalDepEntry &D : deps) { 2287 if (D.getResult().isNonFuncLocal()) 2288 return true; 2289 } 2290 return false; 2291 } 2292 2293 /// Translate value number \p Num using phis, so that it has the values of 2294 /// the phis in BB. 2295 uint32_t GVNPass::ValueTable::phiTranslateImpl(const BasicBlock *Pred, 2296 const BasicBlock *PhiBlock, 2297 uint32_t Num, GVNPass &Gvn) { 2298 if (PHINode *PN = NumberingPhi[Num]) { 2299 for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) { 2300 if (PN->getParent() == PhiBlock && PN->getIncomingBlock(i) == Pred) 2301 if (uint32_t TransVal = lookup(PN->getIncomingValue(i), false)) 2302 return TransVal; 2303 } 2304 return Num; 2305 } 2306 2307 // If there is any value related with Num is defined in a BB other than 2308 // PhiBlock, it cannot depend on a phi in PhiBlock without going through 2309 // a backedge. We can do an early exit in that case to save compile time. 2310 if (!areAllValsInBB(Num, PhiBlock, Gvn)) 2311 return Num; 2312 2313 if (Num >= ExprIdx.size() || ExprIdx[Num] == 0) 2314 return Num; 2315 Expression Exp = Expressions[ExprIdx[Num]]; 2316 2317 for (unsigned i = 0; i < Exp.varargs.size(); i++) { 2318 // For InsertValue and ExtractValue, some varargs are index numbers 2319 // instead of value numbers. 
Those index numbers should not be 2320 // translated. 2321 if ((i > 1 && Exp.opcode == Instruction::InsertValue) || 2322 (i > 0 && Exp.opcode == Instruction::ExtractValue) || 2323 (i > 1 && Exp.opcode == Instruction::ShuffleVector)) 2324 continue; 2325 Exp.varargs[i] = phiTranslate(Pred, PhiBlock, Exp.varargs[i], Gvn); 2326 } 2327 2328 if (Exp.commutative) { 2329 assert(Exp.varargs.size() >= 2 && "Unsupported commutative instruction!"); 2330 if (Exp.varargs[0] > Exp.varargs[1]) { 2331 std::swap(Exp.varargs[0], Exp.varargs[1]); 2332 uint32_t Opcode = Exp.opcode >> 8; 2333 if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) 2334 Exp.opcode = (Opcode << 8) | 2335 CmpInst::getSwappedPredicate( 2336 static_cast<CmpInst::Predicate>(Exp.opcode & 255)); 2337 } 2338 } 2339 2340 if (uint32_t NewNum = expressionNumbering[Exp]) { 2341 if (Exp.opcode == Instruction::Call && NewNum != Num) 2342 return areCallValsEqual(Num, NewNum, Pred, PhiBlock, Gvn) ? NewNum : Num; 2343 return NewNum; 2344 } 2345 return Num; 2346 } 2347 2348 /// Erase stale entry from phiTranslate cache so phiTranslate can be computed 2349 /// again. 2350 void GVNPass::ValueTable::eraseTranslateCacheEntry( 2351 uint32_t Num, const BasicBlock &CurrBlock) { 2352 for (const BasicBlock *Pred : predecessors(&CurrBlock)) 2353 PhiTranslateTable.erase({Num, Pred}); 2354 } 2355 2356 // In order to find a leader for a given value number at a 2357 // specific basic block, we first obtain the list of all Values for that number, 2358 // and then scan the list to find one whose block dominates the block in 2359 // question. This is fast because dominator tree queries consist of only 2360 // a few comparisons of DFS numbers. 2361 Value *GVNPass::findLeader(const BasicBlock *BB, uint32_t num) { 2362 auto Leaders = LeaderTable.getLeaders(num); 2363 if (Leaders.empty()) 2364 return nullptr; 2365 2366 Value *Val = nullptr; 2367 for (const auto &Entry : Leaders) { 2368 if (DT->dominates(Entry.BB, BB)) { 2369 Val = Entry.Val; 2370 if (isa<Constant>(Val)) 2371 return Val; 2372 } 2373 } 2374 2375 return Val; 2376 } 2377 2378 /// There is an edge from 'Src' to 'Dst'. Return 2379 /// true if every path from the entry block to 'Dst' passes via this edge. In 2380 /// particular 'Dst' must not be reachable via another edge from 'Src'. 2381 static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E, 2382 DominatorTree *DT) { 2383 // While in theory it is interesting to consider the case in which Dst has 2384 // more than one predecessor, because Dst might be part of a loop which is 2385 // only reachable from Src, in practice it is pointless since at the time 2386 // GVN runs all such loops have preheaders, which means that Dst will have 2387 // been changed to have only one predecessor, namely Src. 
2388 const BasicBlock *Pred = E.getEnd()->getSinglePredecessor(); 2389 assert((!Pred || Pred == E.getStart()) && 2390 "No edge between these basic blocks!"); 2391 return Pred != nullptr; 2392 } 2393 2394 void GVNPass::assignBlockRPONumber(Function &F) { 2395 BlockRPONumber.clear(); 2396 uint32_t NextBlockNumber = 1; 2397 ReversePostOrderTraversal<Function *> RPOT(&F); 2398 for (BasicBlock *BB : RPOT) 2399 BlockRPONumber[BB] = NextBlockNumber++; 2400 InvalidBlockRPONumbers = false; 2401 } 2402 2403 bool GVNPass::replaceOperandsForInBlockEquality(Instruction *Instr) const { 2404 bool Changed = false; 2405 for (unsigned OpNum = 0; OpNum < Instr->getNumOperands(); ++OpNum) { 2406 Value *Operand = Instr->getOperand(OpNum); 2407 auto it = ReplaceOperandsWithMap.find(Operand); 2408 if (it != ReplaceOperandsWithMap.end()) { 2409 LLVM_DEBUG(dbgs() << "GVN replacing: " << *Operand << " with " 2410 << *it->second << " in instruction " << *Instr << '\n'); 2411 Instr->setOperand(OpNum, it->second); 2412 Changed = true; 2413 } 2414 } 2415 return Changed; 2416 } 2417 2418 /// The given values are known to be equal in every block 2419 /// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with 2420 /// 'RHS' everywhere in the scope. Returns whether a change was made. 2421 /// If DominatesByEdge is false, then it means that we will propagate the RHS 2422 /// value starting from the end of Root.Start. 2423 bool GVNPass::propagateEquality(Value *LHS, Value *RHS, 2424 const BasicBlockEdge &Root, 2425 bool DominatesByEdge) { 2426 SmallVector<std::pair<Value*, Value*>, 4> Worklist; 2427 Worklist.push_back(std::make_pair(LHS, RHS)); 2428 bool Changed = false; 2429 // For speed, compute a conservative fast approximation to 2430 // DT->dominates(Root, Root.getEnd()); 2431 const bool RootDominatesEnd = isOnlyReachableViaThisEdge(Root, DT); 2432 2433 while (!Worklist.empty()) { 2434 std::pair<Value*, Value*> Item = Worklist.pop_back_val(); 2435 LHS = Item.first; RHS = Item.second; 2436 2437 if (LHS == RHS) 2438 continue; 2439 assert(LHS->getType() == RHS->getType() && "Equality but unequal types!"); 2440 2441 // Don't try to propagate equalities between constants. 2442 if (isa<Constant>(LHS) && isa<Constant>(RHS)) 2443 continue; 2444 2445 // Prefer a constant on the right-hand side, or an Argument if no constants. 2446 if (isa<Constant>(LHS) || (isa<Argument>(LHS) && !isa<Constant>(RHS))) 2447 std::swap(LHS, RHS); 2448 assert((isa<Argument>(LHS) || isa<Instruction>(LHS)) && "Unexpected value!"); 2449 const DataLayout &DL = 2450 isa<Argument>(LHS) 2451 ? cast<Argument>(LHS)->getParent()->getDataLayout() 2452 : cast<Instruction>(LHS)->getDataLayout(); 2453 2454 // If there is no obvious reason to prefer the left-hand side over the 2455 // right-hand side, ensure the longest lived term is on the right-hand side, 2456 // so the shortest lived term will be replaced by the longest lived. 2457 // This tends to expose more simplifications. 2458 uint32_t LVN = VN.lookupOrAdd(LHS); 2459 if ((isa<Argument>(LHS) && isa<Argument>(RHS)) || 2460 (isa<Instruction>(LHS) && isa<Instruction>(RHS))) { 2461 // Move the 'oldest' value to the right-hand side, using the value number 2462 // as a proxy for age. 2463 uint32_t RVN = VN.lookupOrAdd(RHS); 2464 if (LVN < RVN) { 2465 std::swap(LHS, RHS); 2466 LVN = RVN; 2467 } 2468 } 2469 2470 // If value numbering later sees that an instruction in the scope is equal 2471 // to 'LHS' then ensure it will be turned into 'RHS'. 
In order to preserve 2472 // the invariant that instructions only occur in the leader table for their 2473 // own value number (this is used by removeFromLeaderTable), do not do this 2474 // if RHS is an instruction (if an instruction in the scope is morphed into 2475 // LHS then it will be turned into RHS by the next GVN iteration anyway, so 2476 // using the leader table is about compiling faster, not optimizing better). 2477 // The leader table only tracks basic blocks, not edges. Only add to if we 2478 // have the simple case where the edge dominates the end. 2479 if (RootDominatesEnd && !isa<Instruction>(RHS) && 2480 canReplacePointersIfEqual(LHS, RHS, DL)) 2481 LeaderTable.insert(LVN, RHS, Root.getEnd()); 2482 2483 // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As 2484 // LHS always has at least one use that is not dominated by Root, this will 2485 // never do anything if LHS has only one use. 2486 if (!LHS->hasOneUse()) { 2487 // Create a callback that captures the DL. 2488 auto canReplacePointersCallBack = [&DL](const Use &U, const Value *To) { 2489 return canReplacePointersInUseIfEqual(U, To, DL); 2490 }; 2491 unsigned NumReplacements = 2492 DominatesByEdge 2493 ? replaceDominatedUsesWithIf(LHS, RHS, *DT, Root, 2494 canReplacePointersCallBack) 2495 : replaceDominatedUsesWithIf(LHS, RHS, *DT, Root.getStart(), 2496 canReplacePointersCallBack); 2497 2498 if (NumReplacements > 0) { 2499 Changed = true; 2500 NumGVNEqProp += NumReplacements; 2501 // Cached information for anything that uses LHS will be invalid. 2502 if (MD) 2503 MD->invalidateCachedPointerInfo(LHS); 2504 } 2505 } 2506 2507 // Now try to deduce additional equalities from this one. For example, if 2508 // the known equality was "(A != B)" == "false" then it follows that A and B 2509 // are equal in the scope. Only boolean equalities with an explicit true or 2510 // false RHS are currently supported. 2511 if (!RHS->getType()->isIntegerTy(1)) 2512 // Not a boolean equality - bail out. 2513 continue; 2514 ConstantInt *CI = dyn_cast<ConstantInt>(RHS); 2515 if (!CI) 2516 // RHS neither 'true' nor 'false' - bail out. 2517 continue; 2518 // Whether RHS equals 'true'. Otherwise it equals 'false'. 2519 bool isKnownTrue = CI->isMinusOne(); 2520 bool isKnownFalse = !isKnownTrue; 2521 2522 // If "A && B" is known true then both A and B are known true. If "A || B" 2523 // is known false then both A and B are known false. 2524 Value *A, *B; 2525 if ((isKnownTrue && match(LHS, m_LogicalAnd(m_Value(A), m_Value(B)))) || 2526 (isKnownFalse && match(LHS, m_LogicalOr(m_Value(A), m_Value(B))))) { 2527 Worklist.push_back(std::make_pair(A, RHS)); 2528 Worklist.push_back(std::make_pair(B, RHS)); 2529 continue; 2530 } 2531 2532 // If we are propagating an equality like "(A == B)" == "true" then also 2533 // propagate the equality A == B. When propagating a comparison such as 2534 // "(A >= B)" == "true", replace all instances of "A < B" with "false". 2535 if (CmpInst *Cmp = dyn_cast<CmpInst>(LHS)) { 2536 Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1); 2537 2538 // If "A == B" is known true, or "A != B" is known false, then replace 2539 // A with B everywhere in the scope. For floating point operations, we 2540 // have to be careful since equality does not always imply equivalance. 2541 if (Cmp->isEquivalence(isKnownFalse)) 2542 Worklist.push_back(std::make_pair(Op0, Op1)); 2543 2544 // If "A >= B" is known true, replace "A < B" with false everywhere. 
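      // Illustrative sketch only (hypothetical values): if the propagated fact
      // is "%ge = icmp sge i32 %a, %b" == true, then a dominated instruction
      // with the inverse predicate, e.g. "%lt = icmp slt i32 %a, %b", shares
      // the value number computed for it below and gets rewritten to false.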
2545 CmpInst::Predicate NotPred = Cmp->getInversePredicate(); 2546 Constant *NotVal = ConstantInt::get(Cmp->getType(), isKnownFalse); 2547 // Since we don't have the instruction "A < B" immediately to hand, work 2548 // out the value number that it would have and use that to find an 2549 // appropriate instruction (if any). 2550 uint32_t NextNum = VN.getNextUnusedValueNumber(); 2551 uint32_t Num = VN.lookupOrAddCmp(Cmp->getOpcode(), NotPred, Op0, Op1); 2552 // If the number we were assigned was brand new then there is no point in 2553 // looking for an instruction realizing it: there cannot be one! 2554 if (Num < NextNum) { 2555 Value *NotCmp = findLeader(Root.getEnd(), Num); 2556 if (NotCmp && isa<Instruction>(NotCmp)) { 2557 unsigned NumReplacements = 2558 DominatesByEdge 2559 ? replaceDominatedUsesWith(NotCmp, NotVal, *DT, Root) 2560 : replaceDominatedUsesWith(NotCmp, NotVal, *DT, 2561 Root.getStart()); 2562 Changed |= NumReplacements > 0; 2563 NumGVNEqProp += NumReplacements; 2564 // Cached information for anything that uses NotCmp will be invalid. 2565 if (MD) 2566 MD->invalidateCachedPointerInfo(NotCmp); 2567 } 2568 } 2569 // Ensure that any instruction in scope that gets the "A < B" value number 2570 // is replaced with false. 2571 // The leader table only tracks basic blocks, not edges. Only add to if we 2572 // have the simple case where the edge dominates the end. 2573 if (RootDominatesEnd) 2574 LeaderTable.insert(Num, NotVal, Root.getEnd()); 2575 2576 continue; 2577 } 2578 } 2579 2580 return Changed; 2581 } 2582 2583 /// When calculating availability, handle an instruction 2584 /// by inserting it into the appropriate sets 2585 bool GVNPass::processInstruction(Instruction *I) { 2586 // Ignore dbg info intrinsics. 2587 if (isa<DbgInfoIntrinsic>(I)) 2588 return false; 2589 2590 // If the instruction can be easily simplified then do so now in preference 2591 // to value numbering it. Value numbering often exposes redundancies, for 2592 // example if it determines that %y is equal to %x then the instruction 2593 // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify. 2594 const DataLayout &DL = I->getDataLayout(); 2595 if (Value *V = simplifyInstruction(I, {DL, TLI, DT, AC})) { 2596 bool Changed = false; 2597 if (!I->use_empty()) { 2598 // Simplification can cause a special instruction to become not special. 2599 // For example, devirtualization to a willreturn function. 2600 ICF->removeUsersOf(I); 2601 I->replaceAllUsesWith(V); 2602 Changed = true; 2603 } 2604 if (isInstructionTriviallyDead(I, TLI)) { 2605 markInstructionForDeletion(I); 2606 Changed = true; 2607 } 2608 if (Changed) { 2609 if (MD && V->getType()->isPtrOrPtrVectorTy()) 2610 MD->invalidateCachedPointerInfo(V); 2611 ++NumGVNSimpl; 2612 return true; 2613 } 2614 } 2615 2616 if (auto *Assume = dyn_cast<AssumeInst>(I)) 2617 return processAssumeIntrinsic(Assume); 2618 2619 if (LoadInst *Load = dyn_cast<LoadInst>(I)) { 2620 if (processLoad(Load)) 2621 return true; 2622 2623 unsigned Num = VN.lookupOrAdd(Load); 2624 LeaderTable.insert(Num, Load, Load->getParent()); 2625 return false; 2626 } 2627 2628 // For conditional branches, we can perform simple conditional propagation on 2629 // the condition value itself. 
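  // A hedged example (hypothetical IR): given
  //   br i1 %cond, label %bb.true, label %bb.false
  // uses of %cond dominated by the edge to %bb.true can be folded to true, and
  // uses dominated by the edge to %bb.false to false; that is what the
  // propagateEquality calls below do.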
2630 if (BranchInst *BI = dyn_cast<BranchInst>(I)) { 2631 if (!BI->isConditional()) 2632 return false; 2633 2634 if (isa<Constant>(BI->getCondition())) 2635 return processFoldableCondBr(BI); 2636 2637 Value *BranchCond = BI->getCondition(); 2638 BasicBlock *TrueSucc = BI->getSuccessor(0); 2639 BasicBlock *FalseSucc = BI->getSuccessor(1); 2640 // Avoid multiple edges early. 2641 if (TrueSucc == FalseSucc) 2642 return false; 2643 2644 BasicBlock *Parent = BI->getParent(); 2645 bool Changed = false; 2646 2647 Value *TrueVal = ConstantInt::getTrue(TrueSucc->getContext()); 2648 BasicBlockEdge TrueE(Parent, TrueSucc); 2649 Changed |= propagateEquality(BranchCond, TrueVal, TrueE, true); 2650 2651 Value *FalseVal = ConstantInt::getFalse(FalseSucc->getContext()); 2652 BasicBlockEdge FalseE(Parent, FalseSucc); 2653 Changed |= propagateEquality(BranchCond, FalseVal, FalseE, true); 2654 2655 return Changed; 2656 } 2657 2658 // For switches, propagate the case values into the case destinations. 2659 if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) { 2660 Value *SwitchCond = SI->getCondition(); 2661 BasicBlock *Parent = SI->getParent(); 2662 bool Changed = false; 2663 2664 // Remember how many outgoing edges there are to every successor. 2665 SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges; 2666 for (BasicBlock *Succ : successors(Parent)) 2667 ++SwitchEdges[Succ]; 2668 2669 for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); 2670 i != e; ++i) { 2671 BasicBlock *Dst = i->getCaseSuccessor(); 2672 // If there is only a single edge, propagate the case value into it. 2673 if (SwitchEdges.lookup(Dst) == 1) { 2674 BasicBlockEdge E(Parent, Dst); 2675 Changed |= propagateEquality(SwitchCond, i->getCaseValue(), E, true); 2676 } 2677 } 2678 return Changed; 2679 } 2680 2681 // Instructions with void type don't return a value, so there's 2682 // no point in trying to find redundancies in them. 2683 if (I->getType()->isVoidTy()) 2684 return false; 2685 2686 uint32_t NextNum = VN.getNextUnusedValueNumber(); 2687 unsigned Num = VN.lookupOrAdd(I); 2688 2689 // Allocations are always uniquely numbered, so we can save time and memory 2690 // by fast failing them. 2691 if (isa<AllocaInst>(I) || I->isTerminator() || isa<PHINode>(I)) { 2692 LeaderTable.insert(Num, I, I->getParent()); 2693 return false; 2694 } 2695 2696 // If the number we were assigned was a brand new VN, then we don't 2697 // need to do a lookup to see if the number already exists 2698 // somewhere in the domtree: it can't! 2699 if (Num >= NextNum) { 2700 LeaderTable.insert(Num, I, I->getParent()); 2701 return false; 2702 } 2703 2704 // Perform fast-path value-number based elimination of values inherited from 2705 // dominators. 2706 Value *Repl = findLeader(I->getParent(), Num); 2707 if (!Repl) { 2708 // Failure, just remember this instance for future use. 2709 LeaderTable.insert(Num, I, I->getParent()); 2710 return false; 2711 } 2712 2713 if (Repl == I) { 2714 // If I was the result of a shortcut PRE, it might already be in the table 2715 // and the best replacement for itself. Nothing to do. 2716 return false; 2717 } 2718 2719 // Remove it! 2720 patchAndReplaceAllUsesWith(I, Repl); 2721 if (MD && Repl->getType()->isPtrOrPtrVectorTy()) 2722 MD->invalidateCachedPointerInfo(Repl); 2723 markInstructionForDeletion(I); 2724 return true; 2725 } 2726 2727 /// runOnFunction - This is the main transformation entry point for a function. 
2728 bool GVNPass::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, 2729 const TargetLibraryInfo &RunTLI, AAResults &RunAA, 2730 MemoryDependenceResults *RunMD, LoopInfo &LI, 2731 OptimizationRemarkEmitter *RunORE, MemorySSA *MSSA) { 2732 AC = &RunAC; 2733 DT = &RunDT; 2734 VN.setDomTree(DT); 2735 TLI = &RunTLI; 2736 VN.setAliasAnalysis(&RunAA); 2737 MD = RunMD; 2738 ImplicitControlFlowTracking ImplicitCFT; 2739 ICF = &ImplicitCFT; 2740 this->LI = &LI; 2741 VN.setMemDep(MD); 2742 ORE = RunORE; 2743 InvalidBlockRPONumbers = true; 2744 MemorySSAUpdater Updater(MSSA); 2745 MSSAU = MSSA ? &Updater : nullptr; 2746 2747 bool Changed = false; 2748 bool ShouldContinue = true; 2749 2750 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); 2751 // Merge unconditional branches, allowing PRE to catch more 2752 // optimization opportunities. 2753 for (BasicBlock &BB : llvm::make_early_inc_range(F)) { 2754 bool removedBlock = MergeBlockIntoPredecessor(&BB, &DTU, &LI, MSSAU, MD); 2755 if (removedBlock) 2756 ++NumGVNBlocks; 2757 2758 Changed |= removedBlock; 2759 } 2760 DTU.flush(); 2761 2762 unsigned Iteration = 0; 2763 while (ShouldContinue) { 2764 LLVM_DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n"); 2765 (void) Iteration; 2766 ShouldContinue = iterateOnFunction(F); 2767 Changed |= ShouldContinue; 2768 ++Iteration; 2769 } 2770 2771 if (isPREEnabled()) { 2772 // Fabricate val-num for dead-code in order to suppress assertion in 2773 // performPRE(). 2774 assignValNumForDeadCode(); 2775 bool PREChanged = true; 2776 while (PREChanged) { 2777 PREChanged = performPRE(F); 2778 Changed |= PREChanged; 2779 } 2780 } 2781 2782 // FIXME: Should perform GVN again after PRE does something. PRE can move 2783 // computations into blocks where they become fully redundant. Note that 2784 // we can't do this until PRE's critical edge splitting updates memdep. 2785 // Actually, when this happens, we should just fully integrate PRE into GVN. 2786 2787 cleanupGlobalSets(); 2788 // Do not cleanup DeadBlocks in cleanupGlobalSets() as it's called for each 2789 // iteration. 2790 DeadBlocks.clear(); 2791 2792 if (MSSA && VerifyMemorySSA) 2793 MSSA->verifyMemorySSA(); 2794 2795 return Changed; 2796 } 2797 2798 bool GVNPass::processBlock(BasicBlock *BB) { 2799 // FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function 2800 // (and incrementing BI before processing an instruction). 2801 assert(InstrsToErase.empty() && 2802 "We expect InstrsToErase to be empty across iterations"); 2803 if (DeadBlocks.count(BB)) 2804 return false; 2805 2806 // Clearing map before every BB because it can be used only for single BB. 2807 ReplaceOperandsWithMap.clear(); 2808 bool ChangedFunction = false; 2809 2810 // Since we may not have visited the input blocks of the phis, we can't 2811 // use our normal hash approach for phis. Instead, simply look for 2812 // obvious duplicates. The first pass of GVN will tend to create 2813 // identical phis, and the second or later passes can eliminate them. 
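  // For illustration only (hypothetical IR), two phis such as
  //   %a = phi i32 [ %x, %pred1 ], [ %y, %pred2 ]
  //   %b = phi i32 [ %x, %pred1 ], [ %y, %pred2 ]
  // are obvious duplicates: EliminateDuplicatePHINodes keeps one and reports
  // the other in PHINodesToRemove so it can be erased below.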
2814 SmallPtrSet<PHINode *, 8> PHINodesToRemove; 2815 ChangedFunction |= EliminateDuplicatePHINodes(BB, PHINodesToRemove); 2816 for (PHINode *PN : PHINodesToRemove) { 2817 VN.erase(PN); 2818 removeInstruction(PN); 2819 } 2820 2821 for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); 2822 BI != BE;) { 2823 if (!ReplaceOperandsWithMap.empty()) 2824 ChangedFunction |= replaceOperandsForInBlockEquality(&*BI); 2825 ChangedFunction |= processInstruction(&*BI); 2826 2827 if (InstrsToErase.empty()) { 2828 ++BI; 2829 continue; 2830 } 2831 2832 // If we need some instructions deleted, do it now. 2833 NumGVNInstr += InstrsToErase.size(); 2834 2835 // Avoid iterator invalidation. 2836 bool AtStart = BI == BB->begin(); 2837 if (!AtStart) 2838 --BI; 2839 2840 for (auto *I : InstrsToErase) { 2841 assert(I->getParent() == BB && "Removing instruction from wrong block?"); 2842 LLVM_DEBUG(dbgs() << "GVN removed: " << *I << '\n'); 2843 salvageKnowledge(I, AC); 2844 salvageDebugInfo(*I); 2845 removeInstruction(I); 2846 } 2847 InstrsToErase.clear(); 2848 2849 if (AtStart) 2850 BI = BB->begin(); 2851 else 2852 ++BI; 2853 } 2854 2855 return ChangedFunction; 2856 } 2857 2858 // Instantiate an expression in a predecessor that lacked it. 2859 bool GVNPass::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, 2860 BasicBlock *Curr, unsigned int ValNo) { 2861 // Because we are going top-down through the block, all value numbers 2862 // will be available in the predecessor by the time we need them. Any 2863 // that weren't originally present will have been instantiated earlier 2864 // in this loop. 2865 bool success = true; 2866 for (unsigned i = 0, e = Instr->getNumOperands(); i != e; ++i) { 2867 Value *Op = Instr->getOperand(i); 2868 if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op)) 2869 continue; 2870 // This could be a newly inserted instruction, in which case, we won't 2871 // find a value number, and should give up before we hurt ourselves. 2872 // FIXME: Rewrite the infrastructure to let it easier to value number 2873 // and process newly inserted instructions. 2874 if (!VN.exists(Op)) { 2875 success = false; 2876 break; 2877 } 2878 uint32_t TValNo = 2879 VN.phiTranslate(Pred, Curr, VN.lookup(Op), *this); 2880 if (Value *V = findLeader(Pred, TValNo)) { 2881 Instr->setOperand(i, V); 2882 } else { 2883 success = false; 2884 break; 2885 } 2886 } 2887 2888 // Fail out if we encounter an operand that is not available in 2889 // the PRE predecessor. This is typically because of loads which 2890 // are not value numbered precisely. 2891 if (!success) 2892 return false; 2893 2894 Instr->insertBefore(Pred->getTerminator()->getIterator()); 2895 Instr->setName(Instr->getName() + ".pre"); 2896 Instr->setDebugLoc(Instr->getDebugLoc()); 2897 2898 ICF->insertInstructionTo(Instr, Pred); 2899 2900 unsigned Num = VN.lookupOrAdd(Instr); 2901 VN.add(Instr, Num); 2902 2903 // Update the availability map to include the new instruction. 2904 LeaderTable.insert(Num, Instr, Pred); 2905 return true; 2906 } 2907 2908 bool GVNPass::performScalarPRE(Instruction *CurInst) { 2909 if (isa<AllocaInst>(CurInst) || CurInst->isTerminator() || 2910 isa<PHINode>(CurInst) || CurInst->getType()->isVoidTy() || 2911 CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() || 2912 isa<DbgInfoIntrinsic>(CurInst)) 2913 return false; 2914 2915 // Don't do PRE on compares. 
  // The PHI would prevent CodeGenPrepare from sinking the compare again, and
  // it would force the code generator to move the i1 from processor flags or
  // predicate registers into a general purpose register.
  if (isa<CmpInst>(CurInst))
    return false;

  // Don't do PRE on GEPs. The inserted PHI would prevent CodeGenPrepare from
  // sinking the addressing mode computation back to its uses. Extending the
  // GEP's live range increases the register pressure, and therefore it can
  // introduce unnecessary spills.
  //
  // This doesn't prevent Load PRE. PHI translation will make the GEP available
  // to the load by moving it to the predecessor block if necessary.
  if (isa<GetElementPtrInst>(CurInst))
    return false;

  if (auto *CallB = dyn_cast<CallBase>(CurInst)) {
    // We don't currently value number ANY inline asm calls.
    if (CallB->isInlineAsm())
      return false;
  }

  uint32_t ValNo = VN.lookup(CurInst);

  // Look for the predecessors for PRE opportunities.  We're
  // only trying to solve the basic diamond case, where
  // a value is computed in the successor and one predecessor,
  // but not the other.  We also explicitly disallow cases
  // where the successor is its own predecessor, because they're
  // more complicated to get right.
  unsigned NumWith = 0;
  unsigned NumWithout = 0;
  BasicBlock *PREPred = nullptr;
  BasicBlock *CurrentBlock = CurInst->getParent();

  // Update the RPO numbers for this function.
  if (InvalidBlockRPONumbers)
    assignBlockRPONumber(*CurrentBlock->getParent());

  SmallVector<std::pair<Value *, BasicBlock *>, 8> predMap;
  for (BasicBlock *P : predecessors(CurrentBlock)) {
    // We're not interested in PRE when the predecessor is not reachable.
    if (!DT->isReachableFromEntry(P)) {
      NumWithout = 2;
      break;
    }
    // It is not safe to do PRE when P->CurrentBlock is a loop backedge.
    assert(BlockRPONumber.count(P) && BlockRPONumber.count(CurrentBlock) &&
           "Invalid BlockRPONumber map.");
    if (BlockRPONumber[P] >= BlockRPONumber[CurrentBlock]) {
      NumWithout = 2;
      break;
    }

    uint32_t TValNo = VN.phiTranslate(P, CurrentBlock, ValNo, *this);
    Value *predV = findLeader(P, TValNo);
    if (!predV) {
      predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P));
      PREPred = P;
      ++NumWithout;
    } else if (predV == CurInst) {
      /* CurInst dominates this predecessor. */
      NumWithout = 2;
      break;
    } else {
      predMap.push_back(std::make_pair(predV, P));
      ++NumWith;
    }
  }

  // Don't do PRE when it might increase code size, i.e. when
  // we would need to insert instructions in more than one pred.
  if (NumWithout > 1 || NumWith == 0)
    return false;

  // We may have a case where all predecessors have the instruction,
  // and we just need to insert a phi node. Otherwise, perform
  // insertion.
  Instruction *PREInstr = nullptr;

  if (NumWithout != 0) {
    if (!isSafeToSpeculativelyExecute(CurInst)) {
      // It is only valid to insert a new instruction if the current
      // instruction is always executed. An instruction with implicit control
      // flow could prevent us from doing it. If we cannot speculate the
      // execution, then PRE should be prohibited.
      if (ICF->isDominatedByICFIFromSameBlock(CurInst))
        return false;
    }

    // Don't do PRE across an indirect branch.
    if (isa<IndirectBrInst>(PREPred->getTerminator()))
      return false;

    // We can't do PRE safely on a critical edge, so instead we schedule
    // the edge to be split and perform the PRE the next time we iterate
    // on the function.
    unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
    if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
      toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
      return false;
    }
    // We need to insert somewhere, so let's give it a shot.
    PREInstr = CurInst->clone();
    if (!performScalarPREInsertion(PREInstr, PREPred, CurrentBlock, ValNo)) {
      // If we failed insertion, make sure we remove the instruction.
#ifndef NDEBUG
      verifyRemoved(PREInstr);
#endif
      PREInstr->deleteValue();
      return false;
    }
  }

  // Either we should have filled in the PRE instruction, or we should
  // not have needed insertions.
  assert(PREInstr != nullptr || NumWithout == 0);

  ++NumGVNPRE;

  // Create a PHI to make the value available in this block.
  PHINode *Phi = PHINode::Create(CurInst->getType(), predMap.size(),
                                 CurInst->getName() + ".pre-phi");
  Phi->insertBefore(CurrentBlock->begin());
  for (unsigned i = 0, e = predMap.size(); i != e; ++i) {
    if (Value *V = predMap[i].first) {
      // If we use an existing value in this phi, we have to patch the
      // original value because the phi will be used to replace a later value.
      patchReplacementInstruction(CurInst, V);
      Phi->addIncoming(V, predMap[i].second);
    } else
      Phi->addIncoming(PREInstr, PREPred);
  }

  VN.add(Phi, ValNo);
  // After creating a new PHI for ValNo, the phi translate result for ValNo
  // will have changed, so erase the related stale entries in the phi
  // translate cache.
  VN.eraseTranslateCacheEntry(ValNo, *CurrentBlock);
  LeaderTable.insert(ValNo, Phi, CurrentBlock);
  Phi->setDebugLoc(CurInst->getDebugLoc());
  CurInst->replaceAllUsesWith(Phi);
  if (MD && Phi->getType()->isPtrOrPtrVectorTy())
    MD->invalidateCachedPointerInfo(Phi);
  VN.erase(CurInst);
  LeaderTable.erase(ValNo, CurInst, CurrentBlock);

  LLVM_DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
  removeInstruction(CurInst);
  ++NumGVNInstr;

  return true;
}

/// Perform a purely local form of PRE that looks for diamond control flow
/// patterns and attempts to perform simple PRE at the join point.
bool GVNPass::performPRE(Function &F) {
  bool Changed = false;
  for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) {
    // Nothing to PRE in the entry block.
    if (CurrentBlock == &F.getEntryBlock())
      continue;

    // Don't perform PRE on an EH pad.
    if (CurrentBlock->isEHPad())
      continue;

    for (BasicBlock::iterator BI = CurrentBlock->begin(),
                              BE = CurrentBlock->end();
         BI != BE;) {
      Instruction *CurInst = &*BI++;
      Changed |= performScalarPRE(CurInst);
    }
  }

  if (splitCriticalEdges())
    Changed = true;

  return Changed;
}

/// Split the critical edge connecting the given two blocks, and return the
/// block inserted on the critical edge.
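/// (A critical edge runs from a block with multiple successors to a block
/// with multiple predecessors; code cannot be placed "on" such an edge
/// without affecting the other paths, so a new block is inserted to carry
/// it.)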
BasicBlock *GVNPass::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
  // GVN does not require loop-simplify; do not try to preserve it if that is
  // not possible.
  BasicBlock *BB = SplitCriticalEdge(
      Pred, Succ,
      CriticalEdgeSplittingOptions(DT, LI, MSSAU).unsetPreserveLoopSimplify());
  if (BB) {
    if (MD)
      MD->invalidateCachedPredecessors();
    InvalidBlockRPONumbers = true;
  }
  return BB;
}

/// Split critical edges found during the previous
/// iteration that may enable further optimization.
bool GVNPass::splitCriticalEdges() {
  if (toSplit.empty())
    return false;

  bool Changed = false;
  do {
    std::pair<Instruction *, unsigned> Edge = toSplit.pop_back_val();
    Changed |= SplitCriticalEdge(Edge.first, Edge.second,
                                 CriticalEdgeSplittingOptions(DT, LI, MSSAU)) !=
               nullptr;
  } while (!toSplit.empty());
  if (Changed) {
    if (MD)
      MD->invalidateCachedPredecessors();
    InvalidBlockRPONumbers = true;
  }
  return Changed;
}

/// Executes one iteration of GVN.
bool GVNPass::iterateOnFunction(Function &F) {
  cleanupGlobalSets();

  // Top-down walk of the dominator tree.
  bool Changed = false;
  // Needed for value numbering with phi construction to work.
  // RPOT walks the graph in its constructor and will not be invalidated
  // during processBlock.
  ReversePostOrderTraversal<Function *> RPOT(&F);

  for (BasicBlock *BB : RPOT)
    Changed |= processBlock(BB);

  return Changed;
}

void GVNPass::cleanupGlobalSets() {
  VN.clear();
  LeaderTable.clear();
  BlockRPONumber.clear();
  ICF->clear();
  InvalidBlockRPONumbers = true;
}

void GVNPass::removeInstruction(Instruction *I) {
  if (MD)
    MD->removeInstruction(I);
  if (MSSAU)
    MSSAU->removeMemoryAccess(I);
#ifndef NDEBUG
  verifyRemoved(I);
#endif
  ICF->removeInstruction(I);
  I->eraseFromParent();
}

/// Verify that the specified instruction does not occur in our
/// internal data structures.
void GVNPass::verifyRemoved(const Instruction *Inst) const {
  VN.verifyRemoved(Inst);
  LeaderTable.verifyRemoved(Inst);
}

/// BB is declared dead, which implies other blocks become dead as well. This
/// function adds all such blocks to "DeadBlocks". For the dead blocks' live
/// successors, update their phi nodes by replacing the operands corresponding
/// to dead blocks with a poison value.
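/// For example (sketch): if a dead block D has a successor S with at least
/// one live predecessor, S lands in the dominance frontier and only its phi
/// operands coming from dead predecessors are replaced; if every predecessor
/// of S is dead, S itself is marked dead as well.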
void GVNPass::addDeadBlock(BasicBlock *BB) {
  SmallVector<BasicBlock *, 4> NewDead;
  SmallSetVector<BasicBlock *, 4> DF;

  NewDead.push_back(BB);
  while (!NewDead.empty()) {
    BasicBlock *D = NewDead.pop_back_val();
    if (DeadBlocks.count(D))
      continue;

    // All blocks dominated by D are dead.
    SmallVector<BasicBlock *, 8> Dom;
    DT->getDescendants(D, Dom);
    DeadBlocks.insert(Dom.begin(), Dom.end());

    // Figure out the dominance-frontier(D).
    for (BasicBlock *B : Dom) {
      for (BasicBlock *S : successors(B)) {
        if (DeadBlocks.count(S))
          continue;

        bool AllPredDead = true;
        for (BasicBlock *P : predecessors(S))
          if (!DeadBlocks.count(P)) {
            AllPredDead = false;
            break;
          }

        if (!AllPredDead) {
          // S could be proved dead later on. That is why we don't update phi
          // operands at this moment.
          DF.insert(S);
        } else {
          // Although S is not dominated by D, it is dead by now. This could
          // happen if S already had a dead predecessor before D was declared
          // dead.
          NewDead.push_back(S);
        }
      }
    }
  }

  // For the dead blocks' live successors, update their phi nodes by replacing
  // the operands corresponding to dead blocks with a poison value.
  for (BasicBlock *B : DF) {
    if (DeadBlocks.count(B))
      continue;

    // First, split the critical edges. This might also create additional
    // blocks to preserve LoopSimplify form and adjust edges accordingly.
    SmallVector<BasicBlock *, 4> Preds(predecessors(B));
    for (BasicBlock *P : Preds) {
      if (!DeadBlocks.count(P))
        continue;

      if (llvm::is_contained(successors(P), B) &&
          isCriticalEdge(P->getTerminator(), B)) {
        if (BasicBlock *S = splitCriticalEdges(P, B))
          DeadBlocks.insert(P = S);
      }
    }

    // Now poison the incoming values from the dead predecessors.
    for (BasicBlock *P : predecessors(B)) {
      if (!DeadBlocks.count(P))
        continue;
      for (PHINode &Phi : B->phis()) {
        Phi.setIncomingValueForBlock(P, PoisonValue::get(Phi.getType()));
        if (MD)
          MD->invalidateCachedPointerInfo(&Phi);
      }
    }
  }
}

// If the given branch is recognized as a foldable branch (i.e. a conditional
// branch with a constant condition), it performs the following analyses and
// transformations:
//  1) If the dead outgoing edge is a critical edge, split it. Let R be the
//     target of the dead outgoing edge.
//  2) Identify the set of dead blocks implied by the branch's dead outgoing
//     edge. The result of this step is {X | X is dominated by R}.
//  3) Identify those blocks that have at least one dead predecessor. The
//     result of this step is dominance-frontier(R).
//  4) Update the PHIs in DF(R) by replacing the operands corresponding to
//     dead blocks with poison, in the hope that these PHIs will be optimized
//     away.
//
// Returns true iff *NEW* dead code is found.
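//
// For example (illustrative IR, names made up): once the condition has been
// folded to a constant,
//
//   br i1 false, label %then, label %else
//
// makes %then the root of the dead region; %then and every block it dominates
// are added to DeadBlocks, and phis in their live successors get poison for
// the incoming values from dead blocks.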
bool GVNPass::processFoldableCondBr(BranchInst *BI) {
  if (!BI || BI->isUnconditional())
    return false;

  // If a branch has two identical successors, we cannot declare either dead.
  if (BI->getSuccessor(0) == BI->getSuccessor(1))
    return false;

  ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
  if (!Cond)
    return false;

  BasicBlock *DeadRoot =
      Cond->getZExtValue() ? BI->getSuccessor(1) : BI->getSuccessor(0);
  if (DeadBlocks.count(DeadRoot))
    return false;

  if (!DeadRoot->getSinglePredecessor())
    DeadRoot = splitCriticalEdges(BI->getParent(), DeadRoot);

  addDeadBlock(DeadRoot);
  return true;
}

// performPRE() will trigger an assert if it comes across an instruction
// without an associated val-num. As there are normally far more live
// instructions than dead ones, it makes more sense to just "fabricate" a
// val-number for the dead code than to check whether each instruction is
// dead.
void GVNPass::assignValNumForDeadCode() {
  for (BasicBlock *BB : DeadBlocks) {
    for (Instruction &Inst : *BB) {
      unsigned ValNum = VN.lookupOrAdd(&Inst);
      LeaderTable.insert(ValNum, &Inst, BB);
    }
  }
}

class llvm::gvn::GVNLegacyPass : public FunctionPass {
public:
  static char ID; // Pass identification, replacement for typeid.

  explicit GVNLegacyPass(bool MemDepAnalysis = GVNEnableMemDep,
                         bool MemSSAAnalysis = GVNEnableMemorySSA)
      : FunctionPass(ID), Impl(GVNOptions()
                                   .setMemDep(MemDepAnalysis)
                                   .setMemorySSA(MemSSAAnalysis)) {
    initializeGVNLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;

    auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
    if (Impl.isMemorySSAEnabled() && !MSSAWP)
      MSSAWP = &getAnalysis<MemorySSAWrapperPass>();

    return Impl.runImpl(
        F, getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
        getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F),
        getAnalysis<AAResultsWrapperPass>().getAAResults(),
        Impl.isMemDepEnabled()
            ? &getAnalysis<MemoryDependenceWrapperPass>().getMemDep()
            : nullptr,
        getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
        &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(),
        MSSAWP ? &MSSAWP->getMSSA() : nullptr);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    if (Impl.isMemDepEnabled())
      AU.addRequired<MemoryDependenceWrapperPass>();
    AU.addRequired<AAResultsWrapperPass>();
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addPreserved<GlobalsAAWrapperPass>();
    AU.addPreserved<TargetLibraryInfoWrapperPass>();
    AU.addPreserved<LoopInfoWrapperPass>();
    AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
    AU.addPreserved<MemorySSAWrapperPass>();
    if (Impl.isMemorySSAEnabled())
      AU.addRequired<MemorySSAWrapperPass>();
  }

private:
  GVNPass Impl;
};

char GVNLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(GVNLegacyPass, "gvn", "Global Value Numbering", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(GVNLegacyPass, "gvn", "Global Value Numbering", false,
                    false)

// The public interface to this file...
FunctionPass *llvm::createGVNPass() { return new GVNLegacyPass(); }