//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements an analysis that determines, for a given memory
// operation, what preceding memory operations it depends on. It builds on
// alias analysis information, and tries to provide a lazy, caching interface to
// a common kind of alias information query.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PredIteratorCache.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "memdep"

STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");

STATISTIC(NumCacheNonLocalPtr,
          "Number of fully cached non-local ptr responses");
STATISTIC(NumCacheDirtyNonLocalPtr,
          "Number of cached, but dirty, non-local ptr responses");
STATISTIC(NumUncacheNonLocalPtr, "Number of uncached non-local ptr responses");
STATISTIC(NumCacheCompleteNonLocalPtr,
          "Number of block queries that were completely cached");

// Limit for the number of instructions to scan in a block.
static cl::opt<unsigned> BlockScanLimit(
    "memdep-block-scan-limit", cl::Hidden, cl::init(100),
    cl::desc("The number of instructions to scan in a block in memory "
             "dependency analysis (default = 100)"));

static cl::opt<unsigned>
    BlockNumberLimit("memdep-block-number-limit", cl::Hidden, cl::init(200),
                     cl::desc("The number of blocks to scan during memory "
                              "dependency analysis (default = 200)"));

// Limit on the number of memdep results to process.
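// (getNonLocalPointerDepFromBB gives up on a query once it has accumulated
// more results than this.)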
static const unsigned int NumResultsLimit = 100;

/// This is a helper function that removes Val from 'Inst's set in ReverseMap.
///
/// If the set becomes empty, remove Inst's entry.
template <typename KeyTy>
static void
RemoveFromReverseMap(DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>> &ReverseMap,
                     Instruction *Inst, KeyTy Val) {
  typename DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>>::iterator InstIt =
      ReverseMap.find(Inst);
  assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
  bool Found = InstIt->second.erase(Val);
  assert(Found && "Invalid reverse map!");
  (void)Found;
  if (InstIt->second.empty())
    ReverseMap.erase(InstIt);
}

/// If the given instruction references a specific memory location, fill in Loc
/// with the details, otherwise set Loc.Ptr to null.
///
/// Returns a ModRefInfo value describing the general behavior of the
/// instruction.
static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
                              const TargetLibraryInfo &TLI) {
  if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    if (LI->isUnordered()) {
      Loc = MemoryLocation::get(LI);
      return ModRefInfo::Ref;
    }
    if (LI->getOrdering() == AtomicOrdering::Monotonic) {
      Loc = MemoryLocation::get(LI);
      return ModRefInfo::ModRef;
    }
    Loc = MemoryLocation();
    return ModRefInfo::ModRef;
  }

  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    if (SI->isUnordered()) {
      Loc = MemoryLocation::get(SI);
      return ModRefInfo::Mod;
    }
    if (SI->getOrdering() == AtomicOrdering::Monotonic) {
      Loc = MemoryLocation::get(SI);
      return ModRefInfo::ModRef;
    }
    Loc = MemoryLocation();
    return ModRefInfo::ModRef;
  }

  if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
    Loc = MemoryLocation::get(V);
    return ModRefInfo::ModRef;
  }

  if (const CallBase *CB = dyn_cast<CallBase>(Inst)) {
    if (Value *FreedOp = getFreedOperand(CB, &TLI)) {
      // calls to free() deallocate the entire structure
      Loc = MemoryLocation::getAfter(FreedOp);
      return ModRefInfo::Mod;
    }
  }

  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::invariant_start:
      Loc = MemoryLocation::getForArgument(II, 1, TLI);
      // These intrinsics don't really modify the memory, but returning Mod
      // will allow them to be handled conservatively.
      return ModRefInfo::Mod;
    case Intrinsic::invariant_end:
      Loc = MemoryLocation::getForArgument(II, 2, TLI);
      // These intrinsics don't really modify the memory, but returning Mod
      // will allow them to be handled conservatively.
      return ModRefInfo::Mod;
    case Intrinsic::masked_load:
      Loc = MemoryLocation::getForArgument(II, 0, TLI);
      return ModRefInfo::Ref;
    case Intrinsic::masked_store:
      Loc = MemoryLocation::getForArgument(II, 1, TLI);
      return ModRefInfo::Mod;
    default:
      break;
    }
  }

  // Otherwise, just do the coarse-grained thing that always works.
  if (Inst->mayWriteToMemory())
    return ModRefInfo::ModRef;
  if (Inst->mayReadFromMemory())
    return ModRefInfo::Ref;
  return ModRefInfo::NoModRef;
}

/// Private helper for finding the local dependencies of a call site.
MemDepResult MemoryDependenceResults::getCallDependencyFrom(
    CallBase *Call, bool isReadOnlyCall, BasicBlock::iterator ScanIt,
    BasicBlock *BB) {
  unsigned Limit = getDefaultBlockScanLimit();

  // Walk backwards through the block, looking for dependencies.
  while (ScanIt != BB->begin()) {
    Instruction *Inst = &*--ScanIt;
    // Debug intrinsics don't cause dependences and should not affect Limit
    if (isa<DbgInfoIntrinsic>(Inst))
      continue;

    // Limit the amount of scanning we do so we don't end up with quadratic
    // running time on extreme testcases.
    --Limit;
    if (!Limit)
      return MemDepResult::getUnknown();

    // If this inst is a memory op, get the pointer it accessed
    MemoryLocation Loc;
    ModRefInfo MR = GetLocation(Inst, Loc, TLI);
    if (Loc.Ptr) {
      // A simple instruction.
      if (isModOrRefSet(AA.getModRefInfo(Call, Loc)))
        return MemDepResult::getClobber(Inst);
      continue;
    }

    if (auto *CallB = dyn_cast<CallBase>(Inst)) {
      // If these two calls do not interfere, look past it.
      if (isNoModRef(AA.getModRefInfo(Call, CallB))) {
        // If the two calls are the same, return Inst as a Def, so that
        // Call can be found redundant and eliminated.
        if (isReadOnlyCall && !isModSet(MR) &&
            Call->isIdenticalToWhenDefined(CallB))
          return MemDepResult::getDef(Inst);

        // Otherwise if the two calls don't interact (e.g. CallB is readnone)
        // keep scanning.
        continue;
      } else
        return MemDepResult::getClobber(Inst);
    }

    // If we could not obtain a pointer for the instruction and the instruction
    // touches memory then assume that this is a dependency.
    if (isModOrRefSet(MR))
      return MemDepResult::getClobber(Inst);
  }

  // No dependence found. If this is the entry block of the function, it is
  // unknown, otherwise it is non-local.
  if (BB != &BB->getParent()->getEntryBlock())
    return MemDepResult::getNonLocal();
  return MemDepResult::getNonFuncLocal();
}

MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
    const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
    BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
    BatchAAResults &BatchAA) {
  MemDepResult InvariantGroupDependency = MemDepResult::getUnknown();
  if (QueryInst != nullptr) {
    if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
      InvariantGroupDependency = getInvariantGroupPointerDependency(LI, BB);

      if (InvariantGroupDependency.isDef())
        return InvariantGroupDependency;
    }
  }
  MemDepResult SimpleDep = getSimplePointerDependencyFrom(
      MemLoc, isLoad, ScanIt, BB, QueryInst, Limit, BatchAA);
  if (SimpleDep.isDef())
    return SimpleDep;
  // A non-local invariant.group dependency indicates that there is a non-local
  // Def (it only returns NonLocal if it finds a non-local Def), which is
  // better than a local clobber or anything else.
  if (InvariantGroupDependency.isNonLocal())
    return InvariantGroupDependency;

  assert(InvariantGroupDependency.isUnknown() &&
         "InvariantGroupDependency should be only unknown at this point");
  return SimpleDep;
}

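/// Convenience overload that runs a single query against a fresh
/// BatchAAResults cache.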
MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
    const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
    BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
  BatchAAResults BatchAA(AA);
  return getPointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst, Limit,
                                  BatchAA);
}

MemDepResult
MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
                                                            BasicBlock *BB) {

  if (!LI->hasMetadata(LLVMContext::MD_invariant_group))
    return MemDepResult::getUnknown();

  // Take the ptr operand after all casts and geps 0. This way we can search
  // only down the cast graph.
  Value *LoadOperand = LI->getPointerOperand()->stripPointerCasts();

  // It is not safe to walk the use list of a global value, because function
  // passes aren't allowed to look outside their functions.
  // FIXME: this could be fixed by filtering instructions from outside
  // of the current function.
  if (isa<GlobalValue>(LoadOperand))
    return MemDepResult::getUnknown();

  // Queue to process all pointers that are equivalent to load operand.
  SmallVector<const Value *, 8> LoadOperandsQueue;
  LoadOperandsQueue.push_back(LoadOperand);

  Instruction *ClosestDependency = nullptr;
  // The order of instructions in a use list is unpredictable. In order to
  // always get the same result, we will look for the closest dominance.
  auto GetClosestDependency = [this](Instruction *Best, Instruction *Other) {
    assert(Other && "Must be called with a non-null instruction");
    if (Best == nullptr || DT.dominates(Best, Other))
      return Other;
    return Best;
  };

  // FIXME: This loop is O(N^2) because dominates can be O(n) and in worst case
  // we will see all the instructions. This should be fixed in MSSA.
  while (!LoadOperandsQueue.empty()) {
    const Value *Ptr = LoadOperandsQueue.pop_back_val();
    assert(Ptr && !isa<GlobalValue>(Ptr) &&
           "Null or GlobalValue should not be inserted");

    for (const Use &Us : Ptr->uses()) {
      auto *U = dyn_cast<Instruction>(Us.getUser());
      if (!U || U == LI || !DT.dominates(U, LI))
        continue;

      // A bitcast or a gep with all-zero indices is still using Ptr. Add it
      // to the queue to check its users. U = bitcast Ptr
      if (isa<BitCastInst>(U)) {
        LoadOperandsQueue.push_back(U);
        continue;
      }
      // A gep with all-zero indices is equivalent to a bitcast.
      // FIXME: we are not sure whether a bitcast should be canonicalized to a
      // gep 0 or a gep 0 to a bitcast because of SROA, so both forms exist.
      // Once typeless pointers are ready, both cases will go away (and this
      // BFS also won't be needed).
      if (auto *GEP = dyn_cast<GetElementPtrInst>(U))
        if (GEP->hasAllZeroIndices()) {
          LoadOperandsQueue.push_back(U);
          continue;
        }

      // If we hit a load/store with the same invariant.group metadata (and
      // the same pointer operand), we can assume that the value pointed to by
      // the pointer operand didn't change.
      if ((isa<LoadInst>(U) ||
           (isa<StoreInst>(U) &&
            cast<StoreInst>(U)->getPointerOperand() == Ptr)) &&
          U->hasMetadata(LLVMContext::MD_invariant_group))
        ClosestDependency = GetClosestDependency(ClosestDependency, U);
    }
  }

  if (!ClosestDependency)
    return MemDepResult::getUnknown();
  if (ClosestDependency->getParent() == BB)
    return MemDepResult::getDef(ClosestDependency);
  // Def(U) can't be returned here because it is non-local. If no local
  // dependency is found, return NonLocal, counting on the user to call
  // getNonLocalPointerDependency, which will return the cached result.
  NonLocalDefsCache.try_emplace(
      LI, NonLocalDepResult(ClosestDependency->getParent(),
                            MemDepResult::getDef(ClosestDependency), nullptr));
  ReverseNonLocalDefsCache[ClosestDependency].insert(LI);
  return MemDepResult::getNonLocal();
}

MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
    const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
    BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
    BatchAAResults &BatchAA) {
  bool isInvariantLoad = false;

  unsigned DefaultLimit = getDefaultBlockScanLimit();
  if (!Limit)
    Limit = &DefaultLimit;
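  // Note: callers may pass in their own *Limit so that a single scan budget
  // is shared across several dependency queries; decrements below persist
  // into the caller's counter.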

  // We must be careful with atomic accesses, as they may allow another thread
  // to touch this location, clobbering it. We are conservative: if the
  // QueryInst is not a simple (non-atomic) memory access, we automatically
  // return getClobber.
  // If it is simple, we know based on the results of
  // "Compiler testing via a theory of sound optimisations in the C11/C++11
  // memory model" in PLDI 2013, that a non-atomic location can only be
  // clobbered between a pair of a release and an acquire action, with no
  // access to the location in between.
  // Here is an example for giving the general intuition behind this rule.
  // In the following code:
  //   store x 0;
  //   release action; [1]
  //   acquire action; [4]
  //   %val = load x;
  // It is unsafe to replace %val by 0 because another thread may be running:
  //   acquire action; [2]
  //   store x 42;
  //   release action; [3]
  // with synchronization from 1 to 2 and from 3 to 4, resulting in %val
  // being 42. A key property of this program, however, is that if either
  // 1 or 4 were missing, there would be a race between the store of 42
  // and either the store of 0 or the load (making the whole program racy).
  // The paper mentioned above shows that the same property is respected
  // by every program that can detect any optimization of that kind: either
  // it is racy (undefined) or there is a release followed by an acquire
  // between the pair of accesses under consideration.

  // If the load is invariant, we "know" that it doesn't alias *any* write. We
  // do want to respect mustalias results since defs are useful for value
  // forwarding, but any mayalias write can be assumed to be noalias.
  // Arguably, this logic should be pushed inside AliasAnalysis itself.
  if (isLoad && QueryInst) {
    LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
    if (LI && LI->hasMetadata(LLVMContext::MD_invariant_load))
      isInvariantLoad = true;
  }

  // Returns true for a volatile instruction. For a load/store, returns true
  // if its atomic ordering is stronger than AO; for any other instruction,
  // returns true if it can read or write memory.
  auto isComplexForReordering = [](Instruction *I, AtomicOrdering AO) -> bool {
    if (I->isVolatile())
      return true;
    if (auto *LI = dyn_cast<LoadInst>(I))
      return isStrongerThan(LI->getOrdering(), AO);
    if (auto *SI = dyn_cast<StoreInst>(I))
      return isStrongerThan(SI->getOrdering(), AO);
    return I->mayReadOrWriteMemory();
  };

  // Walk backwards through the basic block, looking for dependencies.
  while (ScanIt != BB->begin()) {
    Instruction *Inst = &*--ScanIt;

    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
      // Debug intrinsics don't (and can't) cause dependencies.
      if (isa<DbgInfoIntrinsic>(II))
        continue;

    // Limit the amount of scanning we do so we don't end up with quadratic
    // running time on extreme testcases.
    --*Limit;
    if (!*Limit)
      return MemDepResult::getUnknown();

    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
      // If we reach a lifetime begin or end marker, then the query ends here
      // because the value is undefined.
      Intrinsic::ID ID = II->getIntrinsicID();
      switch (ID) {
      case Intrinsic::lifetime_start: {
        // FIXME: This only considers queries directly on the invariant-tagged
        // pointer, not on query pointers that are indexed off of them. It'd
        // be nice to handle that at some point (the right approach is to use
        // GetPointerBaseWithConstantOffset).
        MemoryLocation ArgLoc = MemoryLocation::getAfter(II->getArgOperand(1));
        if (BatchAA.isMustAlias(ArgLoc, MemLoc))
          return MemDepResult::getDef(II);
        continue;
      }
      case Intrinsic::masked_load:
      case Intrinsic::masked_store: {
        MemoryLocation Loc;
        /*ModRefInfo MR =*/ GetLocation(II, Loc, TLI);
        AliasResult R = BatchAA.alias(Loc, MemLoc);
        if (R == AliasResult::NoAlias)
          continue;
        if (R == AliasResult::MustAlias)
          return MemDepResult::getDef(II);
        if (ID == Intrinsic::masked_load)
          continue;
        return MemDepResult::getClobber(II);
      }
      }
    }

    // Values depend on loads if the pointers are must aliased. This means
    // that a load depends on another must aliased load from the same value.
    // One exception is atomic loads: a value can depend on an atomic load that
    // it does not alias with when this atomic load indicates that another
    // thread may be accessing the location.
    if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
      // While volatile accesses cannot be eliminated, they do not have to
      // clobber non-aliasing locations, as normal accesses, for example, can
      // be safely reordered with volatile accesses.
      if (LI->isVolatile()) {
        if (!QueryInst)
          // Original QueryInst *may* be volatile
          return MemDepResult::getClobber(LI);
        if (QueryInst->isVolatile())
          // Ordering required if QueryInst is itself volatile
          return MemDepResult::getClobber(LI);
        // Otherwise, volatile doesn't imply any special ordering
      }

      // Atomic loads have complications involved.
      // A Monotonic (or higher) load is OK if the query inst is itself not
      // atomic.
      // FIXME: This is overly conservative.
      if (LI->isAtomic() && isStrongerThanUnordered(LI->getOrdering())) {
        if (!QueryInst ||
            isComplexForReordering(QueryInst, AtomicOrdering::NotAtomic))
          return MemDepResult::getClobber(LI);
        if (LI->getOrdering() != AtomicOrdering::Monotonic)
          return MemDepResult::getClobber(LI);
      }

      MemoryLocation LoadLoc = MemoryLocation::get(LI);

      // If we found a pointer, check if it could be the same as our pointer.
      AliasResult R = BatchAA.alias(LoadLoc, MemLoc);

      if (R == AliasResult::NoAlias)
        continue;

      if (isLoad) {
        // Must aliased loads are defs of each other.
        if (R == AliasResult::MustAlias)
          return MemDepResult::getDef(Inst);

        // If we have a partial alias, then return this as a clobber for the
        // client to handle.
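        // The offset is cached in ClobberOffsets so a client can later
        // retrieve it through getClobberOffset (e.g. when forwarding a
        // partially overlapping value).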
        if (R == AliasResult::PartialAlias && R.hasOffset()) {
          ClobberOffsets[LI] = R.getOffset();
          return MemDepResult::getClobber(Inst);
        }

        // Random may-alias loads don't depend on each other without a
        // dependence.
        continue;
      }

      // Stores don't alias loads from read-only memory.
      if (!isModSet(BatchAA.getModRefInfoMask(LoadLoc)))
        continue;

      // Stores depend on may/must aliased loads.
      return MemDepResult::getDef(Inst);
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      // Atomic stores have complications involved.
      // A Monotonic store is OK if the query inst is itself not atomic.
      // FIXME: This is overly conservative.
      if (!SI->isUnordered() && SI->isAtomic()) {
        if (!QueryInst ||
            isComplexForReordering(QueryInst, AtomicOrdering::Unordered))
          return MemDepResult::getClobber(SI);
        // Ok, if we are here the guard above guarantees that
        // QueryInst is a non-atomic or unordered load/store.
        // SI is atomic with monotonic or release semantics (seq_cst for a
        // store is actually release semantics plus a total order over other
        // seq_cst instructions; since QueryInst is not seq_cst we can treat
        // it as having simple release semantics).
        // Monotonic and release semantics allow reordering before the store,
        // so it is safe to go further and check the aliasing. The check will
        // prohibit reordering in case the locations may or must alias.
      }

      // While volatile accesses cannot be eliminated, they do not have to
      // clobber non-aliasing locations, as normal accesses can for example be
      // reordered with volatile accesses.
      if (SI->isVolatile())
        if (!QueryInst || QueryInst->isVolatile())
          return MemDepResult::getClobber(SI);

      // If alias analysis can tell that this store is guaranteed to not modify
      // the query pointer, ignore it. Use getModRefInfo to handle cases where
      // the query pointer points to constant memory etc.
      if (!isModOrRefSet(BatchAA.getModRefInfo(SI, MemLoc)))
        continue;

      // Ok, this store might clobber the query pointer. Check to see if it is
      // a must alias: in this case, we want to return this as a def.
      // FIXME: Use ModRefInfo::Must bit from getModRefInfo call above.
      MemoryLocation StoreLoc = MemoryLocation::get(SI);

      // If we found a pointer, check if it could be the same as our pointer.
      AliasResult R = BatchAA.alias(StoreLoc, MemLoc);

      if (R == AliasResult::NoAlias)
        continue;
      if (R == AliasResult::MustAlias)
        return MemDepResult::getDef(Inst);
      if (isInvariantLoad)
        continue;
      return MemDepResult::getClobber(Inst);
    }

    // If this is an allocation, and if we know that the accessed pointer is to
    // the allocation, return Def. This means that there is no dependence and
    // the access can be optimized based on that. For example, a load could
    // turn into undef. Note that we can bypass the allocation itself when
    // looking for a clobber in many cases; that's an alias property and is
    // handled by BasicAA.
    if (isa<AllocaInst>(Inst) || isNoAliasCall(Inst)) {
      const Value *AccessPtr = getUnderlyingObject(MemLoc.Ptr);
      if (AccessPtr == Inst || BatchAA.isMustAlias(Inst, AccessPtr))
        return MemDepResult::getDef(Inst);
    }

    // If we found a select instruction for MemLoc pointer, return it as Def
    // dependency.
    if (isa<SelectInst>(Inst) && MemLoc.Ptr == Inst)
      return MemDepResult::getDef(Inst);

    if (isInvariantLoad)
      continue;

    // A release fence requires that all stores complete before it, but does
    // not prevent the reordering of following loads or stores 'before' the
    // fence. As a result, we look past it when finding a dependency for
    // loads. DSE uses this to find preceding stores to delete and thus we
    // can't bypass the fence if the query instruction is a store.
    if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
      if (isLoad && FI->getOrdering() == AtomicOrdering::Release)
        continue;

    // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
    ModRefInfo MR = BatchAA.getModRefInfo(Inst, MemLoc);
    // If necessary, perform additional analysis.
    if (isModAndRefSet(MR))
      MR = BatchAA.callCapturesBefore(Inst, MemLoc, &DT);
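    // callCapturesBefore may refine a Mod|Ref answer by showing that the
    // pointer's underlying object has not been captured before Inst.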
    switch (MR) {
    case ModRefInfo::NoModRef:
      // If the call has no effect on the queried pointer, just ignore it.
      continue;
    case ModRefInfo::Mod:
      return MemDepResult::getClobber(Inst);
    case ModRefInfo::Ref:
      // If the call is known to never store to the pointer, and if this is a
      // load query, we can safely ignore it (scan past it).
      if (isLoad)
        continue;
      [[fallthrough]];
    default:
      // Otherwise, there is a potential dependence. Return a clobber.
      return MemDepResult::getClobber(Inst);
    }
  }

  // No dependence found. If this is the entry block of the function, it is
  // unknown, otherwise it is non-local.
  if (BB != &BB->getParent()->getEntryBlock())
    return MemDepResult::getNonLocal();
  return MemDepResult::getNonFuncLocal();
}

MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
  ClobberOffsets.clear();
  Instruction *ScanPos = QueryInst;

  // Check for a cached result
  MemDepResult &LocalCache = LocalDeps[QueryInst];

  // If the cached entry is non-dirty, just return it. Note that this depends
  // on MemDepResult's default constructing to 'dirty'.
  if (!LocalCache.isDirty())
    return LocalCache;

  // Otherwise, if we have a dirty entry, we know we can start the scan at that
  // instruction, which may save us some work.
  if (Instruction *Inst = LocalCache.getInst()) {
    ScanPos = Inst;

    RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
  }

  BasicBlock *QueryParent = QueryInst->getParent();

  // Do the scan.
  if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
    // No dependence found. If this is the entry block of the function, it is
    // unknown, otherwise it is non-local.
    if (QueryParent != &QueryParent->getParent()->getEntryBlock())
      LocalCache = MemDepResult::getNonLocal();
    else
      LocalCache = MemDepResult::getNonFuncLocal();
  } else {
    MemoryLocation MemLoc;
    ModRefInfo MR = GetLocation(QueryInst, MemLoc, TLI);
    if (MemLoc.Ptr) {
      // If we can do a pointer scan, make it happen.
      bool isLoad = !isModSet(MR);
      if (auto *II = dyn_cast<IntrinsicInst>(QueryInst))
        isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;

      LocalCache =
          getPointerDependencyFrom(MemLoc, isLoad, ScanPos->getIterator(),
                                   QueryParent, QueryInst, nullptr);
    } else if (auto *QueryCall = dyn_cast<CallBase>(QueryInst)) {
      bool isReadOnly = AA.onlyReadsMemory(QueryCall);
      LocalCache = getCallDependencyFrom(QueryCall, isReadOnly,
                                         ScanPos->getIterator(), QueryParent);
    } else
      // Non-memory instruction.
      LocalCache = MemDepResult::getUnknown();
  }

  // Remember the result!
  if (Instruction *I = LocalCache.getInst())
    ReverseLocalDeps[I].insert(QueryInst);

  return LocalCache;
}

#ifndef NDEBUG
/// This method is used when -debug is specified to verify that cache arrays
/// are properly kept sorted.
static void AssertSorted(MemoryDependenceResults::NonLocalDepInfo &Cache,
                         int Count = -1) {
  if (Count == -1)
    Count = Cache.size();
  assert(std::is_sorted(Cache.begin(), Cache.begin() + Count) &&
         "Cache isn't sorted!");
}
#endif

const MemoryDependenceResults::NonLocalDepInfo &
MemoryDependenceResults::getNonLocalCallDependency(CallBase *QueryCall) {
  assert(getDependency(QueryCall).isNonLocal() &&
         "getNonLocalCallDependency should only be used on calls with "
         "non-local deps!");
  PerInstNLInfo &CacheP = NonLocalDepsMap[QueryCall];
  NonLocalDepInfo &Cache = CacheP.first;

  // This is the set of blocks that need to be recomputed. In the cached case,
  // this can happen due to instructions being deleted etc. In the uncached
  // case, this starts out as the set of predecessors we care about.
  SmallVector<BasicBlock *, 32> DirtyBlocks;

  if (!Cache.empty()) {
    // Okay, we have a cache entry. If we know it is not dirty, just return it
    // with no computation.
    if (!CacheP.second) {
      ++NumCacheNonLocal;
      return Cache;
    }

    // If we already have a partially computed set of results, scan them to
    // determine what is dirty, seeding our initial DirtyBlocks worklist.
    for (auto &Entry : Cache)
      if (Entry.getResult().isDirty())
        DirtyBlocks.push_back(Entry.getBB());

    // Sort the cache so that we can do fast binary search lookups below.
    llvm::sort(Cache);

    ++NumCacheDirtyNonLocal;
  } else {
    // Seed DirtyBlocks with each of the preds of QueryInst's block.
    BasicBlock *QueryBB = QueryCall->getParent();
    append_range(DirtyBlocks, PredCache.get(QueryBB));
    ++NumUncacheNonLocal;
  }

  // isReadonlyCall - If this is a read-only call, we can be more aggressive.
  bool isReadonlyCall = AA.onlyReadsMemory(QueryCall);

  SmallPtrSet<BasicBlock *, 32> Visited;

  unsigned NumSortedEntries = Cache.size();
  LLVM_DEBUG(AssertSorted(Cache));

  // Iterate while we still have blocks to update.
  while (!DirtyBlocks.empty()) {
    BasicBlock *DirtyBB = DirtyBlocks.pop_back_val();

    // Already processed this block?
    if (!Visited.insert(DirtyBB).second)
      continue;

    // Do a binary search to see if we already have an entry for this block in
    // the cache set. If so, find it.
    LLVM_DEBUG(AssertSorted(Cache, NumSortedEntries));
    NonLocalDepInfo::iterator Entry =
        std::upper_bound(Cache.begin(), Cache.begin() + NumSortedEntries,
                         NonLocalDepEntry(DirtyBB));
    if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB)
      --Entry;

    NonLocalDepEntry *ExistingResult = nullptr;
    if (Entry != Cache.begin() + NumSortedEntries &&
        Entry->getBB() == DirtyBB) {
      // If we already have an entry, and if it isn't already dirty, the block
      // is done.
      if (!Entry->getResult().isDirty())
        continue;

      // Otherwise, remember this slot so we can update the value.
      ExistingResult = &*Entry;
    }

    // If the dirty entry has a pointer, start scanning from it so we don't
    // have to rescan the entire block.
    BasicBlock::iterator ScanPos = DirtyBB->end();
    if (ExistingResult) {
      if (Instruction *Inst = ExistingResult->getResult().getInst()) {
        ScanPos = Inst->getIterator();
        // We're removing QueryInst's use of Inst.
        RemoveFromReverseMap<Instruction *>(ReverseNonLocalDeps, Inst,
                                            QueryCall);
      }
    }

    // Find out if this block has a local dependency for QueryInst.
    MemDepResult Dep;

    if (ScanPos != DirtyBB->begin()) {
      Dep = getCallDependencyFrom(QueryCall, isReadonlyCall, ScanPos, DirtyBB);
    } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
      // No dependence found. If this is the entry block of the function, it
      // is unknown, otherwise it is non-local.
      Dep = MemDepResult::getNonLocal();
    } else {
      Dep = MemDepResult::getNonFuncLocal();
    }

    // If we had a dirty entry for the block, update it. Otherwise, just add
    // a new entry.
    if (ExistingResult)
      ExistingResult->setResult(Dep);
    else
      Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));

    // If the block has a dependency (i.e. it isn't completely transparent to
    // the value), remember the association!
    if (!Dep.isNonLocal()) {
      // Keep the ReverseNonLocalDeps map up to date so we can efficiently
      // update this when we remove instructions.
      if (Instruction *Inst = Dep.getInst())
        ReverseNonLocalDeps[Inst].insert(QueryCall);
    } else {
      // If the block *is* completely transparent to the load, we need to
      // check the predecessors of this block. Add them to our worklist.
      append_range(DirtyBlocks, PredCache.get(DirtyBB));
    }
  }

  return Cache;
}

void MemoryDependenceResults::getNonLocalPointerDependency(
    Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) {
  const MemoryLocation Loc = MemoryLocation::get(QueryInst);
  bool isLoad = isa<LoadInst>(QueryInst);
  BasicBlock *FromBB = QueryInst->getParent();
  assert(FromBB);

  assert(Loc.Ptr->getType()->isPointerTy() &&
         "Can't get pointer deps of a non-pointer!");
  Result.clear();
  {
    // Check if there is a cached Def with invariant.group.
    auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst);
    if (NonLocalDefIt != NonLocalDefsCache.end()) {
      Result.push_back(NonLocalDefIt->second);
      ReverseNonLocalDefsCache[NonLocalDefIt->second.getResult().getInst()]
          .erase(QueryInst);
      NonLocalDefsCache.erase(NonLocalDefIt);
      return;
    }
  }
  // This routine does not expect to deal with volatile instructions.
  // Doing so would require piping the QueryInst all the way through.
  // TODO: volatiles can't be elided, but they can be reordered with other
  // non-volatile accesses.

  // We currently give up on any instruction which is ordered, but we do handle
  // atomic instructions which are unordered.
  // TODO: Handle ordered instructions
  auto isOrdered = [](Instruction *Inst) {
    if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
      return !LI->isUnordered();
    } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      return !SI->isUnordered();
    }
    return false;
  };
  if (QueryInst->isVolatile() || isOrdered(QueryInst)) {
    Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
                                       const_cast<Value *>(Loc.Ptr)));
    return;
  }
  const DataLayout &DL = FromBB->getModule()->getDataLayout();
  PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, &AC);

  // This is the set of blocks we've inspected, and the pointer we consider in
  // each block. Because of critical edges, we currently bail out if querying
  // a block with multiple different pointers. This can happen during PHI
  // translation.
  DenseMap<BasicBlock *, Value *> Visited;
  if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
                                  Result, Visited, true))
    return;
  Result.clear();
  Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
                                     const_cast<Value *>(Loc.Ptr)));
}

/// Compute the memdep value for BB with Pointer/PointeeSize using either
/// cached information in Cache or by doing a lookup (which may use dirty cache
/// info if available).
///
/// If we do a lookup, add the result to the cache.
MemDepResult MemoryDependenceResults::getNonLocalInfoForBlock(
    Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad,
    BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries,
    BatchAAResults &BatchAA) {

  bool isInvariantLoad = false;

  if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
    isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);

  // Do a binary search to see if we already have an entry for this block in
  // the cache set. If so, find it.
  NonLocalDepInfo::iterator Entry = std::upper_bound(
      Cache->begin(), Cache->begin() + NumSortedEntries, NonLocalDepEntry(BB));
  if (Entry != Cache->begin() && (Entry - 1)->getBB() == BB)
    --Entry;

  NonLocalDepEntry *ExistingResult = nullptr;
  if (Entry != Cache->begin() + NumSortedEntries && Entry->getBB() == BB)
    ExistingResult = &*Entry;

  // For an invariant load, only use a cached result if there is no dependency
  // for the corresponding non-invariant load; in that case the invariant load
  // cannot have any dependency either.
  if (ExistingResult && isInvariantLoad &&
      !ExistingResult->getResult().isNonFuncLocal())
    ExistingResult = nullptr;

  // If we have a cached entry, and it is non-dirty, use it as the value for
  // this dependency.
  if (ExistingResult && !ExistingResult->getResult().isDirty()) {
    ++NumCacheNonLocalPtr;
    return ExistingResult->getResult();
  }

  // Otherwise, we have to scan for the value. If we have a dirty cache
  // entry, start scanning from its position, otherwise we scan from the end
  // of the block.
  BasicBlock::iterator ScanPos = BB->end();
  if (ExistingResult && ExistingResult->getResult().getInst()) {
    assert(ExistingResult->getResult().getInst()->getParent() == BB &&
           "Instruction invalidated?");
    ++NumCacheDirtyNonLocalPtr;
    ScanPos = ExistingResult->getResult().getInst()->getIterator();

    // Eliminating the dirty entry from 'Cache', so update the reverse info.
    ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
    RemoveFromReverseMap(ReverseNonLocalPtrDeps, &*ScanPos, CacheKey);
  } else {
    ++NumUncacheNonLocalPtr;
  }

  // Scan the block for the dependency.
  MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB,
                                              QueryInst, nullptr, BatchAA);

  // Don't cache results for invariant load.
  if (isInvariantLoad)
    return Dep;

  // If we had a dirty entry for the block, update it. Otherwise, just add
  // a new entry.
  if (ExistingResult)
    ExistingResult->setResult(Dep);
  else
    Cache->push_back(NonLocalDepEntry(BB, Dep));

  // If the block has a dependency (i.e. it isn't completely transparent to
  // the value), remember the reverse association because we just added it
  // to Cache!
  if (!Dep.isLocal())
    return Dep;

  // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
  // update MemDep when we remove instructions.
  Instruction *Inst = Dep.getInst();
  assert(Inst && "Didn't depend on anything?");
  ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
  ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
  return Dep;
}

/// Sort the NonLocalDepInfo cache, given a certain number of elements in the
/// array that are already properly ordered.
///
/// This is optimized for the case when only a few entries are added.
static void
SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
                         unsigned NumSortedEntries) {
  switch (Cache.size() - NumSortedEntries) {
  case 0:
    // done, no new entries.
    break;
  case 2: {
    // Two new entries, insert the last one into place.
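    // Exactly one unsorted entry then remains at the back; it is handled by
    // the fall-through into case 1.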
99509467b48Spatrick NonLocalDepEntry Val = Cache.back();
99609467b48Spatrick Cache.pop_back();
99709467b48Spatrick MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
99809467b48Spatrick std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
99909467b48Spatrick Cache.insert(Entry, Val);
    [[fallthrough]];
  }
  case 1:
    // One new entry, just insert the new value at the appropriate position.
    if (Cache.size() != 1) {
      NonLocalDepEntry Val = Cache.back();
      Cache.pop_back();
      MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
          llvm::upper_bound(Cache, Val);
      Cache.insert(Entry, Val);
    }
    break;
  default:
    // Added many values, do a full scale sort.
    llvm::sort(Cache);
    break;
  }
}

/// Perform a dependency query based on pointer/pointeesize starting at the
/// end of StartBB.
///
/// Add any clobber/def results to the results vector and keep track of which
/// blocks are visited in 'Visited'.
///
/// This has special behavior for the first block queries (when SkipFirstBlock
/// is true). In this special case, it ignores the contents of the specified
/// block and starts returning dependence info for its predecessors.
///
/// This function returns true on success, or false to indicate that it could
/// not compute dependence information for some reason. This should be treated
/// as a clobber dependence on the first instruction in the predecessor block.
bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
    Instruction *QueryInst, const PHITransAddr &Pointer,
    const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB,
    SmallVectorImpl<NonLocalDepResult> &Result,
    DenseMap<BasicBlock *, Value *> &Visited, bool SkipFirstBlock,
    bool IsIncomplete) {
  // Look up the cached info for Pointer.
  ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);

  // Set up a temporary NLPI value. If the map doesn't yet have an entry for
  // CacheKey, this value will be inserted as the associated value. Otherwise,
  // it'll be ignored, and we'll have to check to see if the cached size and
  // aa tags are consistent with the current query.
  NonLocalPointerInfo InitialNLPI;
  InitialNLPI.Size = Loc.Size;
  InitialNLPI.AATags = Loc.AATags;

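  // Determine up front whether this query is for an invariant load. Invariant
  // loads neither consult nor update the cache below: the cached results are
  // computed for ordinary loads, which must observe intervening stores, so
  // they would be needlessly conservative here, and caching the invariant
  // answer would be wrong for ordinary loads.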
  bool isInvariantLoad = false;
  if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
    isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);

  // Get the NLPI for CacheKey, inserting one into the map if it doesn't
  // already have one.
  std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
      NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
  NonLocalPointerInfo *CacheInfo = &Pair.first->second;

  // If we already have a cache entry for this CacheKey, we may need to do some
  // work to reconcile the cache entry and the current query.
  // Invariant loads don't participate in caching, so there is no need to
  // reconcile.
  if (!isInvariantLoad && !Pair.second) {
    if (CacheInfo->Size != Loc.Size) {
      bool ThrowOutEverything;
      if (CacheInfo->Size.hasValue() && Loc.Size.hasValue()) {
        // FIXME: We may be able to do better in the face of results with mixed
        // precision. We don't appear to get them in practice, though, so just
        // be conservative.
        ThrowOutEverything =
            CacheInfo->Size.isPrecise() != Loc.Size.isPrecise() ||
            CacheInfo->Size.getValue() < Loc.Size.getValue();
      } else {
        // For our purposes, unknown size > all others.
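        // Whichever of the two sizes is unknown is treated as the greater
        // one, so the query always proceeds at the more conservative size.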
        ThrowOutEverything = !Loc.Size.hasValue();
      }

      if (ThrowOutEverything) {
        // The query's Size is greater than the cached one. Throw out the
        // cached data and proceed with the query at the greater size.
        CacheInfo->Pair = BBSkipFirstBlockPair();
        CacheInfo->Size = Loc.Size;
        for (auto &Entry : CacheInfo->NonLocalDeps)
          if (Instruction *Inst = Entry.getResult().getInst())
            RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
        CacheInfo->NonLocalDeps.clear();
        // The cache is cleared (in the above line) so we will have lost
        // information about blocks we have already visited. We therefore must
        // assume that the cache information is incomplete.
        IsIncomplete = true;
      } else {
        // This query's Size is less than the cached one. Conservatively
        // restart the query using the greater size.
        return getNonLocalPointerDepFromBB(
            QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
            StartBB, Result, Visited, SkipFirstBlock, IsIncomplete);
      }
    }

    // If the query's AATags are inconsistent with the cached one,
    // conservatively throw out the cached data and restart the query with
    // no tag if needed.
    if (CacheInfo->AATags != Loc.AATags) {
      if (CacheInfo->AATags) {
        CacheInfo->Pair = BBSkipFirstBlockPair();
        CacheInfo->AATags = AAMDNodes();
        for (auto &Entry : CacheInfo->NonLocalDeps)
          if (Instruction *Inst = Entry.getResult().getInst())
            RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
        CacheInfo->NonLocalDeps.clear();
        // The cache is cleared (in the above line) so we will have lost
        // information about blocks we have already visited. We therefore must
        // assume that the cache information is incomplete.
        IsIncomplete = true;
      }
      if (Loc.AATags)
        return getNonLocalPointerDepFromBB(
            QueryInst, Pointer, Loc.getWithoutAATags(), isLoad, StartBB,
            Result, Visited, SkipFirstBlock, IsIncomplete);
    }
  }

  NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;

  // If we have valid cached information for exactly the block we are
  // investigating, just return it with no recomputation.
  // Don't use cached information for invariant loads since the cache is
  // valid for non-invariant loads only.
  if (!IsIncomplete && !isInvariantLoad &&
      CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
    // If we have a fully cached result for this query, we can just return the
    // cached results and populate the visited set. However, we have to verify
    // that we don't already have conflicting results for these blocks. Check
    // to ensure that if a block in the results set is in the visited set that
    // it was for the same pointer query.
    if (!Visited.empty()) {
      for (auto &Entry : *Cache) {
        DenseMap<BasicBlock *, Value *>::iterator VI =
            Visited.find(Entry.getBB());
        if (VI == Visited.end() || VI->second == Pointer.getAddr())
          continue;

        // We have a pointer mismatch in a block. Just return false, saying
        // that something was clobbered in this result. We could also do a
        // non-fully cached query, but there is little point in doing this.
        return false;
      }
    }

    Value *Addr = Pointer.getAddr();
    for (auto &Entry : *Cache) {
      Visited.insert(std::make_pair(Entry.getBB(), Addr));
      if (Entry.getResult().isNonLocal()) {
        continue;
      }

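      // Only report results in blocks that are reachable from the entry
      // block; dependencies in dead blocks are not useful to clients.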
      if (DT.isReachableFromEntry(Entry.getBB())) {
        Result.push_back(
            NonLocalDepResult(Entry.getBB(), Entry.getResult(), Addr));
      }
    }
    ++NumCacheCompleteNonLocalPtr;
    return true;
  }

  // Otherwise, this is either a new block, a block with an invalid cache
  // pointer, or one that we're about to invalidate by putting more info into
  // it than its valid cache info. If the cache is empty and not explicitly
  // marked incomplete, the result will be valid cache info; otherwise it
  // won't be.
  //
  // Invariant loads don't affect the cache in any way, so there is no need
  // to update CacheInfo either.
  if (!isInvariantLoad) {
    if (!IsIncomplete && Cache->empty())
      CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
    else
      CacheInfo->Pair = BBSkipFirstBlockPair();
  }

  SmallVector<BasicBlock *, 32> Worklist;
  Worklist.push_back(StartBB);

  // PredList used inside loop.
  SmallVector<std::pair<BasicBlock *, PHITransAddr>, 16> PredList;

  // Keep track of the entries that we know are sorted. Previously cached
  // entries will all be sorted. The entries we add we only sort on demand (we
  // don't insert every element into its sorted position). We know that we
  // won't get any reuse from currently inserted values, because we don't
  // revisit blocks after we insert info for them.
  unsigned NumSortedEntries = Cache->size();
  unsigned WorklistEntries = BlockNumberLimit;
  bool GotWorklistLimit = false;
  LLVM_DEBUG(AssertSorted(*Cache));

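  // Use a BatchAAResults wrapper so that repeated alias queries are cached
  // for the duration of this walk; this is safe because the IR is not
  // modified while the worklist is being processed.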
  BatchAAResults BatchAA(AA);
  while (!Worklist.empty()) {
    BasicBlock *BB = Worklist.pop_back_val();

    // If we process a large number of blocks, the query becomes very
    // expensive and the result is likely not worth it.
    if (Result.size() > NumResultsLimit) {
      // Sort it now (if needed) so that recursive invocations of
      // getNonLocalPointerDepFromBB and other routines that could reuse the
      // cache value will only see properly sorted cache arrays.
      if (Cache && NumSortedEntries != Cache->size()) {
        SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
      }
      // Since we bail out, the "Cache" set won't contain all of the
      // results for the query. This is ok (we can still use it to accelerate
      // specific block queries) but we can't do the fastpath "return all
      // results from the set". Clear out the indicator for this.
      CacheInfo->Pair = BBSkipFirstBlockPair();
      return false;
    }

    // Skip the first block if we have it.
    if (!SkipFirstBlock) {
      // Analyze the dependency of *Pointer in FromBB. See if we already have
      // been here.
      assert(Visited.count(BB) && "Should check 'visited' before adding to WL");

      // Get the dependency info for Pointer in BB. If we have cached
      // information, we will use it, otherwise we compute it.
      LLVM_DEBUG(AssertSorted(*Cache, NumSortedEntries));
      MemDepResult Dep = getNonLocalInfoForBlock(
          QueryInst, Loc, isLoad, BB, Cache, NumSortedEntries, BatchAA);

      // If we got a Def or Clobber, add this to the list of results.
      if (!Dep.isNonLocal()) {
        if (DT.isReachableFromEntry(BB)) {
          Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
          continue;
        }
      }
    }

    // If 'Pointer' is an instruction defined in this block, then we need to do
    // phi translation to change it into a value live in the predecessor block.
    // If not, we just add the predecessors to the worklist and scan them with
    // the same Pointer.
    if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
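      // Clear SkipFirstBlock: the skip-the-first-block special case applies
      // only to StartBB itself; its predecessors are scanned normally.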
      SkipFirstBlock = false;
      SmallVector<BasicBlock *, 16> NewBlocks;
      for (BasicBlock *Pred : PredCache.get(BB)) {
        // Verify that we haven't looked at this block yet.
        std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
            Visited.insert(std::make_pair(Pred, Pointer.getAddr()));
        if (InsertRes.second) {
          // First time we've looked at Pred.
          NewBlocks.push_back(Pred);
          continue;
        }

        // If we have seen this block before, but it was with a different
        // pointer then we have a phi translation failure and we have to treat
        // this as a clobber.
        if (InsertRes.first->second != Pointer.getAddr()) {
          // Make sure to clean up the Visited map before continuing on to
          // PredTranslationFailure.
          for (unsigned i = 0; i < NewBlocks.size(); i++)
            Visited.erase(NewBlocks[i]);
          goto PredTranslationFailure;
        }
      }
      if (NewBlocks.size() > WorklistEntries) {
        // Make sure to clean up the Visited map before continuing on to
        // PredTranslationFailure.
        for (unsigned i = 0; i < NewBlocks.size(); i++)
          Visited.erase(NewBlocks[i]);
        GotWorklistLimit = true;
        goto PredTranslationFailure;
      }
      WorklistEntries -= NewBlocks.size();
      Worklist.append(NewBlocks.begin(), NewBlocks.end());
      continue;
    }

    // We do need to do phi translation; if we know ahead of time that we
    // can't phi translate this value, don't even try.
    if (!Pointer.IsPotentiallyPHITranslatable())
      goto PredTranslationFailure;

    // We may have added values to the cache list before this PHI translation.
    // If so, we haven't done anything to ensure that the cache remains sorted.
    // Sort it now (if needed) so that recursive invocations of
    // getNonLocalPointerDepFromBB and other routines that could reuse the
    // cache value will only see properly sorted cache arrays.
    if (Cache && NumSortedEntries != Cache->size()) {
      SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
      NumSortedEntries = Cache->size();
    }
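    // Null out Cache: the recursive getNonLocalPointerDepFromBB calls below
    // may grow NonLocalPointerDeps and invalidate this pointer. It is
    // re-fetched from the map once the recursion completes.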
    Cache = nullptr;

    PredList.clear();
    for (BasicBlock *Pred : PredCache.get(BB)) {
      PredList.push_back(std::make_pair(Pred, Pointer));

      // Get the PHI translated pointer in this predecessor. This can fail if
      // not translatable, in which case getAddr() returns null.
      PHITransAddr &PredPointer = PredList.back().second;
      PredPointer.PHITranslateValue(BB, Pred, &DT, /*MustDominate=*/false);
      Value *PredPtrVal = PredPointer.getAddr();

      // Check to see if we have already visited this pred block with another
      // pointer. If so, we can't do this lookup. This failure can occur
      // with PHI translation when a critical edge exists and the PHI node in
      // the successor translates to a pointer value different than the
      // pointer the block was first analyzed with.
      std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
          Visited.insert(std::make_pair(Pred, PredPtrVal));

      if (!InsertRes.second) {
        // We found the pred; take it off the list of preds to visit.
        PredList.pop_back();

        // If the predecessor was visited with PredPtr, then we already did
        // the analysis and can ignore it.
        if (InsertRes.first->second == PredPtrVal)
          continue;

        // Otherwise, the block was previously analyzed with a different
        // pointer. We can't represent the result of this case, so we just
        // treat this as a phi translation failure.

        // Make sure to clean up the Visited map before continuing on to
        // PredTranslationFailure.
        for (unsigned i = 0, n = PredList.size(); i < n; ++i)
          Visited.erase(PredList[i].first);

        goto PredTranslationFailure;
      }
    }

    // Actually process results here; this needs to be a separate loop to
    // avoid calling getNonLocalPointerDepFromBB for blocks we don't want to
    // return any results for. (getNonLocalPointerDepFromBB will modify our
    // datastructures in ways the code after the PredTranslationFailure label
    // doesn't expect.)
    for (unsigned i = 0, n = PredList.size(); i < n; ++i) {
      BasicBlock *Pred = PredList[i].first;
      PHITransAddr &PredPointer = PredList[i].second;
      Value *PredPtrVal = PredPointer.getAddr();

      bool CanTranslate = true;
      // If PHI translation was unable to find an available pointer in this
      // predecessor, then we have to assume that the pointer is clobbered in
      // that predecessor. We can still do PRE of the load, which would insert
      // a computation of the pointer in this predecessor.
      if (!PredPtrVal)
        CanTranslate = false;

      // FIXME: it is entirely possible that PHI translating will end up with
      // the same value. Consider PHI translating something like:
      // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
      // to recurse here, pedantically speaking.

      // If getNonLocalPointerDepFromBB fails here, that means the cached
      // result conflicted with the Visited list; we have to conservatively
      // assume it is unknown, but this also does not block PRE of the load.
      if (!CanTranslate ||
          !getNonLocalPointerDepFromBB(QueryInst, PredPointer,
                                       Loc.getWithNewPtr(PredPtrVal), isLoad,
                                       Pred, Result, Visited)) {
        // Add the entry to the Result list.
        NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
        Result.push_back(Entry);

        // Since we had a phi translation failure, the cache for CacheKey won't
        // include all of the entries that we need to immediately satisfy
        // future queries. Mark this in NonLocalPointerDeps by setting the
        // BBSkipFirstBlockPair pointer to null. Future reuse of the cached
        // value will have to do more work, but it won't miss the phi
        // translation failure.
        NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
        NLPI.Pair = BBSkipFirstBlockPair();
        continue;
      }
    }

    // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
    CacheInfo = &NonLocalPointerDeps[CacheKey];
    Cache = &CacheInfo->NonLocalDeps;
    NumSortedEntries = Cache->size();

    // Since we did phi translation, the "Cache" set won't contain all of the
    // results for the query. This is ok (we can still use it to accelerate
    // specific block queries) but we can't do the fastpath "return all
    // results from the set". Clear out the indicator for this.
    CacheInfo->Pair = BBSkipFirstBlockPair();
    SkipFirstBlock = false;
    continue;

  PredTranslationFailure:
    // The following code is "failure"; we can't produce a sane translation
    // for the given block. It assumes that we haven't modified any of
    // our datastructures while processing the current block.

    if (!Cache) {
      // Refresh the CacheInfo/Cache pointer if it got invalidated.
      CacheInfo = &NonLocalPointerDeps[CacheKey];
      Cache = &CacheInfo->NonLocalDeps;
      NumSortedEntries = Cache->size();
    }

    // Since we failed phi translation, the "Cache" set won't contain all of
    // the results for the query. This is ok (we can still use it to
    // accelerate specific block queries) but we can't do the fastpath
    // "return all results from the set". Clear out the indicator for this.
    CacheInfo->Pair = BBSkipFirstBlockPair();

    // If *nothing* works, mark the pointer as unknown.
    //
    // If this is the magic first block, return this as a clobber of the whole
    // incoming value. Since we can't phi translate to one of the predecessors,
    // we have to bail out.
    if (SkipFirstBlock)
      return false;

    // Results of invariant loads are not cached, so there is no cached
    // information to update.
    if (!isInvariantLoad) {
      for (NonLocalDepEntry &I : llvm::reverse(*Cache)) {
        if (I.getBB() != BB)
          continue;

        assert((GotWorklistLimit || I.getResult().isNonLocal() ||
                !DT.isReachableFromEntry(BB)) &&
               "Should only be here with transparent block");

        I.setResult(MemDepResult::getUnknown());

        break;
      }
    }
    (void)GotWorklistLimit;
    // Go ahead and report unknown dependence.
    Result.push_back(
        NonLocalDepResult(BB, MemDepResult::getUnknown(), Pointer.getAddr()));
  }

  // Okay, we're done now. If we added new values to the cache, re-sort it.
  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
  LLVM_DEBUG(AssertSorted(*Cache));
  return true;
}

/// If P exists in CachedNonLocalPointerInfo or NonLocalDefsCache, remove it.
void MemoryDependenceResults::removeCachedNonLocalPointerDependencies(
    ValueIsLoadPair P) {

  // Most of the time this cache is empty.
  if (!NonLocalDefsCache.empty()) {
    auto it = NonLocalDefsCache.find(P.getPointer());
    if (it != NonLocalDefsCache.end()) {
      RemoveFromReverseMap(ReverseNonLocalDefsCache,
                           it->second.getResult().getInst(), P.getPointer());
      NonLocalDefsCache.erase(it);
    }

    if (auto *I = dyn_cast<Instruction>(P.getPointer())) {
      auto toRemoveIt = ReverseNonLocalDefsCache.find(I);
      if (toRemoveIt != ReverseNonLocalDefsCache.end()) {
        for (const auto *entry : toRemoveIt->second)
          NonLocalDefsCache.erase(entry);
        ReverseNonLocalDefsCache.erase(toRemoveIt);
      }
    }
  }

  CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(P);
  if (It == NonLocalPointerDeps.end())
    return;

  // Remove all of the entries in the BB->val map. This involves removing
  // instructions from the reverse map.
  NonLocalDepInfo &PInfo = It->second.NonLocalDeps;

  for (const NonLocalDepEntry &DE : PInfo) {
    Instruction *Target = DE.getResult().getInst();
    if (!Target)
      continue; // Ignore non-local dep results.
    assert(Target->getParent() == DE.getBB());

    // Eliminating the dirty entry from 'Cache', so update the reverse info.
    RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
  }

  // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
  NonLocalPointerDeps.erase(It);
}

void MemoryDependenceResults::invalidateCachedPointerInfo(Value *Ptr) {
  // If Ptr isn't really a pointer, just ignore it.
  if (!Ptr->getType()->isPointerTy())
    return;
  // Flush store info for the pointer.
  removeCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
  // Flush load info for the pointer.
  removeCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
}

void MemoryDependenceResults::invalidateCachedPredecessors() {
  PredCache.clear();
}

void MemoryDependenceResults::removeInstruction(Instruction *RemInst) {
  // Walk through the Non-local dependencies, removing this one as the value
  // for any cached queries.
  NonLocalDepMapType::iterator NLDI = NonLocalDepsMap.find(RemInst);
  if (NLDI != NonLocalDepsMap.end()) {
    NonLocalDepInfo &BlockMap = NLDI->second.first;
    for (auto &Entry : BlockMap)
      if (Instruction *Inst = Entry.getResult().getInst())
        RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
    NonLocalDepsMap.erase(NLDI);
  }

  // If we have a cached local dependence query for this instruction, remove it.
  LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
  if (LocalDepEntry != LocalDeps.end()) {
    // Remove us from DepInst's reverse set now that the local dep info is
    // gone.
    if (Instruction *Inst = LocalDepEntry->second.getInst())
      RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);

    // Remove this local dependency info.
    LocalDeps.erase(LocalDepEntry);
  }

  // If we have any cached dependencies on this instruction, remove them.

  // If the instruction is a pointer, remove it from both the load info and
  // the store info.
  if (RemInst->getType()->isPointerTy()) {
    removeCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
    removeCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
  } else {
    // Otherwise, if the instruction is in the map directly, it must be a
    // load. Remove it.
    auto toRemoveIt = NonLocalDefsCache.find(RemInst);
    if (toRemoveIt != NonLocalDefsCache.end()) {
      assert(isa<LoadInst>(RemInst) &&
             "only load instructions should be added directly");
      const Instruction *DepV = toRemoveIt->second.getResult().getInst();
      ReverseNonLocalDefsCache.find(DepV)->second.erase(RemInst);
      NonLocalDefsCache.erase(toRemoveIt);
    }
  }

  // Loop over all of the things that depend on the instruction we're removing.
  SmallVector<std::pair<Instruction *, Instruction *>, 8> ReverseDepsToAdd;

  // If we find RemInst as a clobber or Def in any of the maps for other
  // values, we need to replace its entry with a dirty version of the
  // instruction after it. If RemInst is a terminator, we use a null dirty
  // value.
  //
  // Using a dirty version of the instruction after RemInst saves having to
  // scan the entire block to get to this point.
  MemDepResult NewDirtyVal;
  if (!RemInst->isTerminator())
    NewDirtyVal = MemDepResult::getDirty(&*++RemInst->getIterator());

  ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
  if (ReverseDepIt != ReverseLocalDeps.end()) {
    // RemInst can't be the terminator if it has local stuff depending on it.
    assert(!ReverseDepIt->second.empty() && !RemInst->isTerminator() &&
           "Nothing can locally depend on a terminator");

    for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
      assert(InstDependingOnRemInst != RemInst &&
             "Already removed our local dep info");

      LocalDeps[InstDependingOnRemInst] = NewDirtyVal;

      // Make sure to remember that new things depend on NewDepInst.
      assert(NewDirtyVal.getInst() &&
             "There is no way something else can have "
             "a local dep on this if it is a terminator!");
      ReverseDepsToAdd.push_back(
          std::make_pair(NewDirtyVal.getInst(), InstDependingOnRemInst));
    }

    ReverseLocalDeps.erase(ReverseDepIt);

    // Add new reverse deps after scanning the set, to avoid invalidating the
    // 'ReverseDeps' reference.
    while (!ReverseDepsToAdd.empty()) {
      ReverseLocalDeps[ReverseDepsToAdd.back().first].insert(
          ReverseDepsToAdd.back().second);
      ReverseDepsToAdd.pop_back();
    }
  }

  ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
  if (ReverseDepIt != ReverseNonLocalDeps.end()) {
    for (Instruction *I : ReverseDepIt->second) {
      assert(I != RemInst && "Already removed NonLocalDep info for RemInst");

      PerInstNLInfo &INLD = NonLocalDepsMap[I];
      // The information is now dirty!
      INLD.second = true;

      for (auto &Entry : INLD.first) {
        if (Entry.getResult().getInst() != RemInst)
          continue;

        // Convert to a dirty entry for the subsequent instruction.
        Entry.setResult(NewDirtyVal);

        if (Instruction *NextI = NewDirtyVal.getInst())
          ReverseDepsToAdd.push_back(std::make_pair(NextI, I));
      }
    }

    ReverseNonLocalDeps.erase(ReverseDepIt);

    // Add new reverse deps after scanning the set, to avoid invalidating
    // 'Set'.
    while (!ReverseDepsToAdd.empty()) {
      ReverseNonLocalDeps[ReverseDepsToAdd.back().first].insert(
          ReverseDepsToAdd.back().second);
      ReverseDepsToAdd.pop_back();
    }
  }

  // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
  // value in the NonLocalPointerDeps info.
  ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
      ReverseNonLocalPtrDeps.find(RemInst);
  if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
    SmallVector<std::pair<Instruction *, ValueIsLoadPair>, 8>
        ReversePtrDepsToAdd;

    for (ValueIsLoadPair P : ReversePtrDepIt->second) {
      assert(P.getPointer() != RemInst &&
             "Already removed NonLocalPointerDeps info for RemInst");

      NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;

      // The cache is not valid for any specific block anymore.
      NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();

      // Update any entries for RemInst to use the instruction after it.
      for (auto &Entry : NLPDI) {
        if (Entry.getResult().getInst() != RemInst)
          continue;

        // Convert to a dirty entry for the subsequent instruction.
        Entry.setResult(NewDirtyVal);

        if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
          ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
      }

      // Re-sort the NonLocalDepInfo. Changing the dirty entry to its
      // subsequent value may invalidate the sortedness.
      llvm::sort(NLPDI);
    }

    ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);

    while (!ReversePtrDepsToAdd.empty()) {
      ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first].insert(
          ReversePtrDepsToAdd.back().second);
      ReversePtrDepsToAdd.pop_back();
    }
  }

  assert(!NonLocalDepsMap.count(RemInst) && "RemInst got reinserted?");
  LLVM_DEBUG(verifyRemoved(RemInst));
}

/// Verify that the specified instruction does not occur in our internal data
/// structures.
///
/// This function verifies by asserting in debug builds.
void MemoryDependenceResults::verifyRemoved(Instruction *D) const {
#ifndef NDEBUG
  for (const auto &DepKV : LocalDeps) {
    assert(DepKV.first != D && "Inst occurs in data structures");
    assert(DepKV.second.getInst() != D && "Inst occurs in data structures");
  }

  for (const auto &DepKV : NonLocalPointerDeps) {
    assert(DepKV.first.getPointer() != D && "Inst occurs in NLPD map key");
    for (const auto &Entry : DepKV.second.NonLocalDeps)
      assert(Entry.getResult().getInst() != D && "Inst occurs as NLPD value");
  }

  for (const auto &DepKV : NonLocalDepsMap) {
    assert(DepKV.first != D && "Inst occurs in data structures");
    const PerInstNLInfo &INLD = DepKV.second;
    for (const auto &Entry : INLD.first)
      assert(Entry.getResult().getInst() != D &&
             "Inst occurs in data structures");
  }

  for (const auto &DepKV : ReverseLocalDeps) {
    assert(DepKV.first != D && "Inst occurs in data structures");
    for (Instruction *Inst : DepKV.second)
      assert(Inst != D && "Inst occurs in data structures");
  }

  for (const auto &DepKV : ReverseNonLocalDeps) {
    assert(DepKV.first != D && "Inst occurs in data structures");
    for (Instruction *Inst : DepKV.second)
      assert(Inst != D && "Inst occurs in data structures");
  }

  for (const auto &DepKV : ReverseNonLocalPtrDeps) {
    assert(DepKV.first != D && "Inst occurs in rev NLPD map");

    for (ValueIsLoadPair P : DepKV.second)
      assert(P != ValueIsLoadPair(D, false) && P != ValueIsLoadPair(D, true) &&
             "Inst occurs in ReverseNonLocalPtrDeps map");
  }
#endif
}

AnalysisKey MemoryDependenceAnalysis::Key;

MemoryDependenceAnalysis::MemoryDependenceAnalysis()
    : DefaultBlockScanLimit(BlockScanLimit) {}

MemoryDependenceResults
MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
  auto &AA = AM.getResult<AAManager>(F);
  auto &AC = AM.getResult<AssumptionAnalysis>(F);
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  return MemoryDependenceResults(AA, AC, TLI, DT, DefaultBlockScanLimit);
}

char MemoryDependenceWrapperPass::ID = 0;

INITIALIZE_PASS_BEGIN(MemoryDependenceWrapperPass, "memdep",
                      "Memory Dependence Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(MemoryDependenceWrapperPass, "memdep",
                    "Memory Dependence Analysis", false, true)

MemoryDependenceWrapperPass::MemoryDependenceWrapperPass() : FunctionPass(ID) {
  initializeMemoryDependenceWrapperPassPass(*PassRegistry::getPassRegistry());
}

MemoryDependenceWrapperPass::~MemoryDependenceWrapperPass() = default;

void MemoryDependenceWrapperPass::releaseMemory() {
  MemDep.reset();
}

void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AU.addRequired<AssumptionCacheTracker>();
  AU.addRequired<DominatorTreeWrapperPass>();
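  // AA and TLI must stay alive for as long as this pass's results are used,
  // since MemDep answers queries lazily after runOnFunction returns; hence
  // the transitive requirements below.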
  AU.addRequiredTransitive<AAResultsWrapperPass>();
  AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
}

bool MemoryDependenceResults::invalidate(
    Function &F, const PreservedAnalyses &PA,
    FunctionAnalysisManager::Invalidator &Inv) {
  // Check whether our analysis is preserved.
  auto PAC = PA.getChecker<MemoryDependenceAnalysis>();
  if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
    // If not, give up now.
    return true;

  // Check whether the analyses we depend on became invalid for any reason.
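  // (TargetLibraryAnalysis is not re-checked here, presumably because its
  // result does not change for a given function.)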
  if (Inv.invalidate<AAManager>(F, PA) ||
      Inv.invalidate<AssumptionAnalysis>(F, PA) ||
      Inv.invalidate<DominatorTreeAnalysis>(F, PA))
    return true;

  // Otherwise this analysis result remains valid.
  return false;
}

unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const {
  return DefaultBlockScanLimit;
}

bool MemoryDependenceWrapperPass::runOnFunction(Function &F) {
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  MemDep.emplace(AA, AC, TLI, DT, BlockScanLimit);
  return false;
}