//===-- AMDGPUMemoryUtils.cpp - -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUMemoryUtils.h"
#include "AMDGPU.h"
#include "AMDGPUBaseInfo.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/ReplaceConstant.h"

#define DEBUG_TYPE "amdgpu-memory-utils"

using namespace llvm;

namespace llvm::AMDGPU {

Align getAlign(const DataLayout &DL, const GlobalVariable *GV) {
  return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL),
                                       GV->getValueType());
}

bool isDynamicLDS(const GlobalVariable &GV) {
  // An external, zero-size addrspace(3) variable without an initializer is
  // dynamic LDS.
  const Module *M = GV.getParent();
  const DataLayout &DL = M->getDataLayout();
  if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
    return false;
  return DL.getTypeAllocSize(GV.getValueType()) == 0;
}

bool isLDSVariableToLower(const GlobalVariable &GV) {
  if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
    return false;
  }
  if (isDynamicLDS(GV)) {
    return true;
  }
  if (GV.isConstant()) {
    // A constant undef variable can't be written to, and any load is
    // undef, so it should be eliminated by the optimizer. It could be
    // dropped by the back end if not. This pass skips over it.
    return false;
  }
  if (GV.hasInitializer() && !isa<UndefValue>(GV.getInitializer())) {
    // Initializers are unimplemented for the LDS address space.
    // Leave such variables in place for consistent error reporting.
    return false;
  }
  return true;
}

bool eliminateConstantExprUsesOfLDSFromAllInstructions(Module &M) {
  // Constants are uniqued within LLVM. A ConstantExpr referring to an LDS
  // global may have uses from multiple different functions as a result.
  // This pass specialises LDS variables with respect to the kernel that
  // allocates them.

  // This is semantically equivalent to the following (unimplemented because
  // it would be slow):
  // for (auto &F : M.functions())
  //   for (auto &BB : F)
  //     for (auto &I : BB)
  //       for (Use &Op : I.operands())
  //         if (constantExprUsesLDS(Op))
  //           replaceConstantExprInFunction(I, Op);

  SmallVector<Constant *> LDSGlobals;
  for (auto &GV : M.globals())
    if (AMDGPU::isLDSVariableToLower(GV))
      LDSGlobals.push_back(&GV);
  return convertUsersOfConstantsToInstructions(LDSGlobals);
}

void getUsesOfLDSByFunction(const CallGraph &CG, Module &M,
                            FunctionVariableMap &kernels,
                            FunctionVariableMap &Functions) {
  // Get uses from the current function, excluding uses by called functions.
  // Two output variables to avoid walking the globals list twice.
  for (auto &GV : M.globals()) {
    if (!AMDGPU::isLDSVariableToLower(GV))
      continue;
    for (User *V : GV.users()) {
      if (auto *I = dyn_cast<Instruction>(V)) {
        Function *F = I->getFunction();
        if (isKernelLDS(F))
          kernels[F].insert(&GV);
        else
          Functions[F].insert(&GV);
      }
    }
  }
}

bool isKernelLDS(const Function *F) {
  // Some weirdness here. AMDGPU::isKernelCC does not call into
  // AMDGPU::isKernel with the calling conv, it instead calls into
  // isModuleEntryFunction which returns true for more calling conventions
  // than AMDGPU::isKernel does. There's a FIXME on AMDGPU::isKernel.
  // There's also a test that checks that the LDS lowering does not hit on
  // a graphics shader, denoted amdgpu_ps, so stay with the limited case.
  // Putting LDS in the name of the function to draw attention to this.
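  // For reference (based on AMDGPUBaseInfo, not restated in this file):
  // AMDGPU::isKernel is true only for the amdgpu_kernel and spir_kernel
  // calling conventions, whereas isModuleEntryFunction also accepts other
  // entry points such as graphics shaders.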
  return AMDGPU::isKernel(F->getCallingConv());
}

LDSUsesInfoTy getTransitiveUsesOfLDS(const CallGraph &CG, Module &M) {

  FunctionVariableMap DirectMapKernel;
  FunctionVariableMap DirectMapFunction;
  getUsesOfLDSByFunction(CG, M, DirectMapKernel, DirectMapFunction);

  // Collect variables that are used by functions whose address has escaped.
  DenseSet<GlobalVariable *> VariablesReachableThroughFunctionPointer;
  for (Function &F : M.functions()) {
    if (!isKernelLDS(&F))
      if (F.hasAddressTaken(nullptr,
                            /* IgnoreCallbackUses */ false,
                            /* IgnoreAssumeLikeCalls */ false,
                            /* IgnoreLLVMUsed */ true,
                            /* IgnoreArcAttachedCall */ false)) {
        set_union(VariablesReachableThroughFunctionPointer,
                  DirectMapFunction[&F]);
      }
  }

  auto FunctionMakesUnknownCall = [&](const Function *F) -> bool {
    assert(!F->isDeclaration());
    for (const CallGraphNode::CallRecord &R : *CG[F]) {
      if (!R.second->getFunction())
        return true;
    }
    return false;
  };

  // Work out which variables are reachable through function calls.
  FunctionVariableMap TransitiveMapFunction = DirectMapFunction;

  // If the function makes any unknown call, assume the worst case: that it
  // can access all variables accessed by functions whose address escaped.
  for (Function &F : M.functions()) {
    if (!F.isDeclaration() && FunctionMakesUnknownCall(&F)) {
      if (!isKernelLDS(&F)) {
        set_union(TransitiveMapFunction[&F],
                  VariablesReachableThroughFunctionPointer);
      }
    }
  }

  // Direct implementation of collecting all variables reachable from each
  // function.
  for (Function &Func : M.functions()) {
    if (Func.isDeclaration() || isKernelLDS(&Func))
      continue;

    DenseSet<Function *> seen; // catches cycles
    SmallVector<Function *, 4> wip = {&Func};

    while (!wip.empty()) {
      Function *F = wip.pop_back_val();

      // Can accelerate this by referring to the transitive map for functions
      // that have already been computed, with more care than this.
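      // Each function reachable from Func contributes its direct LDS uses to
      // Func's transitive set; seen/wip implement a standard DFS worklist.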
      set_union(TransitiveMapFunction[&Func], DirectMapFunction[F]);

      for (const CallGraphNode::CallRecord &R : *CG[F]) {
        Function *Ith = R.second->getFunction();
        if (Ith) {
          if (!seen.contains(Ith)) {
            seen.insert(Ith);
            wip.push_back(Ith);
          }
        }
      }
    }
  }

  // DirectMapKernel lists which variables are used by the kernel directly;
  // now find the variables which are used through a function call.
  FunctionVariableMap IndirectMapKernel;

  for (Function &Func : M.functions()) {
    if (Func.isDeclaration() || !isKernelLDS(&Func))
      continue;

    for (const CallGraphNode::CallRecord &R : *CG[&Func]) {
      Function *Ith = R.second->getFunction();
      if (Ith) {
        set_union(IndirectMapKernel[&Func], TransitiveMapFunction[Ith]);
      } else {
        set_union(IndirectMapKernel[&Func],
                  VariablesReachableThroughFunctionPointer);
      }
    }
  }

  // Verify that we fall into one of two cases:
  //   - All variables are either absolute or direct-mapped dynamic LDS that
  //     is not lowered. This is a re-run of the pass, so we don't have
  //     anything to do.
  //   - No variables are absolute.
  std::optional<bool> HasAbsoluteGVs;
  for (auto &Map : {DirectMapKernel, IndirectMapKernel}) {
    for (auto &[Fn, GVs] : Map) {
      for (auto *GV : GVs) {
        bool IsAbsolute = GV->isAbsoluteSymbolRef();
        bool IsDirectMapDynLDSGV =
            AMDGPU::isDynamicLDS(*GV) && DirectMapKernel.contains(Fn);
        if (IsDirectMapDynLDSGV)
          continue;
        if (HasAbsoluteGVs.has_value()) {
          if (*HasAbsoluteGVs != IsAbsolute) {
            report_fatal_error(
                "Module cannot mix absolute and non-absolute LDS GVs");
          }
        } else
          HasAbsoluteGVs = IsAbsolute;
      }
    }
  }

  // If we only had absolute GVs, we have nothing to do; return an empty
  // result.
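  // (Per the invariant checked above, all-absolute GVs indicate a previous
  // run of the pass has presumably already assigned addresses.)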
  if (HasAbsoluteGVs && *HasAbsoluteGVs)
    return {FunctionVariableMap(), FunctionVariableMap()};

  return {std::move(DirectMapKernel), std::move(IndirectMapKernel)};
}

void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot,
                               ArrayRef<StringRef> FnAttrs) {
  for (StringRef Attr : FnAttrs)
    KernelRoot->removeFnAttr(Attr);

  SmallVector<Function *> WorkList = {CG[KernelRoot]->getFunction()};
  SmallPtrSet<Function *, 8> Visited;
  bool SeenUnknownCall = false;

  while (!WorkList.empty()) {
    Function *F = WorkList.pop_back_val();

    for (auto &CallRecord : *CG[F]) {
      if (!CallRecord.second)
        continue;

      Function *Callee = CallRecord.second->getFunction();
      if (!Callee) {
        if (!SeenUnknownCall) {
          SeenUnknownCall = true;

          // If we see any indirect calls, assume nothing about potential
          // targets.
          // TODO: This could be refined to possible LDS global users.
          for (auto &ExternalCallRecord : *CG.getExternalCallingNode()) {
            Function *PotentialCallee =
                ExternalCallRecord.second->getFunction();
            assert(PotentialCallee);
            if (!isKernelLDS(PotentialCallee)) {
              for (StringRef Attr : FnAttrs)
                PotentialCallee->removeFnAttr(Attr);
            }
          }
        }
      } else {
        for (StringRef Attr : FnAttrs)
          Callee->removeFnAttr(Attr);
        if (Visited.insert(Callee).second)
          WorkList.push_back(Callee);
      }
    }
  }
}

bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
  Instruction *DefInst = Def->getMemoryInst();

  if (isa<FenceInst>(DefInst))
    return false;

  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::amdgcn_s_barrier:
    case Intrinsic::amdgcn_s_barrier_signal:
    case Intrinsic::amdgcn_s_barrier_signal_var:
    case Intrinsic::amdgcn_s_barrier_signal_isfirst:
    case Intrinsic::amdgcn_s_barrier_signal_isfirst_var:
    case Intrinsic::amdgcn_s_barrier_init:
    case Intrinsic::amdgcn_s_barrier_join:
    case Intrinsic::amdgcn_s_barrier_wait:
    case Intrinsic::amdgcn_s_barrier_leave:
    case Intrinsic::amdgcn_s_get_barrier_state:
    case Intrinsic::amdgcn_s_wakeup_barrier:
    case Intrinsic::amdgcn_wave_barrier:
    case Intrinsic::amdgcn_sched_barrier:
    case Intrinsic::amdgcn_sched_group_barrier:
      return false;
    default:
      break;
    }
  }

  // Ignore atomics not aliasing with the original load. Any atomic is a
  // universal MemoryDef from MSSA's point of view, just like a fence.
  const auto checkNoAlias = [AA, Ptr](auto I) -> bool {
    return I && AA->isNoAlias(I->getPointerOperand(), Ptr);
  };

  if (checkNoAlias(dyn_cast<AtomicCmpXchgInst>(DefInst)) ||
      checkNoAlias(dyn_cast<AtomicRMWInst>(DefInst)))
    return false;

  return true;
}

bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA,
                           AAResults *AA) {
  MemorySSAWalker *Walker = MSSA->getWalker();
  SmallVector<MemoryAccess *> WorkList{Walker->getClobberingMemoryAccess(Load)};
  SmallSet<MemoryAccess *, 8> Visited;
  MemoryLocation Loc(MemoryLocation::get(Load));

  LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n');

  // Start with the nearest dominating clobbering access; it will be either
  // live on entry (nothing to do, the load is not clobbered), a MemoryDef, or
  // a MemoryPhi if several MemoryDefs can define this memory state. In the
  // latter case add all the Defs to the WorkList and continue going up,
  // checking all the definitions of this memory location until the root.
  // When all the defs are exhausted and we have reached the entry state,
  // there is no clobber. Along the scan, ignore barriers and fences, which
  // are considered clobbers by MemorySSA but do not really write anything
  // into the memory.
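  // For example, if the nearest clobber of the load is a MemoryPhi merging
  // two stores, both stores end up on the WorkList and each is tested with
  // isReallyAClobber before we walk further up its defining accesses.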
  while (!WorkList.empty()) {
    MemoryAccess *MA = WorkList.pop_back_val();
    if (!Visited.insert(MA).second)
      continue;

    if (MSSA->isLiveOnEntryDef(MA))
      continue;

    if (MemoryDef *Def = dyn_cast<MemoryDef>(MA)) {
      LLVM_DEBUG(dbgs() << "  Def: " << *Def->getMemoryInst() << '\n');

      if (isReallyAClobber(Load->getPointerOperand(), Def, AA)) {
        LLVM_DEBUG(dbgs() << "    -> load is clobbered\n");
        return true;
      }

      WorkList.push_back(
          Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc));
      continue;
    }

    const MemoryPhi *Phi = cast<MemoryPhi>(MA);
    for (const auto &Use : Phi->incoming_values())
      WorkList.push_back(cast<MemoryAccess>(&Use));
  }

  LLVM_DEBUG(dbgs() << "    -> no clobber\n");
  return false;
}

} // end namespace llvm::AMDGPU