//===-- AMDGPUMemoryUtils.cpp - -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUMemoryUtils.h"
#include "AMDGPU.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/ReplaceConstant.h"

#define DEBUG_TYPE "amdgpu-memory-utils"

using namespace llvm;

namespace llvm::AMDGPU {

Align getAlign(const DataLayout &DL, const GlobalVariable *GV) {
  return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL),
                                       GV->getValueType());
}

TargetExtType *isNamedBarrier(const GlobalVariable &GV) {
  // TODO: Allow arrays and structs, if all members are barriers
  // in the same scope.
  // TODO: Disallow other uses of target("amdgcn.named.barrier") including:
  // - Structs containing barriers in different scopes.
  // - Structs containing a mixture of barriers and other data.
  // - Globals in other address spaces.
  // - Allocas.
  Type *Ty = GV.getValueType();
  while (true) {
    if (auto *TTy = dyn_cast<TargetExtType>(Ty))
      return TTy->getName() == "amdgcn.named.barrier" ? TTy : nullptr;
    if (auto *STy = dyn_cast<StructType>(Ty)) {
      if (STy->getNumElements() == 0)
        return nullptr;
      Ty = STy->getElementType(0);
      continue;
    }
    return nullptr;
  }
}

bool isDynamicLDS(const GlobalVariable &GV) {
  // An external, zero-size addrspace(3) variable without an initializer is
  // dynamic LDS.
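  // Dynamic LDS is typically declared as an external, zero-length array in
  // addrspace(3); its actual size is only known at kernel launch, which is
  // why the allocation size computed below is zero.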
  const Module *M = GV.getParent();
  const DataLayout &DL = M->getDataLayout();
  if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
    return false;
  return DL.getTypeAllocSize(GV.getValueType()) == 0;
}

bool isLDSVariableToLower(const GlobalVariable &GV) {
  if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
    return false;
  }
  if (isDynamicLDS(GV)) {
    return true;
  }
  if (GV.isConstant()) {
    // A constant undef variable can't be written to, and any load is
    // undef, so it should be eliminated by the optimizer. It could be
    // dropped by the back end if not. This pass skips over it.
    return false;
  }
  if (GV.hasInitializer() && !isa<UndefValue>(GV.getInitializer())) {
    // Initializers are unimplemented for LDS address space.
    // Leave such variables in place for consistent error reporting.
    return false;
  }
  return true;
}

bool eliminateConstantExprUsesOfLDSFromAllInstructions(Module &M) {
  // Constants are uniqued within LLVM. A ConstantExpr referring to a LDS
  // global may have uses from multiple different functions as a result.
  // This pass specialises LDS variables with respect to the kernel that
  // allocates them.
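  // Converting the ConstantExpr users into instructions gives every use of an
  // LDS global a parent function, which the per-function analysis relies on.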
  // This is semantically equivalent to the following, which is not
  // implemented that way because it would be slow:
  // for (auto &F : M.functions())
  //   for (auto &BB : F)
  //     for (auto &I : BB)
  //       for (Use &Op : I.operands())
  //         if (constantExprUsesLDS(Op))
  //           replaceConstantExprInFunction(I, Op);

  SmallVector<Constant *> LDSGlobals;
  for (auto &GV : M.globals())
    if (AMDGPU::isLDSVariableToLower(GV))
      LDSGlobals.push_back(&GV);
  return convertUsersOfConstantsToInstructions(LDSGlobals);
}

void getUsesOfLDSByFunction(const CallGraph &CG, Module &M,
                            FunctionVariableMap &kernels,
                            FunctionVariableMap &Functions) {
  // Get uses from the current function, excluding uses by called functions.
  // Two output variables to avoid walking the globals list twice.
  for (auto &GV : M.globals()) {
    if (!AMDGPU::isLDSVariableToLower(GV))
      continue;
    for (User *V : GV.users()) {
      if (auto *I = dyn_cast<Instruction>(V)) {
        Function *F = I->getFunction();
        if (isKernelLDS(F))
          kernels[F].insert(&GV);
        else
          Functions[F].insert(&GV);
      }
    }
  }
}

bool isKernelLDS(const Function *F) {
  // Some weirdness here. AMDGPU::isKernelCC does not call into
  // AMDGPU::isKernel with the calling conv; it instead calls into
  // isModuleEntryFunction, which returns true for more calling conventions
  // than AMDGPU::isKernel does. There's a FIXME on AMDGPU::isKernel.
  // There's also a test that checks that the LDS lowering does not hit on
  // a graphics shader, denoted amdgpu_ps, so stay with the limited case.
  // Putting LDS in the name of the function to draw attention to this.
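  // AMDGPU::isKernel accepts only the amdgpu_kernel and spir_kernel calling
  // conventions, which is the narrower check wanted here.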
  return AMDGPU::isKernel(F->getCallingConv());
}

LDSUsesInfoTy getTransitiveUsesOfLDS(const CallGraph &CG, Module &M) {

  FunctionVariableMap DirectMapKernel;
  FunctionVariableMap DirectMapFunction;
  getUsesOfLDSByFunction(CG, M, DirectMapKernel, DirectMapFunction);

  // Collect functions whose address has escaped
  DenseSet<Function *> AddressTakenFuncs;
  for (Function &F : M.functions()) {
    if (!isKernelLDS(&F))
      if (F.hasAddressTaken(nullptr,
                            /* IgnoreCallbackUses */ false,
                            /* IgnoreAssumeLikeCalls */ false,
                            /* IgnoreLLVMUsed */ true,
                            /* IgnoreArcAttachedCall */ false)) {
        AddressTakenFuncs.insert(&F);
      }
  }

  // Collect variables that are used by functions whose address has escaped
  DenseSet<GlobalVariable *> VariablesReachableThroughFunctionPointer;
  for (Function *F : AddressTakenFuncs) {
    set_union(VariablesReachableThroughFunctionPointer, DirectMapFunction[F]);
  }

  auto FunctionMakesUnknownCall = [&](const Function *F) -> bool {
    assert(!F->isDeclaration());
    for (const CallGraphNode::CallRecord &R : *CG[F]) {
      if (!R.second->getFunction())
        return true;
    }
    return false;
  };

  // Work out which variables are reachable through function calls
  FunctionVariableMap TransitiveMapFunction = DirectMapFunction;

  // If the function makes any unknown call, assume the worst case that it can
  // access all variables accessed by functions whose address escaped
  for (Function &F : M.functions()) {
    if (!F.isDeclaration() && FunctionMakesUnknownCall(&F)) {
      if (!isKernelLDS(&F)) {
        set_union(TransitiveMapFunction[&F],
                  VariablesReachableThroughFunctionPointer);
      }
    }
  }

  // Direct implementation of collecting all variables reachable from each
  // function
  for (Function &Func : M.functions()) {
    if (Func.isDeclaration() || isKernelLDS(&Func))
      continue;

    DenseSet<Function *> seen; // catches cycles
    SmallVector<Function *, 4> wip = {&Func};

    while (!wip.empty()) {
      Function *F = wip.pop_back_val();

      // This could be accelerated by consulting the transitive map for
      // functions that have already been computed, though doing so takes more
      // care than this.
      set_union(TransitiveMapFunction[&Func], DirectMapFunction[F]);

      for (const CallGraphNode::CallRecord &R : *CG[F]) {
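        // Walk to every known callee that has not been seen yet; call records
        // without a resolvable callee (e.g. indirect calls) are skipped here.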
        Function *Ith = R.second->getFunction();
        if (Ith) {
          if (!seen.contains(Ith)) {
            seen.insert(Ith);
            wip.push_back(Ith);
          }
        }
      }
    }
  }

  // Collect variables that are transitively used by functions whose address
  // has escaped
  for (Function *F : AddressTakenFuncs) {
    set_union(VariablesReachableThroughFunctionPointer,
              TransitiveMapFunction[F]);
  }

  // DirectMapKernel lists which variables are used directly by each kernel;
  // now find the variables which are used through a function call.
  FunctionVariableMap IndirectMapKernel;

  for (Function &Func : M.functions()) {
    if (Func.isDeclaration() || !isKernelLDS(&Func))
      continue;

    for (const CallGraphNode::CallRecord &R : *CG[&Func]) {
      Function *Ith = R.second->getFunction();
      if (Ith) {
        set_union(IndirectMapKernel[&Func], TransitiveMapFunction[Ith]);
      }
    }

    // Check if the kernel encounters unknown calls, whether directly or
    // indirectly.
    bool SeesUnknownCalls = [&]() {
      SmallVector<Function *> WorkList = {CG[&Func]->getFunction()};
      SmallPtrSet<Function *, 8> Visited;

      while (!WorkList.empty()) {
        Function *F = WorkList.pop_back_val();

        for (const CallGraphNode::CallRecord &CallRecord : *CG[F]) {
          if (!CallRecord.second)
            continue;

          Function *Callee = CallRecord.second->getFunction();
          if (!Callee)
            return true;

          if (Visited.insert(Callee).second)
            WorkList.push_back(Callee);
        }
      }
      return false;
    }();

    if (SeesUnknownCalls) {
      set_union(IndirectMapKernel[&Func],
                VariablesReachableThroughFunctionPointer);
    }
  }

  // Verify that we fall into one of 2 cases:
  //    - All variables are either absolute
  //      or direct mapped dynamic LDS that is not lowered.
  //      This is a re-run of the pass, so we don't have anything to do.
  //    - No variables are absolute.
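  // HasAbsoluteGVs latches the state of the first LDS variable inspected; any
  // later variable that disagrees means the module mixes absolute and
  // non-absolute LDS globals, which is rejected below with a fatal error.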
  std::optional<bool> HasAbsoluteGVs;
  bool HasSpecialGVs = false;
  for (auto &Map : {DirectMapKernel, IndirectMapKernel}) {
    for (auto &[Fn, GVs] : Map) {
      for (auto *GV : GVs) {
        bool IsAbsolute = GV->isAbsoluteSymbolRef();
        bool IsDirectMapDynLDSGV =
            AMDGPU::isDynamicLDS(*GV) && DirectMapKernel.contains(Fn);
        if (IsDirectMapDynLDSGV)
          continue;
        if (isNamedBarrier(*GV)) {
          HasSpecialGVs = true;
          continue;
        }
        if (HasAbsoluteGVs.has_value()) {
          if (*HasAbsoluteGVs != IsAbsolute) {
            report_fatal_error(
                "Module cannot mix absolute and non-absolute LDS GVs");
          }
        } else
          HasAbsoluteGVs = IsAbsolute;
      }
    }
  }

  // If we only had absolute GVs, we have nothing to do; return an empty
  // result.
  if (HasAbsoluteGVs && *HasAbsoluteGVs)
    return {FunctionVariableMap(), FunctionVariableMap(), false};

  return {std::move(DirectMapKernel), std::move(IndirectMapKernel),
          HasSpecialGVs};
}

void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot,
                               ArrayRef<StringRef> FnAttrs) {
  for (StringRef Attr : FnAttrs)
    KernelRoot->removeFnAttr(Attr);

  SmallVector<Function *> WorkList = {CG[KernelRoot]->getFunction()};
  SmallPtrSet<Function *, 8> Visited;
  bool SeenUnknownCall = false;

  while (!WorkList.empty()) {
    Function *F = WorkList.pop_back_val();

    for (auto &CallRecord : *CG[F]) {
      if (!CallRecord.second)
        continue;

      Function *Callee = CallRecord.second->getFunction();
      if (!Callee) {
        if (!SeenUnknownCall) {
          SeenUnknownCall = true;

          // If we see any indirect calls, assume nothing about potential
          // targets.
          // TODO: This could be refined to possible LDS global users.
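          // The external calling node conservatively lists every function
          // that may be called from outside the module or whose address is
          // taken, so it over-approximates the possible indirect call targets.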
          for (auto &ExternalCallRecord : *CG.getExternalCallingNode()) {
            Function *PotentialCallee =
                ExternalCallRecord.second->getFunction();
            assert(PotentialCallee);
            if (!isKernelLDS(PotentialCallee)) {
              for (StringRef Attr : FnAttrs)
                PotentialCallee->removeFnAttr(Attr);
            }
          }
        }
      } else {
        for (StringRef Attr : FnAttrs)
          Callee->removeFnAttr(Attr);
        if (Visited.insert(Callee).second)
          WorkList.push_back(Callee);
      }
    }
  }
}

bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
  Instruction *DefInst = Def->getMemoryInst();

  if (isa<FenceInst>(DefInst))
    return false;

  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::amdgcn_s_barrier:
    case Intrinsic::amdgcn_s_barrier_signal:
    case Intrinsic::amdgcn_s_barrier_signal_var:
    case Intrinsic::amdgcn_s_barrier_signal_isfirst:
    case Intrinsic::amdgcn_s_barrier_init:
    case Intrinsic::amdgcn_s_barrier_join:
    case Intrinsic::amdgcn_s_barrier_wait:
    case Intrinsic::amdgcn_s_barrier_leave:
    case Intrinsic::amdgcn_s_get_barrier_state:
    case Intrinsic::amdgcn_wave_barrier:
    case Intrinsic::amdgcn_sched_barrier:
    case Intrinsic::amdgcn_sched_group_barrier:
      return false;
    default:
      break;
    }
  }

  // Ignore atomics not aliasing with the original load; any atomic is a
  // universal MemoryDef from MSSA's point of view too, just like a fence.
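  // The helper below only reports "no alias" when DefInst is an atomic of the
  // queried kind and alias analysis proves its pointer operand cannot alias
  // Ptr; otherwise the def is conservatively treated as a clobber.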
  const auto checkNoAlias = [AA, Ptr](auto I) -> bool {
    return I && AA->isNoAlias(I->getPointerOperand(), Ptr);
  };

  if (checkNoAlias(dyn_cast<AtomicCmpXchgInst>(DefInst)) ||
      checkNoAlias(dyn_cast<AtomicRMWInst>(DefInst)))
    return false;

  return true;
}

bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA,
                           AAResults *AA) {
  MemorySSAWalker *Walker = MSSA->getWalker();
  SmallVector<MemoryAccess *> WorkList{Walker->getClobberingMemoryAccess(Load)};
  SmallSet<MemoryAccess *, 8> Visited;
  MemoryLocation Loc(MemoryLocation::get(Load));

  LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n');

  // Start with the nearest dominating clobbering access; it will be either
  // live on entry (nothing to do, the load is not clobbered), a MemoryDef, or
  // a MemoryPhi if several MemoryDefs can define this memory state. In the
  // latter case add all Defs to the WorkList and keep going up, checking all
  // the definitions of this memory location until the root. When all the defs
  // are exhausted and we have reached the entry state, there is no clobber.
  // During the scan ignore barriers and fences, which MemorySSA considers
  // clobbers but which do not really write anything into memory.
  while (!WorkList.empty()) {
    MemoryAccess *MA = WorkList.pop_back_val();
    if (!Visited.insert(MA).second)
      continue;

    if (MSSA->isLiveOnEntryDef(MA))
      continue;

    if (MemoryDef *Def = dyn_cast<MemoryDef>(MA)) {
      LLVM_DEBUG(dbgs() << "  Def: " << *Def->getMemoryInst() << '\n');

      if (isReallyAClobber(Load->getPointerOperand(), Def, AA)) {
        LLVM_DEBUG(dbgs() << "      -> load is clobbered\n");
        return true;
      }

      WorkList.push_back(
          Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc));
      continue;
    }

    const MemoryPhi *Phi = cast<MemoryPhi>(MA);
    for (const auto &Use : Phi->incoming_values())
      WorkList.push_back(cast<MemoryAccess>(&Use));
  }

  LLVM_DEBUG(dbgs() << "      -> no clobber\n");
  return false;
}

} // end namespace llvm::AMDGPU