xref: /llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp (revision ff55c9bc63ddd1bbe13376c25ae1fc327e3d5da2)
155d744eeSJay Foad //===-- AMDGPUMemoryUtils.cpp - -------------------------------------------===//
255d744eeSJay Foad //
355d744eeSJay Foad // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
455d744eeSJay Foad // See https://llvm.org/LICENSE.txt for license information.
555d744eeSJay Foad // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
655d744eeSJay Foad //
755d744eeSJay Foad //===----------------------------------------------------------------------===//
855d744eeSJay Foad 
955d744eeSJay Foad #include "AMDGPUMemoryUtils.h"
1055d744eeSJay Foad #include "AMDGPU.h"
1155d744eeSJay Foad #include "Utils/AMDGPUBaseInfo.h"
1255d744eeSJay Foad #include "llvm/ADT/SetOperations.h"
1355d744eeSJay Foad #include "llvm/ADT/SmallSet.h"
1455d744eeSJay Foad #include "llvm/Analysis/AliasAnalysis.h"
1555d744eeSJay Foad #include "llvm/Analysis/CallGraph.h"
1655d744eeSJay Foad #include "llvm/Analysis/MemorySSA.h"
1755d744eeSJay Foad #include "llvm/IR/DataLayout.h"
1855d744eeSJay Foad #include "llvm/IR/Instructions.h"
1955d744eeSJay Foad #include "llvm/IR/IntrinsicInst.h"
2055d744eeSJay Foad #include "llvm/IR/IntrinsicsAMDGPU.h"
2155d744eeSJay Foad #include "llvm/IR/ReplaceConstant.h"
2255d744eeSJay Foad 
2355d744eeSJay Foad #define DEBUG_TYPE "amdgpu-memory-utils"
2455d744eeSJay Foad 
2555d744eeSJay Foad using namespace llvm;
2655d744eeSJay Foad 
2755d744eeSJay Foad namespace llvm::AMDGPU {
2855d744eeSJay Foad 
2955d744eeSJay Foad Align getAlign(const DataLayout &DL, const GlobalVariable *GV) {
3055d744eeSJay Foad   return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL),
3155d744eeSJay Foad                                        GV->getValueType());
3255d744eeSJay Foad }
3355d744eeSJay Foad 
348c752900SGang Chen TargetExtType *isNamedBarrier(const GlobalVariable &GV) {
358c752900SGang Chen   // TODO: Allow arrays and structs, if all members are barriers
368c752900SGang Chen   // in the same scope.
378c752900SGang Chen   // TODO: Disallow other uses of target("amdgcn.named.barrier") including:
388c752900SGang Chen   // - Structs containing barriers in different scope.
398c752900SGang Chen   // - Structs containing a mixture of barriers and other data.
408c752900SGang Chen   // - Globals in other address spaces.
418c752900SGang Chen   // - Allocas.
428c752900SGang Chen   Type *Ty = GV.getValueType();
438c752900SGang Chen   while (true) {
448c752900SGang Chen     if (auto *TTy = dyn_cast<TargetExtType>(Ty))
458c752900SGang Chen       return TTy->getName() == "amdgcn.named.barrier" ? TTy : nullptr;
468c752900SGang Chen     if (auto *STy = dyn_cast<StructType>(Ty)) {
478c752900SGang Chen       if (STy->getNumElements() == 0)
488c752900SGang Chen         return nullptr;
498c752900SGang Chen       Ty = STy->getElementType(0);
508c752900SGang Chen       continue;
518c752900SGang Chen     }
528c752900SGang Chen     return nullptr;
538c752900SGang Chen   }
548c752900SGang Chen }
558c752900SGang Chen 
5655d744eeSJay Foad bool isDynamicLDS(const GlobalVariable &GV) {
5755d744eeSJay Foad   // external zero size addrspace(3) without initializer is dynlds.
5855d744eeSJay Foad   const Module *M = GV.getParent();
5955d744eeSJay Foad   const DataLayout &DL = M->getDataLayout();
6055d744eeSJay Foad   if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
6155d744eeSJay Foad     return false;
6255d744eeSJay Foad   return DL.getTypeAllocSize(GV.getValueType()) == 0;
6355d744eeSJay Foad }
6455d744eeSJay Foad 
6555d744eeSJay Foad bool isLDSVariableToLower(const GlobalVariable &GV) {
6655d744eeSJay Foad   if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
6755d744eeSJay Foad     return false;
6855d744eeSJay Foad   }
6955d744eeSJay Foad   if (isDynamicLDS(GV)) {
7055d744eeSJay Foad     return true;
7155d744eeSJay Foad   }
7255d744eeSJay Foad   if (GV.isConstant()) {
7355d744eeSJay Foad     // A constant undef variable can't be written to, and any load is
7455d744eeSJay Foad     // undef, so it should be eliminated by the optimizer. It could be
7555d744eeSJay Foad     // dropped by the back end if not. This pass skips over it.
7655d744eeSJay Foad     return false;
7755d744eeSJay Foad   }
7855d744eeSJay Foad   if (GV.hasInitializer() && !isa<UndefValue>(GV.getInitializer())) {
7955d744eeSJay Foad     // Initializers are unimplemented for LDS address space.
8055d744eeSJay Foad     // Leave such variables in place for consistent error reporting.
8155d744eeSJay Foad     return false;
8255d744eeSJay Foad   }
8355d744eeSJay Foad   return true;
8455d744eeSJay Foad }
8555d744eeSJay Foad 
8655d744eeSJay Foad bool eliminateConstantExprUsesOfLDSFromAllInstructions(Module &M) {
8755d744eeSJay Foad   // Constants are uniqued within LLVM. A ConstantExpr referring to a LDS
8855d744eeSJay Foad   // global may have uses from multiple different functions as a result.
8955d744eeSJay Foad   // This pass specialises LDS variables with respect to the kernel that
9055d744eeSJay Foad   // allocates them.
9155d744eeSJay Foad 
9255d744eeSJay Foad   // This is semantically equivalent to (the unimplemented as slow):
9355d744eeSJay Foad   // for (auto &F : M.functions())
9455d744eeSJay Foad   //   for (auto &BB : F)
9555d744eeSJay Foad   //     for (auto &I : BB)
9655d744eeSJay Foad   //       for (Use &Op : I.operands())
9755d744eeSJay Foad   //         if (constantExprUsesLDS(Op))
9855d744eeSJay Foad   //           replaceConstantExprInFunction(I, Op);
9955d744eeSJay Foad 
10055d744eeSJay Foad   SmallVector<Constant *> LDSGlobals;
10155d744eeSJay Foad   for (auto &GV : M.globals())
10255d744eeSJay Foad     if (AMDGPU::isLDSVariableToLower(GV))
10355d744eeSJay Foad       LDSGlobals.push_back(&GV);
10455d744eeSJay Foad   return convertUsersOfConstantsToInstructions(LDSGlobals);
10555d744eeSJay Foad }
10655d744eeSJay Foad 
10755d744eeSJay Foad void getUsesOfLDSByFunction(const CallGraph &CG, Module &M,
10855d744eeSJay Foad                             FunctionVariableMap &kernels,
10955d744eeSJay Foad                             FunctionVariableMap &Functions) {
11055d744eeSJay Foad   // Get uses from the current function, excluding uses by called Functions
11155d744eeSJay Foad   // Two output variables to avoid walking the globals list twice
11255d744eeSJay Foad   for (auto &GV : M.globals()) {
11355d744eeSJay Foad     if (!AMDGPU::isLDSVariableToLower(GV))
11455d744eeSJay Foad       continue;
11555d744eeSJay Foad     for (User *V : GV.users()) {
11655d744eeSJay Foad       if (auto *I = dyn_cast<Instruction>(V)) {
11755d744eeSJay Foad         Function *F = I->getFunction();
11855d744eeSJay Foad         if (isKernelLDS(F))
11955d744eeSJay Foad           kernels[F].insert(&GV);
12055d744eeSJay Foad         else
12155d744eeSJay Foad           Functions[F].insert(&GV);
12255d744eeSJay Foad       }
12355d744eeSJay Foad     }
12455d744eeSJay Foad   }
12555d744eeSJay Foad }
12655d744eeSJay Foad 
12755d744eeSJay Foad bool isKernelLDS(const Function *F) {
12855d744eeSJay Foad   // Some weirdness here. AMDGPU::isKernelCC does not call into
12955d744eeSJay Foad   // AMDGPU::isKernel with the calling conv, it instead calls into
13055d744eeSJay Foad   // isModuleEntryFunction which returns true for more calling conventions
13155d744eeSJay Foad   // than AMDGPU::isKernel does. There's a FIXME on AMDGPU::isKernel.
13255d744eeSJay Foad   // There's also a test that checks that the LDS lowering does not hit on
13355d744eeSJay Foad   // a graphics shader, denoted amdgpu_ps, so stay with the limited case.
13455d744eeSJay Foad   // Putting LDS in the name of the function to draw attention to this.
13555d744eeSJay Foad   return AMDGPU::isKernel(F->getCallingConv());
13655d744eeSJay Foad }
13755d744eeSJay Foad 
13855d744eeSJay Foad LDSUsesInfoTy getTransitiveUsesOfLDS(const CallGraph &CG, Module &M) {
13955d744eeSJay Foad 
14055d744eeSJay Foad   FunctionVariableMap DirectMapKernel;
14155d744eeSJay Foad   FunctionVariableMap DirectMapFunction;
14255d744eeSJay Foad   getUsesOfLDSByFunction(CG, M, DirectMapKernel, DirectMapFunction);
14355d744eeSJay Foad 
144*ff55c9bcSKareem Ergawy   // Collect functions whose address has escaped
145*ff55c9bcSKareem Ergawy   DenseSet<Function *> AddressTakenFuncs;
14655d744eeSJay Foad   for (Function &F : M.functions()) {
14755d744eeSJay Foad     if (!isKernelLDS(&F))
14855d744eeSJay Foad       if (F.hasAddressTaken(nullptr,
14955d744eeSJay Foad                             /* IgnoreCallbackUses */ false,
15055d744eeSJay Foad                             /* IgnoreAssumeLikeCalls */ false,
15155d744eeSJay Foad                             /* IgnoreLLVMUsed */ true,
15255d744eeSJay Foad                             /* IgnoreArcAttachedCall */ false)) {
153*ff55c9bcSKareem Ergawy         AddressTakenFuncs.insert(&F);
15455d744eeSJay Foad       }
15555d744eeSJay Foad   }
15655d744eeSJay Foad 
157*ff55c9bcSKareem Ergawy   // Collect variables that are used by functions whose address has escaped
158*ff55c9bcSKareem Ergawy   DenseSet<GlobalVariable *> VariablesReachableThroughFunctionPointer;
159*ff55c9bcSKareem Ergawy   for (Function *F : AddressTakenFuncs) {
160*ff55c9bcSKareem Ergawy     set_union(VariablesReachableThroughFunctionPointer, DirectMapFunction[F]);
161*ff55c9bcSKareem Ergawy   }
162*ff55c9bcSKareem Ergawy 
16355d744eeSJay Foad   auto FunctionMakesUnknownCall = [&](const Function *F) -> bool {
16455d744eeSJay Foad     assert(!F->isDeclaration());
16555d744eeSJay Foad     for (const CallGraphNode::CallRecord &R : *CG[F]) {
16655d744eeSJay Foad       if (!R.second->getFunction())
16755d744eeSJay Foad         return true;
16855d744eeSJay Foad     }
16955d744eeSJay Foad     return false;
17055d744eeSJay Foad   };
17155d744eeSJay Foad 
17255d744eeSJay Foad   // Work out which variables are reachable through function calls
17355d744eeSJay Foad   FunctionVariableMap TransitiveMapFunction = DirectMapFunction;
17455d744eeSJay Foad 
17555d744eeSJay Foad   // If the function makes any unknown call, assume the worst case that it can
17655d744eeSJay Foad   // access all variables accessed by functions whose address escaped
17755d744eeSJay Foad   for (Function &F : M.functions()) {
17855d744eeSJay Foad     if (!F.isDeclaration() && FunctionMakesUnknownCall(&F)) {
17955d744eeSJay Foad       if (!isKernelLDS(&F)) {
18055d744eeSJay Foad         set_union(TransitiveMapFunction[&F],
18155d744eeSJay Foad                   VariablesReachableThroughFunctionPointer);
18255d744eeSJay Foad       }
18355d744eeSJay Foad     }
18455d744eeSJay Foad   }
18555d744eeSJay Foad 
18655d744eeSJay Foad   // Direct implementation of collecting all variables reachable from each
18755d744eeSJay Foad   // function
18855d744eeSJay Foad   for (Function &Func : M.functions()) {
18955d744eeSJay Foad     if (Func.isDeclaration() || isKernelLDS(&Func))
19055d744eeSJay Foad       continue;
19155d744eeSJay Foad 
19255d744eeSJay Foad     DenseSet<Function *> seen; // catches cycles
19355d744eeSJay Foad     SmallVector<Function *, 4> wip = {&Func};
19455d744eeSJay Foad 
19555d744eeSJay Foad     while (!wip.empty()) {
19655d744eeSJay Foad       Function *F = wip.pop_back_val();
19755d744eeSJay Foad 
19855d744eeSJay Foad       // Can accelerate this by referring to transitive map for functions that
19955d744eeSJay Foad       // have already been computed, with more care than this
20055d744eeSJay Foad       set_union(TransitiveMapFunction[&Func], DirectMapFunction[F]);
20155d744eeSJay Foad 
20255d744eeSJay Foad       for (const CallGraphNode::CallRecord &R : *CG[F]) {
20355d744eeSJay Foad         Function *Ith = R.second->getFunction();
20455d744eeSJay Foad         if (Ith) {
20555d744eeSJay Foad           if (!seen.contains(Ith)) {
20655d744eeSJay Foad             seen.insert(Ith);
20755d744eeSJay Foad             wip.push_back(Ith);
20855d744eeSJay Foad           }
20955d744eeSJay Foad         }
21055d744eeSJay Foad       }
21155d744eeSJay Foad     }
21255d744eeSJay Foad   }
21355d744eeSJay Foad 
214*ff55c9bcSKareem Ergawy   // Collect variables that are transitively used by functions whose address has
215*ff55c9bcSKareem Ergawy   // escaped
216*ff55c9bcSKareem Ergawy   for (Function *F : AddressTakenFuncs) {
217*ff55c9bcSKareem Ergawy     set_union(VariablesReachableThroughFunctionPointer,
218*ff55c9bcSKareem Ergawy               TransitiveMapFunction[F]);
219*ff55c9bcSKareem Ergawy   }
220*ff55c9bcSKareem Ergawy 
22155d744eeSJay Foad   // DirectMapKernel lists which variables are used by the kernel
22255d744eeSJay Foad   // find the variables which are used through a function call
22355d744eeSJay Foad   FunctionVariableMap IndirectMapKernel;
22455d744eeSJay Foad 
22555d744eeSJay Foad   for (Function &Func : M.functions()) {
22655d744eeSJay Foad     if (Func.isDeclaration() || !isKernelLDS(&Func))
22755d744eeSJay Foad       continue;
22855d744eeSJay Foad 
22955d744eeSJay Foad     for (const CallGraphNode::CallRecord &R : *CG[&Func]) {
23055d744eeSJay Foad       Function *Ith = R.second->getFunction();
23155d744eeSJay Foad       if (Ith) {
23255d744eeSJay Foad         set_union(IndirectMapKernel[&Func], TransitiveMapFunction[Ith]);
233*ff55c9bcSKareem Ergawy       }
234*ff55c9bcSKareem Ergawy     }
235*ff55c9bcSKareem Ergawy 
236*ff55c9bcSKareem Ergawy     // Check if the kernel encounters unknows calls, wheher directly or
237*ff55c9bcSKareem Ergawy     // indirectly.
238*ff55c9bcSKareem Ergawy     bool SeesUnknownCalls = [&]() {
239*ff55c9bcSKareem Ergawy       SmallVector<Function *> WorkList = {CG[&Func]->getFunction()};
240*ff55c9bcSKareem Ergawy       SmallPtrSet<Function *, 8> Visited;
241*ff55c9bcSKareem Ergawy 
242*ff55c9bcSKareem Ergawy       while (!WorkList.empty()) {
243*ff55c9bcSKareem Ergawy         Function *F = WorkList.pop_back_val();
244*ff55c9bcSKareem Ergawy 
245*ff55c9bcSKareem Ergawy         for (const CallGraphNode::CallRecord &CallRecord : *CG[F]) {
246*ff55c9bcSKareem Ergawy           if (!CallRecord.second)
247*ff55c9bcSKareem Ergawy             continue;
248*ff55c9bcSKareem Ergawy 
249*ff55c9bcSKareem Ergawy           Function *Callee = CallRecord.second->getFunction();
250*ff55c9bcSKareem Ergawy           if (!Callee)
251*ff55c9bcSKareem Ergawy             return true;
252*ff55c9bcSKareem Ergawy 
253*ff55c9bcSKareem Ergawy           if (Visited.insert(Callee).second)
254*ff55c9bcSKareem Ergawy             WorkList.push_back(Callee);
255*ff55c9bcSKareem Ergawy         }
256*ff55c9bcSKareem Ergawy       }
257*ff55c9bcSKareem Ergawy       return false;
258*ff55c9bcSKareem Ergawy     }();
259*ff55c9bcSKareem Ergawy 
260*ff55c9bcSKareem Ergawy     if (SeesUnknownCalls) {
26155d744eeSJay Foad       set_union(IndirectMapKernel[&Func],
26255d744eeSJay Foad                 VariablesReachableThroughFunctionPointer);
26355d744eeSJay Foad     }
26455d744eeSJay Foad   }
26555d744eeSJay Foad 
26655d744eeSJay Foad   // Verify that we fall into one of 2 cases:
26755d744eeSJay Foad   //    - All variables are either absolute
26855d744eeSJay Foad   //      or direct mapped dynamic LDS that is not lowered.
26955d744eeSJay Foad   //      this is a re-run of the pass
27055d744eeSJay Foad   //      so we don't have anything to do.
27155d744eeSJay Foad   //    - No variables are absolute.
27255d744eeSJay Foad   std::optional<bool> HasAbsoluteGVs;
2738c752900SGang Chen   bool HasSpecialGVs = false;
27455d744eeSJay Foad   for (auto &Map : {DirectMapKernel, IndirectMapKernel}) {
27555d744eeSJay Foad     for (auto &[Fn, GVs] : Map) {
27655d744eeSJay Foad       for (auto *GV : GVs) {
27755d744eeSJay Foad         bool IsAbsolute = GV->isAbsoluteSymbolRef();
27855d744eeSJay Foad         bool IsDirectMapDynLDSGV =
27955d744eeSJay Foad             AMDGPU::isDynamicLDS(*GV) && DirectMapKernel.contains(Fn);
28055d744eeSJay Foad         if (IsDirectMapDynLDSGV)
28155d744eeSJay Foad           continue;
2828c752900SGang Chen         if (isNamedBarrier(*GV)) {
2838c752900SGang Chen           HasSpecialGVs = true;
2848c752900SGang Chen           continue;
2858c752900SGang Chen         }
28655d744eeSJay Foad         if (HasAbsoluteGVs.has_value()) {
28755d744eeSJay Foad           if (*HasAbsoluteGVs != IsAbsolute) {
28855d744eeSJay Foad             report_fatal_error(
28955d744eeSJay Foad                 "Module cannot mix absolute and non-absolute LDS GVs");
29055d744eeSJay Foad           }
29155d744eeSJay Foad         } else
29255d744eeSJay Foad           HasAbsoluteGVs = IsAbsolute;
29355d744eeSJay Foad       }
29455d744eeSJay Foad     }
29555d744eeSJay Foad   }
29655d744eeSJay Foad 
29755d744eeSJay Foad   // If we only had absolute GVs, we have nothing to do, return an empty
29855d744eeSJay Foad   // result.
29955d744eeSJay Foad   if (HasAbsoluteGVs && *HasAbsoluteGVs)
3008c752900SGang Chen     return {FunctionVariableMap(), FunctionVariableMap(), false};
30155d744eeSJay Foad 
3028c752900SGang Chen   return {std::move(DirectMapKernel), std::move(IndirectMapKernel),
3038c752900SGang Chen           HasSpecialGVs};
30455d744eeSJay Foad }
30555d744eeSJay Foad 
30655d744eeSJay Foad void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot,
30755d744eeSJay Foad                                ArrayRef<StringRef> FnAttrs) {
30855d744eeSJay Foad   for (StringRef Attr : FnAttrs)
30955d744eeSJay Foad     KernelRoot->removeFnAttr(Attr);
31055d744eeSJay Foad 
31155d744eeSJay Foad   SmallVector<Function *> WorkList = {CG[KernelRoot]->getFunction()};
31255d744eeSJay Foad   SmallPtrSet<Function *, 8> Visited;
31355d744eeSJay Foad   bool SeenUnknownCall = false;
31455d744eeSJay Foad 
31555d744eeSJay Foad   while (!WorkList.empty()) {
31655d744eeSJay Foad     Function *F = WorkList.pop_back_val();
31755d744eeSJay Foad 
31855d744eeSJay Foad     for (auto &CallRecord : *CG[F]) {
31955d744eeSJay Foad       if (!CallRecord.second)
32055d744eeSJay Foad         continue;
32155d744eeSJay Foad 
32255d744eeSJay Foad       Function *Callee = CallRecord.second->getFunction();
32355d744eeSJay Foad       if (!Callee) {
32455d744eeSJay Foad         if (!SeenUnknownCall) {
32555d744eeSJay Foad           SeenUnknownCall = true;
32655d744eeSJay Foad 
32755d744eeSJay Foad           // If we see any indirect calls, assume nothing about potential
32855d744eeSJay Foad           // targets.
32955d744eeSJay Foad           // TODO: This could be refined to possible LDS global users.
33055d744eeSJay Foad           for (auto &ExternalCallRecord : *CG.getExternalCallingNode()) {
33155d744eeSJay Foad             Function *PotentialCallee =
33255d744eeSJay Foad                 ExternalCallRecord.second->getFunction();
33355d744eeSJay Foad             assert(PotentialCallee);
33455d744eeSJay Foad             if (!isKernelLDS(PotentialCallee)) {
33555d744eeSJay Foad               for (StringRef Attr : FnAttrs)
33655d744eeSJay Foad                 PotentialCallee->removeFnAttr(Attr);
33755d744eeSJay Foad             }
33855d744eeSJay Foad           }
33955d744eeSJay Foad         }
34055d744eeSJay Foad       } else {
34155d744eeSJay Foad         for (StringRef Attr : FnAttrs)
34255d744eeSJay Foad           Callee->removeFnAttr(Attr);
34355d744eeSJay Foad         if (Visited.insert(Callee).second)
34455d744eeSJay Foad           WorkList.push_back(Callee);
34555d744eeSJay Foad       }
34655d744eeSJay Foad     }
34755d744eeSJay Foad   }
34855d744eeSJay Foad }
34955d744eeSJay Foad 
35055d744eeSJay Foad bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
35155d744eeSJay Foad   Instruction *DefInst = Def->getMemoryInst();
35255d744eeSJay Foad 
35355d744eeSJay Foad   if (isa<FenceInst>(DefInst))
35455d744eeSJay Foad     return false;
35555d744eeSJay Foad 
35655d744eeSJay Foad   if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
35755d744eeSJay Foad     switch (II->getIntrinsicID()) {
35855d744eeSJay Foad     case Intrinsic::amdgcn_s_barrier:
35955d744eeSJay Foad     case Intrinsic::amdgcn_s_barrier_signal:
36055d744eeSJay Foad     case Intrinsic::amdgcn_s_barrier_signal_var:
36155d744eeSJay Foad     case Intrinsic::amdgcn_s_barrier_signal_isfirst:
36255d744eeSJay Foad     case Intrinsic::amdgcn_s_barrier_init:
36355d744eeSJay Foad     case Intrinsic::amdgcn_s_barrier_join:
36455d744eeSJay Foad     case Intrinsic::amdgcn_s_barrier_wait:
36555d744eeSJay Foad     case Intrinsic::amdgcn_s_barrier_leave:
36655d744eeSJay Foad     case Intrinsic::amdgcn_s_get_barrier_state:
36755d744eeSJay Foad     case Intrinsic::amdgcn_wave_barrier:
36855d744eeSJay Foad     case Intrinsic::amdgcn_sched_barrier:
36955d744eeSJay Foad     case Intrinsic::amdgcn_sched_group_barrier:
37055d744eeSJay Foad       return false;
37155d744eeSJay Foad     default:
37255d744eeSJay Foad       break;
37355d744eeSJay Foad     }
37455d744eeSJay Foad   }
37555d744eeSJay Foad 
37655d744eeSJay Foad   // Ignore atomics not aliasing with the original load, any atomic is a
37755d744eeSJay Foad   // universal MemoryDef from MSSA's point of view too, just like a fence.
37855d744eeSJay Foad   const auto checkNoAlias = [AA, Ptr](auto I) -> bool {
37955d744eeSJay Foad     return I && AA->isNoAlias(I->getPointerOperand(), Ptr);
38055d744eeSJay Foad   };
38155d744eeSJay Foad 
38255d744eeSJay Foad   if (checkNoAlias(dyn_cast<AtomicCmpXchgInst>(DefInst)) ||
38355d744eeSJay Foad       checkNoAlias(dyn_cast<AtomicRMWInst>(DefInst)))
38455d744eeSJay Foad     return false;
38555d744eeSJay Foad 
38655d744eeSJay Foad   return true;
38755d744eeSJay Foad }
38855d744eeSJay Foad 
38955d744eeSJay Foad bool isClobberedInFunction(const LoadInst *Load, MemorySSA *MSSA,
39055d744eeSJay Foad                            AAResults *AA) {
39155d744eeSJay Foad   MemorySSAWalker *Walker = MSSA->getWalker();
39255d744eeSJay Foad   SmallVector<MemoryAccess *> WorkList{Walker->getClobberingMemoryAccess(Load)};
39355d744eeSJay Foad   SmallSet<MemoryAccess *, 8> Visited;
39455d744eeSJay Foad   MemoryLocation Loc(MemoryLocation::get(Load));
39555d744eeSJay Foad 
39655d744eeSJay Foad   LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n');
39755d744eeSJay Foad 
39855d744eeSJay Foad   // Start with a nearest dominating clobbering access, it will be either
39955d744eeSJay Foad   // live on entry (nothing to do, load is not clobbered), MemoryDef, or
40055d744eeSJay Foad   // MemoryPhi if several MemoryDefs can define this memory state. In that
40155d744eeSJay Foad   // case add all Defs to WorkList and continue going up and checking all
40255d744eeSJay Foad   // the definitions of this memory location until the root. When all the
40355d744eeSJay Foad   // defs are exhausted and came to the entry state we have no clobber.
40455d744eeSJay Foad   // Along the scan ignore barriers and fences which are considered clobbers
40555d744eeSJay Foad   // by the MemorySSA, but not really writing anything into the memory.
40655d744eeSJay Foad   while (!WorkList.empty()) {
40755d744eeSJay Foad     MemoryAccess *MA = WorkList.pop_back_val();
40855d744eeSJay Foad     if (!Visited.insert(MA).second)
40955d744eeSJay Foad       continue;
41055d744eeSJay Foad 
41155d744eeSJay Foad     if (MSSA->isLiveOnEntryDef(MA))
41255d744eeSJay Foad       continue;
41355d744eeSJay Foad 
41455d744eeSJay Foad     if (MemoryDef *Def = dyn_cast<MemoryDef>(MA)) {
41555d744eeSJay Foad       LLVM_DEBUG(dbgs() << "  Def: " << *Def->getMemoryInst() << '\n');
41655d744eeSJay Foad 
41755d744eeSJay Foad       if (isReallyAClobber(Load->getPointerOperand(), Def, AA)) {
41855d744eeSJay Foad         LLVM_DEBUG(dbgs() << "      -> load is clobbered\n");
41955d744eeSJay Foad         return true;
42055d744eeSJay Foad       }
42155d744eeSJay Foad 
42255d744eeSJay Foad       WorkList.push_back(
42355d744eeSJay Foad           Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc));
42455d744eeSJay Foad       continue;
42555d744eeSJay Foad     }
42655d744eeSJay Foad 
42755d744eeSJay Foad     const MemoryPhi *Phi = cast<MemoryPhi>(MA);
42855d744eeSJay Foad     for (const auto &Use : Phi->incoming_values())
42955d744eeSJay Foad       WorkList.push_back(cast<MemoryAccess>(&Use));
43055d744eeSJay Foad   }
43155d744eeSJay Foad 
43255d744eeSJay Foad   LLVM_DEBUG(dbgs() << "      -> no clobber\n");
43355d744eeSJay Foad   return false;
43455d744eeSJay Foad }
43555d744eeSJay Foad 
43655d744eeSJay Foad } // end namespace llvm::AMDGPU
437