xref: /llvm-project/llvm/lib/Analysis/KernelInfo.cpp (revision 57f17319796a1876ba5a82d9bdc0f6f63ab12945)
1 //===- KernelInfo.cpp - Kernel Analysis -----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the KernelInfoPrinter class used to emit remarks about
10 // function properties from a GPU kernel.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Analysis/KernelInfo.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/ADT/StringExtras.h"
17 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
18 #include "llvm/Analysis/TargetTransformInfo.h"
19 #include "llvm/IR/DebugInfo.h"
20 #include "llvm/IR/Dominators.h"
21 #include "llvm/IR/Instructions.h"
22 #include "llvm/IR/Metadata.h"
23 #include "llvm/IR/Module.h"
24 #include "llvm/IR/PassManager.h"
25 
26 using namespace llvm;
27 
28 #define DEBUG_TYPE "kernel-info"
29 
30 namespace {
31 
32 /// Data structure holding function info for kernels.
33 class KernelInfo {
34   void updateForBB(const BasicBlock &BB, OptimizationRemarkEmitter &ORE);
35 
36 public:
37   static void emitKernelInfo(Function &F, FunctionAnalysisManager &FAM,
38                              TargetMachine *TM);
39 
40   /// Whether the function has external linkage and is not a kernel function.
41   bool ExternalNotKernel = false;
42 
43   /// Launch bounds.
44   SmallVector<std::pair<StringRef, int64_t>> LaunchBounds;
45 
46   /// The number of alloca instructions inside the function, the number of those
47   /// with allocation sizes that cannot be determined at compile time, and the
48   /// sum of the sizes that can be.
49   ///
50   /// With the current implementation for at least some GPU archs,
51   /// AllocasDyn > 0 might not be possible, but we report AllocasDyn anyway in
52   /// case the implementation changes.
53   int64_t Allocas = 0;
54   int64_t AllocasDyn = 0;
55   int64_t AllocasStaticSizeSum = 0;
56 
57   /// Number of direct/indirect calls (anything derived from CallBase).
58   int64_t DirectCalls = 0;
59   int64_t IndirectCalls = 0;
60 
61   /// Number of direct calls made from this function to other functions
62   /// defined in this module.
63   int64_t DirectCallsToDefinedFunctions = 0;
64 
65   /// Number of direct calls to inline assembly.
66   int64_t InlineAssemblyCalls = 0;
67 
68   /// Number of calls of type InvokeInst.
69   int64_t Invokes = 0;
70 
71   /// Target-specific flat address space.
72   unsigned FlatAddrspace;
73 
74   /// Number of flat address space memory accesses (via load, store, etc.).
75   int64_t FlatAddrspaceAccesses = 0;
76 };
77 
78 } // end anonymous namespace
79 
80 static void identifyCallee(OptimizationRemark &R, const Module *M,
81                            const Value *V, StringRef Kind = "") {
82   SmallString<100> Name; // might be function name or asm expression
83   if (const Function *F = dyn_cast<Function>(V)) {
84     if (auto *SubProgram = F->getSubprogram()) {
85       if (SubProgram->isArtificial())
86         R << "artificial ";
87       Name = SubProgram->getName();
88     }
89   }
90   if (Name.empty()) {
91     raw_svector_ostream OS(Name);
92     V->printAsOperand(OS, /*PrintType=*/false, M);
93   }
94   if (!Kind.empty())
95     R << Kind << " ";
96   R << "'" << Name << "'";
97 }
98 
99 static void identifyFunction(OptimizationRemark &R, const Function &F) {
100   identifyCallee(R, F.getParent(), &F, "function");
101 }
102 
103 static void remarkAlloca(OptimizationRemarkEmitter &ORE, const Function &Caller,
104                          const AllocaInst &Alloca,
105                          TypeSize::ScalarTy StaticSize) {
106   ORE.emit([&] {
107     StringRef DbgName;
108     DebugLoc Loc;
109     bool Artificial = false;
110     auto DVRs = findDVRDeclares(&const_cast<AllocaInst &>(Alloca));
111     if (!DVRs.empty()) {
112       const DbgVariableRecord &DVR = **DVRs.begin();
113       DbgName = DVR.getVariable()->getName();
114       Loc = DVR.getDebugLoc();
115       Artificial = DVR.Variable->isArtificial();
116     }
117     OptimizationRemark R(DEBUG_TYPE, "Alloca", DiagnosticLocation(Loc),
118                          Alloca.getParent());
119     R << "in ";
120     identifyFunction(R, Caller);
121     R << ", ";
122     if (Artificial)
123       R << "artificial ";
124     SmallString<20> ValName;
125     raw_svector_ostream OS(ValName);
126     Alloca.printAsOperand(OS, /*PrintType=*/false, Caller.getParent());
127     R << "alloca ('" << ValName << "') ";
128     if (!DbgName.empty())
129       R << "for '" << DbgName << "' ";
130     else
131       R << "without debug info ";
132     R << "with ";
133     if (StaticSize)
134       R << "static size of " << itostr(StaticSize) << " bytes";
135     else
136       R << "dynamic size";
137     return R;
138   });
139 }
140 
141 static void remarkCall(OptimizationRemarkEmitter &ORE, const Function &Caller,
142                        const CallBase &Call, StringRef CallKind,
143                        StringRef RemarkKind) {
144   ORE.emit([&] {
145     OptimizationRemark R(DEBUG_TYPE, RemarkKind, &Call);
146     R << "in ";
147     identifyFunction(R, Caller);
148     R << ", " << CallKind << ", callee is ";
149     identifyCallee(R, Caller.getParent(), Call.getCalledOperand());
150     return R;
151   });
152 }
153 
154 static void remarkFlatAddrspaceAccess(OptimizationRemarkEmitter &ORE,
155                                       const Function &Caller,
156                                       const Instruction &Inst) {
157   ORE.emit([&] {
158     OptimizationRemark R(DEBUG_TYPE, "FlatAddrspaceAccess", &Inst);
159     R << "in ";
160     identifyFunction(R, Caller);
161     if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Inst)) {
162       R << ", '" << II->getCalledFunction()->getName() << "' call";
163     } else {
164       R << ", '" << Inst.getOpcodeName() << "' instruction";
165     }
166     if (!Inst.getType()->isVoidTy()) {
167       SmallString<20> Name;
168       raw_svector_ostream OS(Name);
169       Inst.printAsOperand(OS, /*PrintType=*/false, Caller.getParent());
170       R << " ('" << Name << "')";
171     }
172     R << " accesses memory in flat address space";
173     return R;
174   });
175 }
176 
177 void KernelInfo::updateForBB(const BasicBlock &BB,
178                              OptimizationRemarkEmitter &ORE) {
179   const Function &F = *BB.getParent();
180   const Module &M = *F.getParent();
181   const DataLayout &DL = M.getDataLayout();
182   for (const Instruction &I : BB.instructionsWithoutDebug()) {
183     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(&I)) {
184       ++Allocas;
185       TypeSize::ScalarTy StaticSize = 0;
186       if (std::optional<TypeSize> Size = Alloca->getAllocationSize(DL)) {
187         StaticSize = Size->getFixedValue();
188         assert(StaticSize <=
189                (TypeSize::ScalarTy)std::numeric_limits<int64_t>::max());
190         AllocasStaticSizeSum += StaticSize;
191       } else {
192         ++AllocasDyn;
193       }
194       remarkAlloca(ORE, F, *Alloca, StaticSize);
195     } else if (const CallBase *Call = dyn_cast<CallBase>(&I)) {
196       SmallString<40> CallKind;
197       SmallString<40> RemarkKind;
198       if (Call->isIndirectCall()) {
199         ++IndirectCalls;
200         CallKind += "indirect";
201         RemarkKind += "Indirect";
202       } else {
203         ++DirectCalls;
204         CallKind += "direct";
205         RemarkKind += "Direct";
206       }
207       if (isa<InvokeInst>(Call)) {
208         ++Invokes;
209         CallKind += " invoke";
210         RemarkKind += "Invoke";
211       } else {
212         CallKind += " call";
213         RemarkKind += "Call";
214       }
215       if (!Call->isIndirectCall()) {
216         if (const Function *Callee = Call->getCalledFunction()) {
217           if (!Callee->isIntrinsic() && !Callee->isDeclaration()) {
218             ++DirectCallsToDefinedFunctions;
219             CallKind += " to defined function";
220             RemarkKind += "ToDefinedFunction";
221           }
222         } else if (Call->isInlineAsm()) {
223           ++InlineAssemblyCalls;
224           CallKind += " to inline assembly";
225           RemarkKind += "ToInlineAssembly";
226         }
227       }
228       remarkCall(ORE, F, *Call, CallKind, RemarkKind);
229       if (const AnyMemIntrinsic *MI = dyn_cast<AnyMemIntrinsic>(Call)) {
230         if (MI->getDestAddressSpace() == FlatAddrspace) {
231           ++FlatAddrspaceAccesses;
232           remarkFlatAddrspaceAccess(ORE, F, I);
233         } else if (const AnyMemTransferInst *MT =
234                        dyn_cast<AnyMemTransferInst>(MI)) {
235           if (MT->getSourceAddressSpace() == FlatAddrspace) {
236             ++FlatAddrspaceAccesses;
237             remarkFlatAddrspaceAccess(ORE, F, I);
238           }
239         }
240       }
241     } else if (const LoadInst *Load = dyn_cast<LoadInst>(&I)) {
242       if (Load->getPointerAddressSpace() == FlatAddrspace) {
243         ++FlatAddrspaceAccesses;
244         remarkFlatAddrspaceAccess(ORE, F, I);
245       }
246     } else if (const StoreInst *Store = dyn_cast<StoreInst>(&I)) {
247       if (Store->getPointerAddressSpace() == FlatAddrspace) {
248         ++FlatAddrspaceAccesses;
249         remarkFlatAddrspaceAccess(ORE, F, I);
250       }
251     } else if (const AtomicRMWInst *At = dyn_cast<AtomicRMWInst>(&I)) {
252       if (At->getPointerAddressSpace() == FlatAddrspace) {
253         ++FlatAddrspaceAccesses;
254         remarkFlatAddrspaceAccess(ORE, F, I);
255       }
256     } else if (const AtomicCmpXchgInst *At = dyn_cast<AtomicCmpXchgInst>(&I)) {
257       if (At->getPointerAddressSpace() == FlatAddrspace) {
258         ++FlatAddrspaceAccesses;
259         remarkFlatAddrspaceAccess(ORE, F, I);
260       }
261     }
262   }
263 }
264 
265 static void remarkProperty(OptimizationRemarkEmitter &ORE, const Function &F,
266                            StringRef Name, int64_t Value) {
267   ORE.emit([&] {
268     OptimizationRemark R(DEBUG_TYPE, Name, &F);
269     R << "in ";
270     identifyFunction(R, F);
271     R << ", " << Name << " = " << itostr(Value);
272     return R;
273   });
274 }
275 
276 static std::optional<int64_t> parseFnAttrAsInteger(Function &F,
277                                                    StringRef Name) {
278   if (!F.hasFnAttribute(Name))
279     return std::nullopt;
280   return F.getFnAttributeAsParsedInteger(Name);
281 }
282 
283 void KernelInfo::emitKernelInfo(Function &F, FunctionAnalysisManager &FAM,
284                                 TargetMachine *TM) {
285   KernelInfo KI;
286   TargetTransformInfo &TheTTI = FAM.getResult<TargetIRAnalysis>(F);
287   KI.FlatAddrspace = TheTTI.getFlatAddressSpace();
288 
289   // Record function properties.
290   KI.ExternalNotKernel = F.hasExternalLinkage() && !F.hasKernelCallingConv();
291   for (StringRef Name : {"omp_target_num_teams", "omp_target_thread_limit"}) {
292     if (auto Val = parseFnAttrAsInteger(F, Name))
293       KI.LaunchBounds.push_back({Name, *Val});
294   }
295   TheTTI.collectKernelLaunchBounds(F, KI.LaunchBounds);
296 
297   auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
298   for (const auto &BB : F)
299     KI.updateForBB(BB, ORE);
300 
301 #define REMARK_PROPERTY(PROP_NAME)                                             \
302   remarkProperty(ORE, F, #PROP_NAME, KI.PROP_NAME)
303   REMARK_PROPERTY(ExternalNotKernel);
304   for (auto LB : KI.LaunchBounds)
305     remarkProperty(ORE, F, LB.first, LB.second);
306   REMARK_PROPERTY(Allocas);
307   REMARK_PROPERTY(AllocasStaticSizeSum);
308   REMARK_PROPERTY(AllocasDyn);
309   REMARK_PROPERTY(DirectCalls);
310   REMARK_PROPERTY(IndirectCalls);
311   REMARK_PROPERTY(DirectCallsToDefinedFunctions);
312   REMARK_PROPERTY(InlineAssemblyCalls);
313   REMARK_PROPERTY(Invokes);
314   REMARK_PROPERTY(FlatAddrspaceAccesses);
315 #undef REMARK_PROPERTY
316 
317   return;
318 }
319 
320 PreservedAnalyses KernelInfoPrinter::run(Function &F,
321                                          FunctionAnalysisManager &AM) {
322   // Skip it if remarks are not enabled as it will do nothing useful.
323   if (F.getContext().getDiagHandlerPtr()->isPassedOptRemarkEnabled(DEBUG_TYPE))
324     KernelInfo::emitKernelInfo(F, AM, TM);
325   return PreservedAnalyses::all();
326 }
327