xref: /llvm-project/llvm/lib/Analysis/KernelInfo.cpp (revision 18f8106f310ee702046a11f360af47947c030d2e)
1 //===- KernelInfo.cpp - Kernel Analysis -----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the KernelInfoPrinter class used to emit remarks about
10 // function properties from a GPU kernel.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Analysis/KernelInfo.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/ADT/StringExtras.h"
17 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
18 #include "llvm/IR/DebugInfo.h"
19 #include "llvm/IR/Dominators.h"
20 #include "llvm/IR/Instructions.h"
21 #include "llvm/IR/Metadata.h"
22 #include "llvm/IR/Module.h"
23 #include "llvm/IR/PassManager.h"
24 #include "llvm/Passes/PassBuilder.h"
25 #include "llvm/Target/TargetMachine.h"
26 
27 using namespace llvm;
28 
29 #define DEBUG_TYPE "kernel-info"
30 
31 namespace {
32 
33 /// Data structure holding function info for kernels.
34 class KernelInfo {
35   void updateForBB(const BasicBlock &BB, OptimizationRemarkEmitter &ORE);
36 
37 public:
38   static void emitKernelInfo(Function &F, FunctionAnalysisManager &FAM,
39                              TargetMachine *TM);
40 
41   /// Whether the function has external linkage and is not a kernel function.
42   bool ExternalNotKernel = false;
43 
44   /// Launch bounds.
45   SmallVector<std::pair<StringRef, int64_t>> LaunchBounds;
46 
47   /// The number of alloca instructions inside the function, the number of those
48   /// with allocation sizes that cannot be determined at compile time, and the
49   /// sum of the sizes that can be.
50   ///
51   /// With the current implementation for at least some GPU archs,
52   /// AllocasDyn > 0 might not be possible, but we report AllocasDyn anyway in
53   /// case the implementation changes.
54   int64_t Allocas = 0;
55   int64_t AllocasDyn = 0;
56   int64_t AllocasStaticSizeSum = 0;
57 
58   /// Number of direct/indirect calls (anything derived from CallBase).
59   int64_t DirectCalls = 0;
60   int64_t IndirectCalls = 0;
61 
62   /// Number of direct calls made from this function to other functions
63   /// defined in this module.
64   int64_t DirectCallsToDefinedFunctions = 0;
65 
66   /// Number of direct calls to inline assembly.
67   int64_t InlineAssemblyCalls = 0;
68 
69   /// Number of calls of type InvokeInst.
70   int64_t Invokes = 0;
71 
72   /// Target-specific flat address space.
73   unsigned FlatAddrspace;
74 
75   /// Number of flat address space memory accesses (via load, store, etc.).
76   int64_t FlatAddrspaceAccesses = 0;
77 };
78 
79 } // end anonymous namespace
80 
81 static void identifyCallee(OptimizationRemark &R, const Module *M,
82                            const Value *V, StringRef Kind = "") {
83   SmallString<100> Name; // might be function name or asm expression
84   if (const Function *F = dyn_cast<Function>(V)) {
85     if (auto *SubProgram = F->getSubprogram()) {
86       if (SubProgram->isArtificial())
87         R << "artificial ";
88       Name = SubProgram->getName();
89     }
90   }
91   if (Name.empty()) {
92     raw_svector_ostream OS(Name);
93     V->printAsOperand(OS, /*PrintType=*/false, M);
94   }
95   if (!Kind.empty())
96     R << Kind << " ";
97   R << "'" << Name << "'";
98 }
99 
100 static void identifyFunction(OptimizationRemark &R, const Function &F) {
101   identifyCallee(R, F.getParent(), &F, "function");
102 }
103 
104 static void remarkAlloca(OptimizationRemarkEmitter &ORE, const Function &Caller,
105                          const AllocaInst &Alloca,
106                          TypeSize::ScalarTy StaticSize) {
107   ORE.emit([&] {
108     StringRef DbgName;
109     DebugLoc Loc;
110     bool Artificial = false;
111     auto DVRs = findDVRDeclares(&const_cast<AllocaInst &>(Alloca));
112     if (!DVRs.empty()) {
113       const DbgVariableRecord &DVR = **DVRs.begin();
114       DbgName = DVR.getVariable()->getName();
115       Loc = DVR.getDebugLoc();
116       Artificial = DVR.Variable->isArtificial();
117     }
118     OptimizationRemark R(DEBUG_TYPE, "Alloca", DiagnosticLocation(Loc),
119                          Alloca.getParent());
120     R << "in ";
121     identifyFunction(R, Caller);
122     R << ", ";
123     if (Artificial)
124       R << "artificial ";
125     SmallString<20> ValName;
126     raw_svector_ostream OS(ValName);
127     Alloca.printAsOperand(OS, /*PrintType=*/false, Caller.getParent());
128     R << "alloca ('" << ValName << "') ";
129     if (!DbgName.empty())
130       R << "for '" << DbgName << "' ";
131     else
132       R << "without debug info ";
133     R << "with ";
134     if (StaticSize)
135       R << "static size of " << itostr(StaticSize) << " bytes";
136     else
137       R << "dynamic size";
138     return R;
139   });
140 }
141 
142 static void remarkCall(OptimizationRemarkEmitter &ORE, const Function &Caller,
143                        const CallBase &Call, StringRef CallKind,
144                        StringRef RemarkKind) {
145   ORE.emit([&] {
146     OptimizationRemark R(DEBUG_TYPE, RemarkKind, &Call);
147     R << "in ";
148     identifyFunction(R, Caller);
149     R << ", " << CallKind << ", callee is ";
150     identifyCallee(R, Caller.getParent(), Call.getCalledOperand());
151     return R;
152   });
153 }
154 
155 static void remarkFlatAddrspaceAccess(OptimizationRemarkEmitter &ORE,
156                                       const Function &Caller,
157                                       const Instruction &Inst) {
158   ORE.emit([&] {
159     OptimizationRemark R(DEBUG_TYPE, "FlatAddrspaceAccess", &Inst);
160     R << "in ";
161     identifyFunction(R, Caller);
162     if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Inst)) {
163       R << ", '" << II->getCalledFunction()->getName() << "' call";
164     } else {
165       R << ", '" << Inst.getOpcodeName() << "' instruction";
166     }
167     if (!Inst.getType()->isVoidTy()) {
168       SmallString<20> Name;
169       raw_svector_ostream OS(Name);
170       Inst.printAsOperand(OS, /*PrintType=*/false, Caller.getParent());
171       R << " ('" << Name << "')";
172     }
173     R << " accesses memory in flat address space";
174     return R;
175   });
176 }
177 
178 void KernelInfo::updateForBB(const BasicBlock &BB,
179                              OptimizationRemarkEmitter &ORE) {
180   const Function &F = *BB.getParent();
181   const Module &M = *F.getParent();
182   const DataLayout &DL = M.getDataLayout();
183   for (const Instruction &I : BB.instructionsWithoutDebug()) {
184     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(&I)) {
185       ++Allocas;
186       TypeSize::ScalarTy StaticSize = 0;
187       if (std::optional<TypeSize> Size = Alloca->getAllocationSize(DL)) {
188         StaticSize = Size->getFixedValue();
189         assert(StaticSize <= std::numeric_limits<int64_t>::max());
190         AllocasStaticSizeSum += StaticSize;
191       } else {
192         ++AllocasDyn;
193       }
194       remarkAlloca(ORE, F, *Alloca, StaticSize);
195     } else if (const CallBase *Call = dyn_cast<CallBase>(&I)) {
196       SmallString<40> CallKind;
197       SmallString<40> RemarkKind;
198       if (Call->isIndirectCall()) {
199         ++IndirectCalls;
200         CallKind += "indirect";
201         RemarkKind += "Indirect";
202       } else {
203         ++DirectCalls;
204         CallKind += "direct";
205         RemarkKind += "Direct";
206       }
207       if (isa<InvokeInst>(Call)) {
208         ++Invokes;
209         CallKind += " invoke";
210         RemarkKind += "Invoke";
211       } else {
212         CallKind += " call";
213         RemarkKind += "Call";
214       }
215       if (!Call->isIndirectCall()) {
216         if (const Function *Callee = Call->getCalledFunction()) {
217           if (!Callee->isIntrinsic() && !Callee->isDeclaration()) {
218             ++DirectCallsToDefinedFunctions;
219             CallKind += " to defined function";
220             RemarkKind += "ToDefinedFunction";
221           }
222         } else if (Call->isInlineAsm()) {
223           ++InlineAssemblyCalls;
224           CallKind += " to inline assembly";
225           RemarkKind += "ToInlineAssembly";
226         }
227       }
228       remarkCall(ORE, F, *Call, CallKind, RemarkKind);
229       if (const AnyMemIntrinsic *MI = dyn_cast<AnyMemIntrinsic>(Call)) {
230         if (MI->getDestAddressSpace() == FlatAddrspace) {
231           ++FlatAddrspaceAccesses;
232           remarkFlatAddrspaceAccess(ORE, F, I);
233         } else if (const AnyMemTransferInst *MT =
234                        dyn_cast<AnyMemTransferInst>(MI)) {
235           if (MT->getSourceAddressSpace() == FlatAddrspace) {
236             ++FlatAddrspaceAccesses;
237             remarkFlatAddrspaceAccess(ORE, F, I);
238           }
239         }
240       }
241     } else if (const LoadInst *Load = dyn_cast<LoadInst>(&I)) {
242       if (Load->getPointerAddressSpace() == FlatAddrspace) {
243         ++FlatAddrspaceAccesses;
244         remarkFlatAddrspaceAccess(ORE, F, I);
245       }
246     } else if (const StoreInst *Store = dyn_cast<StoreInst>(&I)) {
247       if (Store->getPointerAddressSpace() == FlatAddrspace) {
248         ++FlatAddrspaceAccesses;
249         remarkFlatAddrspaceAccess(ORE, F, I);
250       }
251     } else if (const AtomicRMWInst *At = dyn_cast<AtomicRMWInst>(&I)) {
252       if (At->getPointerAddressSpace() == FlatAddrspace) {
253         ++FlatAddrspaceAccesses;
254         remarkFlatAddrspaceAccess(ORE, F, I);
255       }
256     } else if (const AtomicCmpXchgInst *At = dyn_cast<AtomicCmpXchgInst>(&I)) {
257       if (At->getPointerAddressSpace() == FlatAddrspace) {
258         ++FlatAddrspaceAccesses;
259         remarkFlatAddrspaceAccess(ORE, F, I);
260       }
261     }
262   }
263 }
264 
265 static void remarkProperty(OptimizationRemarkEmitter &ORE, const Function &F,
266                            StringRef Name, int64_t Value) {
267   ORE.emit([&] {
268     OptimizationRemark R(DEBUG_TYPE, Name, &F);
269     R << "in ";
270     identifyFunction(R, F);
271     R << ", " << Name << " = " << itostr(Value);
272     return R;
273   });
274 }
275 
276 static std::optional<int64_t> parseFnAttrAsInteger(Function &F,
277                                                    StringRef Name) {
278   if (!F.hasFnAttribute(Name))
279     return std::nullopt;
280   return F.getFnAttributeAsParsedInteger(Name);
281 }
282 
283 void KernelInfo::emitKernelInfo(Function &F, FunctionAnalysisManager &FAM,
284                                 TargetMachine *TM) {
285   KernelInfo KI;
286   TargetTransformInfo &TheTTI = FAM.getResult<TargetIRAnalysis>(F);
287   KI.FlatAddrspace = TheTTI.getFlatAddressSpace();
288 
289   // Record function properties.
290   KI.ExternalNotKernel = F.hasExternalLinkage() && !F.hasKernelCallingConv();
291   for (StringRef Name : {"omp_target_num_teams", "omp_target_thread_limit"}) {
292     if (auto Val = parseFnAttrAsInteger(F, Name))
293       KI.LaunchBounds.push_back({Name, *Val});
294   }
295   TheTTI.collectKernelLaunchBounds(F, KI.LaunchBounds);
296 
297   auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
298   for (const auto &BB : F)
299     KI.updateForBB(BB, ORE);
300 
301 #define REMARK_PROPERTY(PROP_NAME)                                             \
302   remarkProperty(ORE, F, #PROP_NAME, KI.PROP_NAME)
303   REMARK_PROPERTY(ExternalNotKernel);
304   for (auto LB : KI.LaunchBounds)
305     remarkProperty(ORE, F, LB.first, LB.second);
306   REMARK_PROPERTY(Allocas);
307   REMARK_PROPERTY(AllocasStaticSizeSum);
308   REMARK_PROPERTY(AllocasDyn);
309   REMARK_PROPERTY(DirectCalls);
310   REMARK_PROPERTY(IndirectCalls);
311   REMARK_PROPERTY(DirectCallsToDefinedFunctions);
312   REMARK_PROPERTY(InlineAssemblyCalls);
313   REMARK_PROPERTY(Invokes);
314   REMARK_PROPERTY(FlatAddrspaceAccesses);
315 #undef REMARK_PROPERTY
316 
317   return;
318 }
319 
320 PreservedAnalyses KernelInfoPrinter::run(Function &F,
321                                          FunctionAnalysisManager &AM) {
322   // Skip it if remarks are not enabled as it will do nothing useful.
323   if (F.getContext().getDiagHandlerPtr()->isPassedOptRemarkEnabled(DEBUG_TYPE))
324     KernelInfo::emitKernelInfo(F, AM, TM);
325   return PreservedAnalyses::all();
326 }
327