1 //===- KernelInfo.cpp - Kernel Analysis -----------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the KernelInfoPrinter class used to emit remarks about 10 // function properties from a GPU kernel. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Analysis/KernelInfo.h" 15 #include "llvm/ADT/SmallString.h" 16 #include "llvm/ADT/StringExtras.h" 17 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 18 #include "llvm/IR/DebugInfo.h" 19 #include "llvm/IR/Dominators.h" 20 #include "llvm/IR/Instructions.h" 21 #include "llvm/IR/Metadata.h" 22 #include "llvm/IR/Module.h" 23 #include "llvm/IR/PassManager.h" 24 #include "llvm/Passes/PassBuilder.h" 25 #include "llvm/Target/TargetMachine.h" 26 27 using namespace llvm; 28 29 #define DEBUG_TYPE "kernel-info" 30 31 namespace { 32 33 /// Data structure holding function info for kernels. 34 class KernelInfo { 35 void updateForBB(const BasicBlock &BB, OptimizationRemarkEmitter &ORE); 36 37 public: 38 static void emitKernelInfo(Function &F, FunctionAnalysisManager &FAM, 39 TargetMachine *TM); 40 41 /// Whether the function has external linkage and is not a kernel function. 42 bool ExternalNotKernel = false; 43 44 /// Launch bounds. 45 SmallVector<std::pair<StringRef, int64_t>> LaunchBounds; 46 47 /// The number of alloca instructions inside the function, the number of those 48 /// with allocation sizes that cannot be determined at compile time, and the 49 /// sum of the sizes that can be. 50 /// 51 /// With the current implementation for at least some GPU archs, 52 /// AllocasDyn > 0 might not be possible, but we report AllocasDyn anyway in 53 /// case the implementation changes. 54 int64_t Allocas = 0; 55 int64_t AllocasDyn = 0; 56 int64_t AllocasStaticSizeSum = 0; 57 58 /// Number of direct/indirect calls (anything derived from CallBase). 59 int64_t DirectCalls = 0; 60 int64_t IndirectCalls = 0; 61 62 /// Number of direct calls made from this function to other functions 63 /// defined in this module. 64 int64_t DirectCallsToDefinedFunctions = 0; 65 66 /// Number of direct calls to inline assembly. 67 int64_t InlineAssemblyCalls = 0; 68 69 /// Number of calls of type InvokeInst. 70 int64_t Invokes = 0; 71 72 /// Target-specific flat address space. 73 unsigned FlatAddrspace; 74 75 /// Number of flat address space memory accesses (via load, store, etc.). 76 int64_t FlatAddrspaceAccesses = 0; 77 }; 78 79 } // end anonymous namespace 80 81 static void identifyCallee(OptimizationRemark &R, const Module *M, 82 const Value *V, StringRef Kind = "") { 83 SmallString<100> Name; // might be function name or asm expression 84 if (const Function *F = dyn_cast<Function>(V)) { 85 if (auto *SubProgram = F->getSubprogram()) { 86 if (SubProgram->isArtificial()) 87 R << "artificial "; 88 Name = SubProgram->getName(); 89 } 90 } 91 if (Name.empty()) { 92 raw_svector_ostream OS(Name); 93 V->printAsOperand(OS, /*PrintType=*/false, M); 94 } 95 if (!Kind.empty()) 96 R << Kind << " "; 97 R << "'" << Name << "'"; 98 } 99 100 static void identifyFunction(OptimizationRemark &R, const Function &F) { 101 identifyCallee(R, F.getParent(), &F, "function"); 102 } 103 104 static void remarkAlloca(OptimizationRemarkEmitter &ORE, const Function &Caller, 105 const AllocaInst &Alloca, 106 TypeSize::ScalarTy StaticSize) { 107 ORE.emit([&] { 108 StringRef DbgName; 109 DebugLoc Loc; 110 bool Artificial = false; 111 auto DVRs = findDVRDeclares(&const_cast<AllocaInst &>(Alloca)); 112 if (!DVRs.empty()) { 113 const DbgVariableRecord &DVR = **DVRs.begin(); 114 DbgName = DVR.getVariable()->getName(); 115 Loc = DVR.getDebugLoc(); 116 Artificial = DVR.Variable->isArtificial(); 117 } 118 OptimizationRemark R(DEBUG_TYPE, "Alloca", DiagnosticLocation(Loc), 119 Alloca.getParent()); 120 R << "in "; 121 identifyFunction(R, Caller); 122 R << ", "; 123 if (Artificial) 124 R << "artificial "; 125 SmallString<20> ValName; 126 raw_svector_ostream OS(ValName); 127 Alloca.printAsOperand(OS, /*PrintType=*/false, Caller.getParent()); 128 R << "alloca ('" << ValName << "') "; 129 if (!DbgName.empty()) 130 R << "for '" << DbgName << "' "; 131 else 132 R << "without debug info "; 133 R << "with "; 134 if (StaticSize) 135 R << "static size of " << itostr(StaticSize) << " bytes"; 136 else 137 R << "dynamic size"; 138 return R; 139 }); 140 } 141 142 static void remarkCall(OptimizationRemarkEmitter &ORE, const Function &Caller, 143 const CallBase &Call, StringRef CallKind, 144 StringRef RemarkKind) { 145 ORE.emit([&] { 146 OptimizationRemark R(DEBUG_TYPE, RemarkKind, &Call); 147 R << "in "; 148 identifyFunction(R, Caller); 149 R << ", " << CallKind << ", callee is "; 150 identifyCallee(R, Caller.getParent(), Call.getCalledOperand()); 151 return R; 152 }); 153 } 154 155 static void remarkFlatAddrspaceAccess(OptimizationRemarkEmitter &ORE, 156 const Function &Caller, 157 const Instruction &Inst) { 158 ORE.emit([&] { 159 OptimizationRemark R(DEBUG_TYPE, "FlatAddrspaceAccess", &Inst); 160 R << "in "; 161 identifyFunction(R, Caller); 162 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Inst)) { 163 R << ", '" << II->getCalledFunction()->getName() << "' call"; 164 } else { 165 R << ", '" << Inst.getOpcodeName() << "' instruction"; 166 } 167 if (!Inst.getType()->isVoidTy()) { 168 SmallString<20> Name; 169 raw_svector_ostream OS(Name); 170 Inst.printAsOperand(OS, /*PrintType=*/false, Caller.getParent()); 171 R << " ('" << Name << "')"; 172 } 173 R << " accesses memory in flat address space"; 174 return R; 175 }); 176 } 177 178 void KernelInfo::updateForBB(const BasicBlock &BB, 179 OptimizationRemarkEmitter &ORE) { 180 const Function &F = *BB.getParent(); 181 const Module &M = *F.getParent(); 182 const DataLayout &DL = M.getDataLayout(); 183 for (const Instruction &I : BB.instructionsWithoutDebug()) { 184 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(&I)) { 185 ++Allocas; 186 TypeSize::ScalarTy StaticSize = 0; 187 if (std::optional<TypeSize> Size = Alloca->getAllocationSize(DL)) { 188 StaticSize = Size->getFixedValue(); 189 assert(StaticSize <= std::numeric_limits<int64_t>::max()); 190 AllocasStaticSizeSum += StaticSize; 191 } else { 192 ++AllocasDyn; 193 } 194 remarkAlloca(ORE, F, *Alloca, StaticSize); 195 } else if (const CallBase *Call = dyn_cast<CallBase>(&I)) { 196 SmallString<40> CallKind; 197 SmallString<40> RemarkKind; 198 if (Call->isIndirectCall()) { 199 ++IndirectCalls; 200 CallKind += "indirect"; 201 RemarkKind += "Indirect"; 202 } else { 203 ++DirectCalls; 204 CallKind += "direct"; 205 RemarkKind += "Direct"; 206 } 207 if (isa<InvokeInst>(Call)) { 208 ++Invokes; 209 CallKind += " invoke"; 210 RemarkKind += "Invoke"; 211 } else { 212 CallKind += " call"; 213 RemarkKind += "Call"; 214 } 215 if (!Call->isIndirectCall()) { 216 if (const Function *Callee = Call->getCalledFunction()) { 217 if (!Callee->isIntrinsic() && !Callee->isDeclaration()) { 218 ++DirectCallsToDefinedFunctions; 219 CallKind += " to defined function"; 220 RemarkKind += "ToDefinedFunction"; 221 } 222 } else if (Call->isInlineAsm()) { 223 ++InlineAssemblyCalls; 224 CallKind += " to inline assembly"; 225 RemarkKind += "ToInlineAssembly"; 226 } 227 } 228 remarkCall(ORE, F, *Call, CallKind, RemarkKind); 229 if (const AnyMemIntrinsic *MI = dyn_cast<AnyMemIntrinsic>(Call)) { 230 if (MI->getDestAddressSpace() == FlatAddrspace) { 231 ++FlatAddrspaceAccesses; 232 remarkFlatAddrspaceAccess(ORE, F, I); 233 } else if (const AnyMemTransferInst *MT = 234 dyn_cast<AnyMemTransferInst>(MI)) { 235 if (MT->getSourceAddressSpace() == FlatAddrspace) { 236 ++FlatAddrspaceAccesses; 237 remarkFlatAddrspaceAccess(ORE, F, I); 238 } 239 } 240 } 241 } else if (const LoadInst *Load = dyn_cast<LoadInst>(&I)) { 242 if (Load->getPointerAddressSpace() == FlatAddrspace) { 243 ++FlatAddrspaceAccesses; 244 remarkFlatAddrspaceAccess(ORE, F, I); 245 } 246 } else if (const StoreInst *Store = dyn_cast<StoreInst>(&I)) { 247 if (Store->getPointerAddressSpace() == FlatAddrspace) { 248 ++FlatAddrspaceAccesses; 249 remarkFlatAddrspaceAccess(ORE, F, I); 250 } 251 } else if (const AtomicRMWInst *At = dyn_cast<AtomicRMWInst>(&I)) { 252 if (At->getPointerAddressSpace() == FlatAddrspace) { 253 ++FlatAddrspaceAccesses; 254 remarkFlatAddrspaceAccess(ORE, F, I); 255 } 256 } else if (const AtomicCmpXchgInst *At = dyn_cast<AtomicCmpXchgInst>(&I)) { 257 if (At->getPointerAddressSpace() == FlatAddrspace) { 258 ++FlatAddrspaceAccesses; 259 remarkFlatAddrspaceAccess(ORE, F, I); 260 } 261 } 262 } 263 } 264 265 static void remarkProperty(OptimizationRemarkEmitter &ORE, const Function &F, 266 StringRef Name, int64_t Value) { 267 ORE.emit([&] { 268 OptimizationRemark R(DEBUG_TYPE, Name, &F); 269 R << "in "; 270 identifyFunction(R, F); 271 R << ", " << Name << " = " << itostr(Value); 272 return R; 273 }); 274 } 275 276 static std::optional<int64_t> parseFnAttrAsInteger(Function &F, 277 StringRef Name) { 278 if (!F.hasFnAttribute(Name)) 279 return std::nullopt; 280 return F.getFnAttributeAsParsedInteger(Name); 281 } 282 283 void KernelInfo::emitKernelInfo(Function &F, FunctionAnalysisManager &FAM, 284 TargetMachine *TM) { 285 KernelInfo KI; 286 TargetTransformInfo &TheTTI = FAM.getResult<TargetIRAnalysis>(F); 287 KI.FlatAddrspace = TheTTI.getFlatAddressSpace(); 288 289 // Record function properties. 290 KI.ExternalNotKernel = F.hasExternalLinkage() && !F.hasKernelCallingConv(); 291 for (StringRef Name : {"omp_target_num_teams", "omp_target_thread_limit"}) { 292 if (auto Val = parseFnAttrAsInteger(F, Name)) 293 KI.LaunchBounds.push_back({Name, *Val}); 294 } 295 TheTTI.collectKernelLaunchBounds(F, KI.LaunchBounds); 296 297 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 298 for (const auto &BB : F) 299 KI.updateForBB(BB, ORE); 300 301 #define REMARK_PROPERTY(PROP_NAME) \ 302 remarkProperty(ORE, F, #PROP_NAME, KI.PROP_NAME) 303 REMARK_PROPERTY(ExternalNotKernel); 304 for (auto LB : KI.LaunchBounds) 305 remarkProperty(ORE, F, LB.first, LB.second); 306 REMARK_PROPERTY(Allocas); 307 REMARK_PROPERTY(AllocasStaticSizeSum); 308 REMARK_PROPERTY(AllocasDyn); 309 REMARK_PROPERTY(DirectCalls); 310 REMARK_PROPERTY(IndirectCalls); 311 REMARK_PROPERTY(DirectCallsToDefinedFunctions); 312 REMARK_PROPERTY(InlineAssemblyCalls); 313 REMARK_PROPERTY(Invokes); 314 REMARK_PROPERTY(FlatAddrspaceAccesses); 315 #undef REMARK_PROPERTY 316 317 return; 318 } 319 320 PreservedAnalyses KernelInfoPrinter::run(Function &F, 321 FunctionAnalysisManager &AM) { 322 // Skip it if remarks are not enabled as it will do nothing useful. 323 if (F.getContext().getDiagHandlerPtr()->isPassedOptRemarkEnabled(DEBUG_TYPE)) 324 KernelInfo::emitKernelInfo(F, AM, TM); 325 return PreservedAnalyses::all(); 326 } 327