1 //===- KernelInfo.cpp - Kernel Analysis -----------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the KernelInfoPrinter class used to emit remarks about 10 // function properties from a GPU kernel. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Analysis/KernelInfo.h" 15 #include "llvm/ADT/SmallString.h" 16 #include "llvm/ADT/StringExtras.h" 17 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 18 #include "llvm/Analysis/TargetTransformInfo.h" 19 #include "llvm/IR/DebugInfo.h" 20 #include "llvm/IR/Dominators.h" 21 #include "llvm/IR/Instructions.h" 22 #include "llvm/IR/Metadata.h" 23 #include "llvm/IR/Module.h" 24 #include "llvm/IR/PassManager.h" 25 26 using namespace llvm; 27 28 #define DEBUG_TYPE "kernel-info" 29 30 namespace { 31 32 /// Data structure holding function info for kernels. 33 class KernelInfo { 34 void updateForBB(const BasicBlock &BB, OptimizationRemarkEmitter &ORE); 35 36 public: 37 static void emitKernelInfo(Function &F, FunctionAnalysisManager &FAM, 38 TargetMachine *TM); 39 40 /// Whether the function has external linkage and is not a kernel function. 41 bool ExternalNotKernel = false; 42 43 /// Launch bounds. 44 SmallVector<std::pair<StringRef, int64_t>> LaunchBounds; 45 46 /// The number of alloca instructions inside the function, the number of those 47 /// with allocation sizes that cannot be determined at compile time, and the 48 /// sum of the sizes that can be. 49 /// 50 /// With the current implementation for at least some GPU archs, 51 /// AllocasDyn > 0 might not be possible, but we report AllocasDyn anyway in 52 /// case the implementation changes. 53 int64_t Allocas = 0; 54 int64_t AllocasDyn = 0; 55 int64_t AllocasStaticSizeSum = 0; 56 57 /// Number of direct/indirect calls (anything derived from CallBase). 58 int64_t DirectCalls = 0; 59 int64_t IndirectCalls = 0; 60 61 /// Number of direct calls made from this function to other functions 62 /// defined in this module. 63 int64_t DirectCallsToDefinedFunctions = 0; 64 65 /// Number of direct calls to inline assembly. 66 int64_t InlineAssemblyCalls = 0; 67 68 /// Number of calls of type InvokeInst. 69 int64_t Invokes = 0; 70 71 /// Target-specific flat address space. 72 unsigned FlatAddrspace; 73 74 /// Number of flat address space memory accesses (via load, store, etc.). 75 int64_t FlatAddrspaceAccesses = 0; 76 }; 77 78 } // end anonymous namespace 79 80 static void identifyCallee(OptimizationRemark &R, const Module *M, 81 const Value *V, StringRef Kind = "") { 82 SmallString<100> Name; // might be function name or asm expression 83 if (const Function *F = dyn_cast<Function>(V)) { 84 if (auto *SubProgram = F->getSubprogram()) { 85 if (SubProgram->isArtificial()) 86 R << "artificial "; 87 Name = SubProgram->getName(); 88 } 89 } 90 if (Name.empty()) { 91 raw_svector_ostream OS(Name); 92 V->printAsOperand(OS, /*PrintType=*/false, M); 93 } 94 if (!Kind.empty()) 95 R << Kind << " "; 96 R << "'" << Name << "'"; 97 } 98 99 static void identifyFunction(OptimizationRemark &R, const Function &F) { 100 identifyCallee(R, F.getParent(), &F, "function"); 101 } 102 103 static void remarkAlloca(OptimizationRemarkEmitter &ORE, const Function &Caller, 104 const AllocaInst &Alloca, 105 TypeSize::ScalarTy StaticSize) { 106 ORE.emit([&] { 107 StringRef DbgName; 108 DebugLoc Loc; 109 bool Artificial = false; 110 auto DVRs = findDVRDeclares(&const_cast<AllocaInst &>(Alloca)); 111 if (!DVRs.empty()) { 112 const DbgVariableRecord &DVR = **DVRs.begin(); 113 DbgName = DVR.getVariable()->getName(); 114 Loc = DVR.getDebugLoc(); 115 Artificial = DVR.Variable->isArtificial(); 116 } 117 OptimizationRemark R(DEBUG_TYPE, "Alloca", DiagnosticLocation(Loc), 118 Alloca.getParent()); 119 R << "in "; 120 identifyFunction(R, Caller); 121 R << ", "; 122 if (Artificial) 123 R << "artificial "; 124 SmallString<20> ValName; 125 raw_svector_ostream OS(ValName); 126 Alloca.printAsOperand(OS, /*PrintType=*/false, Caller.getParent()); 127 R << "alloca ('" << ValName << "') "; 128 if (!DbgName.empty()) 129 R << "for '" << DbgName << "' "; 130 else 131 R << "without debug info "; 132 R << "with "; 133 if (StaticSize) 134 R << "static size of " << itostr(StaticSize) << " bytes"; 135 else 136 R << "dynamic size"; 137 return R; 138 }); 139 } 140 141 static void remarkCall(OptimizationRemarkEmitter &ORE, const Function &Caller, 142 const CallBase &Call, StringRef CallKind, 143 StringRef RemarkKind) { 144 ORE.emit([&] { 145 OptimizationRemark R(DEBUG_TYPE, RemarkKind, &Call); 146 R << "in "; 147 identifyFunction(R, Caller); 148 R << ", " << CallKind << ", callee is "; 149 identifyCallee(R, Caller.getParent(), Call.getCalledOperand()); 150 return R; 151 }); 152 } 153 154 static void remarkFlatAddrspaceAccess(OptimizationRemarkEmitter &ORE, 155 const Function &Caller, 156 const Instruction &Inst) { 157 ORE.emit([&] { 158 OptimizationRemark R(DEBUG_TYPE, "FlatAddrspaceAccess", &Inst); 159 R << "in "; 160 identifyFunction(R, Caller); 161 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Inst)) { 162 R << ", '" << II->getCalledFunction()->getName() << "' call"; 163 } else { 164 R << ", '" << Inst.getOpcodeName() << "' instruction"; 165 } 166 if (!Inst.getType()->isVoidTy()) { 167 SmallString<20> Name; 168 raw_svector_ostream OS(Name); 169 Inst.printAsOperand(OS, /*PrintType=*/false, Caller.getParent()); 170 R << " ('" << Name << "')"; 171 } 172 R << " accesses memory in flat address space"; 173 return R; 174 }); 175 } 176 177 void KernelInfo::updateForBB(const BasicBlock &BB, 178 OptimizationRemarkEmitter &ORE) { 179 const Function &F = *BB.getParent(); 180 const Module &M = *F.getParent(); 181 const DataLayout &DL = M.getDataLayout(); 182 for (const Instruction &I : BB.instructionsWithoutDebug()) { 183 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(&I)) { 184 ++Allocas; 185 TypeSize::ScalarTy StaticSize = 0; 186 if (std::optional<TypeSize> Size = Alloca->getAllocationSize(DL)) { 187 StaticSize = Size->getFixedValue(); 188 assert(StaticSize <= 189 (TypeSize::ScalarTy)std::numeric_limits<int64_t>::max()); 190 AllocasStaticSizeSum += StaticSize; 191 } else { 192 ++AllocasDyn; 193 } 194 remarkAlloca(ORE, F, *Alloca, StaticSize); 195 } else if (const CallBase *Call = dyn_cast<CallBase>(&I)) { 196 SmallString<40> CallKind; 197 SmallString<40> RemarkKind; 198 if (Call->isIndirectCall()) { 199 ++IndirectCalls; 200 CallKind += "indirect"; 201 RemarkKind += "Indirect"; 202 } else { 203 ++DirectCalls; 204 CallKind += "direct"; 205 RemarkKind += "Direct"; 206 } 207 if (isa<InvokeInst>(Call)) { 208 ++Invokes; 209 CallKind += " invoke"; 210 RemarkKind += "Invoke"; 211 } else { 212 CallKind += " call"; 213 RemarkKind += "Call"; 214 } 215 if (!Call->isIndirectCall()) { 216 if (const Function *Callee = Call->getCalledFunction()) { 217 if (!Callee->isIntrinsic() && !Callee->isDeclaration()) { 218 ++DirectCallsToDefinedFunctions; 219 CallKind += " to defined function"; 220 RemarkKind += "ToDefinedFunction"; 221 } 222 } else if (Call->isInlineAsm()) { 223 ++InlineAssemblyCalls; 224 CallKind += " to inline assembly"; 225 RemarkKind += "ToInlineAssembly"; 226 } 227 } 228 remarkCall(ORE, F, *Call, CallKind, RemarkKind); 229 if (const AnyMemIntrinsic *MI = dyn_cast<AnyMemIntrinsic>(Call)) { 230 if (MI->getDestAddressSpace() == FlatAddrspace) { 231 ++FlatAddrspaceAccesses; 232 remarkFlatAddrspaceAccess(ORE, F, I); 233 } else if (const AnyMemTransferInst *MT = 234 dyn_cast<AnyMemTransferInst>(MI)) { 235 if (MT->getSourceAddressSpace() == FlatAddrspace) { 236 ++FlatAddrspaceAccesses; 237 remarkFlatAddrspaceAccess(ORE, F, I); 238 } 239 } 240 } 241 } else if (const LoadInst *Load = dyn_cast<LoadInst>(&I)) { 242 if (Load->getPointerAddressSpace() == FlatAddrspace) { 243 ++FlatAddrspaceAccesses; 244 remarkFlatAddrspaceAccess(ORE, F, I); 245 } 246 } else if (const StoreInst *Store = dyn_cast<StoreInst>(&I)) { 247 if (Store->getPointerAddressSpace() == FlatAddrspace) { 248 ++FlatAddrspaceAccesses; 249 remarkFlatAddrspaceAccess(ORE, F, I); 250 } 251 } else if (const AtomicRMWInst *At = dyn_cast<AtomicRMWInst>(&I)) { 252 if (At->getPointerAddressSpace() == FlatAddrspace) { 253 ++FlatAddrspaceAccesses; 254 remarkFlatAddrspaceAccess(ORE, F, I); 255 } 256 } else if (const AtomicCmpXchgInst *At = dyn_cast<AtomicCmpXchgInst>(&I)) { 257 if (At->getPointerAddressSpace() == FlatAddrspace) { 258 ++FlatAddrspaceAccesses; 259 remarkFlatAddrspaceAccess(ORE, F, I); 260 } 261 } 262 } 263 } 264 265 static void remarkProperty(OptimizationRemarkEmitter &ORE, const Function &F, 266 StringRef Name, int64_t Value) { 267 ORE.emit([&] { 268 OptimizationRemark R(DEBUG_TYPE, Name, &F); 269 R << "in "; 270 identifyFunction(R, F); 271 R << ", " << Name << " = " << itostr(Value); 272 return R; 273 }); 274 } 275 276 static std::optional<int64_t> parseFnAttrAsInteger(Function &F, 277 StringRef Name) { 278 if (!F.hasFnAttribute(Name)) 279 return std::nullopt; 280 return F.getFnAttributeAsParsedInteger(Name); 281 } 282 283 void KernelInfo::emitKernelInfo(Function &F, FunctionAnalysisManager &FAM, 284 TargetMachine *TM) { 285 KernelInfo KI; 286 TargetTransformInfo &TheTTI = FAM.getResult<TargetIRAnalysis>(F); 287 KI.FlatAddrspace = TheTTI.getFlatAddressSpace(); 288 289 // Record function properties. 290 KI.ExternalNotKernel = F.hasExternalLinkage() && !F.hasKernelCallingConv(); 291 for (StringRef Name : {"omp_target_num_teams", "omp_target_thread_limit"}) { 292 if (auto Val = parseFnAttrAsInteger(F, Name)) 293 KI.LaunchBounds.push_back({Name, *Val}); 294 } 295 TheTTI.collectKernelLaunchBounds(F, KI.LaunchBounds); 296 297 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 298 for (const auto &BB : F) 299 KI.updateForBB(BB, ORE); 300 301 #define REMARK_PROPERTY(PROP_NAME) \ 302 remarkProperty(ORE, F, #PROP_NAME, KI.PROP_NAME) 303 REMARK_PROPERTY(ExternalNotKernel); 304 for (auto LB : KI.LaunchBounds) 305 remarkProperty(ORE, F, LB.first, LB.second); 306 REMARK_PROPERTY(Allocas); 307 REMARK_PROPERTY(AllocasStaticSizeSum); 308 REMARK_PROPERTY(AllocasDyn); 309 REMARK_PROPERTY(DirectCalls); 310 REMARK_PROPERTY(IndirectCalls); 311 REMARK_PROPERTY(DirectCallsToDefinedFunctions); 312 REMARK_PROPERTY(InlineAssemblyCalls); 313 REMARK_PROPERTY(Invokes); 314 REMARK_PROPERTY(FlatAddrspaceAccesses); 315 #undef REMARK_PROPERTY 316 317 return; 318 } 319 320 PreservedAnalyses KernelInfoPrinter::run(Function &F, 321 FunctionAnalysisManager &AM) { 322 // Skip it if remarks are not enabled as it will do nothing useful. 323 if (F.getContext().getDiagHandlerPtr()->isPassedOptRemarkEnabled(DEBUG_TYPE)) 324 KernelInfo::emitKernelInfo(F, AM, TM); 325 return PreservedAnalyses::all(); 326 } 327