//===- KernelInfo.cpp - Kernel Analysis -----------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file defines the KernelInfoPrinter class used to emit remarks about // function properties from a GPU kernel. // //===----------------------------------------------------------------------===// #include "llvm/Analysis/KernelInfo.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" using namespace llvm; #define DEBUG_TYPE "kernel-info" namespace { /// Data structure holding function info for kernels. class KernelInfo { void updateForBB(const BasicBlock &BB, OptimizationRemarkEmitter &ORE); public: static void emitKernelInfo(Function &F, FunctionAnalysisManager &FAM, TargetMachine *TM); /// Whether the function has external linkage and is not a kernel function. bool ExternalNotKernel = false; /// Launch bounds. SmallVector> LaunchBounds; /// The number of alloca instructions inside the function, the number of those /// with allocation sizes that cannot be determined at compile time, and the /// sum of the sizes that can be. /// /// With the current implementation for at least some GPU archs, /// AllocasDyn > 0 might not be possible, but we report AllocasDyn anyway in /// case the implementation changes. int64_t Allocas = 0; int64_t AllocasDyn = 0; int64_t AllocasStaticSizeSum = 0; /// Number of direct/indirect calls (anything derived from CallBase). int64_t DirectCalls = 0; int64_t IndirectCalls = 0; /// Number of direct calls made from this function to other functions /// defined in this module. int64_t DirectCallsToDefinedFunctions = 0; /// Number of direct calls to inline assembly. int64_t InlineAssemblyCalls = 0; /// Number of calls of type InvokeInst. int64_t Invokes = 0; /// Target-specific flat address space. unsigned FlatAddrspace; /// Number of flat address space memory accesses (via load, store, etc.). int64_t FlatAddrspaceAccesses = 0; }; } // end anonymous namespace static void identifyCallee(OptimizationRemark &R, const Module *M, const Value *V, StringRef Kind = "") { SmallString<100> Name; // might be function name or asm expression if (const Function *F = dyn_cast(V)) { if (auto *SubProgram = F->getSubprogram()) { if (SubProgram->isArtificial()) R << "artificial "; Name = SubProgram->getName(); } } if (Name.empty()) { raw_svector_ostream OS(Name); V->printAsOperand(OS, /*PrintType=*/false, M); } if (!Kind.empty()) R << Kind << " "; R << "'" << Name << "'"; } static void identifyFunction(OptimizationRemark &R, const Function &F) { identifyCallee(R, F.getParent(), &F, "function"); } static void remarkAlloca(OptimizationRemarkEmitter &ORE, const Function &Caller, const AllocaInst &Alloca, TypeSize::ScalarTy StaticSize) { ORE.emit([&] { StringRef DbgName; DebugLoc Loc; bool Artificial = false; auto DVRs = findDVRDeclares(&const_cast(Alloca)); if (!DVRs.empty()) { const DbgVariableRecord &DVR = **DVRs.begin(); DbgName = DVR.getVariable()->getName(); Loc = DVR.getDebugLoc(); Artificial = DVR.Variable->isArtificial(); } OptimizationRemark R(DEBUG_TYPE, "Alloca", DiagnosticLocation(Loc), Alloca.getParent()); R << "in "; identifyFunction(R, Caller); R << ", "; if (Artificial) R << "artificial "; SmallString<20> ValName; raw_svector_ostream OS(ValName); Alloca.printAsOperand(OS, /*PrintType=*/false, Caller.getParent()); R << "alloca ('" << ValName << "') "; if (!DbgName.empty()) R << "for '" << DbgName << "' "; else R << "without debug info "; R << "with "; if (StaticSize) R << "static size of " << itostr(StaticSize) << " bytes"; else R << "dynamic size"; return R; }); } static void remarkCall(OptimizationRemarkEmitter &ORE, const Function &Caller, const CallBase &Call, StringRef CallKind, StringRef RemarkKind) { ORE.emit([&] { OptimizationRemark R(DEBUG_TYPE, RemarkKind, &Call); R << "in "; identifyFunction(R, Caller); R << ", " << CallKind << ", callee is "; identifyCallee(R, Caller.getParent(), Call.getCalledOperand()); return R; }); } static void remarkFlatAddrspaceAccess(OptimizationRemarkEmitter &ORE, const Function &Caller, const Instruction &Inst) { ORE.emit([&] { OptimizationRemark R(DEBUG_TYPE, "FlatAddrspaceAccess", &Inst); R << "in "; identifyFunction(R, Caller); if (const IntrinsicInst *II = dyn_cast(&Inst)) { R << ", '" << II->getCalledFunction()->getName() << "' call"; } else { R << ", '" << Inst.getOpcodeName() << "' instruction"; } if (!Inst.getType()->isVoidTy()) { SmallString<20> Name; raw_svector_ostream OS(Name); Inst.printAsOperand(OS, /*PrintType=*/false, Caller.getParent()); R << " ('" << Name << "')"; } R << " accesses memory in flat address space"; return R; }); } void KernelInfo::updateForBB(const BasicBlock &BB, OptimizationRemarkEmitter &ORE) { const Function &F = *BB.getParent(); const Module &M = *F.getParent(); const DataLayout &DL = M.getDataLayout(); for (const Instruction &I : BB.instructionsWithoutDebug()) { if (const AllocaInst *Alloca = dyn_cast(&I)) { ++Allocas; TypeSize::ScalarTy StaticSize = 0; if (std::optional Size = Alloca->getAllocationSize(DL)) { StaticSize = Size->getFixedValue(); assert(StaticSize <= (TypeSize::ScalarTy)std::numeric_limits::max()); AllocasStaticSizeSum += StaticSize; } else { ++AllocasDyn; } remarkAlloca(ORE, F, *Alloca, StaticSize); } else if (const CallBase *Call = dyn_cast(&I)) { SmallString<40> CallKind; SmallString<40> RemarkKind; if (Call->isIndirectCall()) { ++IndirectCalls; CallKind += "indirect"; RemarkKind += "Indirect"; } else { ++DirectCalls; CallKind += "direct"; RemarkKind += "Direct"; } if (isa(Call)) { ++Invokes; CallKind += " invoke"; RemarkKind += "Invoke"; } else { CallKind += " call"; RemarkKind += "Call"; } if (!Call->isIndirectCall()) { if (const Function *Callee = Call->getCalledFunction()) { if (!Callee->isIntrinsic() && !Callee->isDeclaration()) { ++DirectCallsToDefinedFunctions; CallKind += " to defined function"; RemarkKind += "ToDefinedFunction"; } } else if (Call->isInlineAsm()) { ++InlineAssemblyCalls; CallKind += " to inline assembly"; RemarkKind += "ToInlineAssembly"; } } remarkCall(ORE, F, *Call, CallKind, RemarkKind); if (const AnyMemIntrinsic *MI = dyn_cast(Call)) { if (MI->getDestAddressSpace() == FlatAddrspace) { ++FlatAddrspaceAccesses; remarkFlatAddrspaceAccess(ORE, F, I); } else if (const AnyMemTransferInst *MT = dyn_cast(MI)) { if (MT->getSourceAddressSpace() == FlatAddrspace) { ++FlatAddrspaceAccesses; remarkFlatAddrspaceAccess(ORE, F, I); } } } } else if (const LoadInst *Load = dyn_cast(&I)) { if (Load->getPointerAddressSpace() == FlatAddrspace) { ++FlatAddrspaceAccesses; remarkFlatAddrspaceAccess(ORE, F, I); } } else if (const StoreInst *Store = dyn_cast(&I)) { if (Store->getPointerAddressSpace() == FlatAddrspace) { ++FlatAddrspaceAccesses; remarkFlatAddrspaceAccess(ORE, F, I); } } else if (const AtomicRMWInst *At = dyn_cast(&I)) { if (At->getPointerAddressSpace() == FlatAddrspace) { ++FlatAddrspaceAccesses; remarkFlatAddrspaceAccess(ORE, F, I); } } else if (const AtomicCmpXchgInst *At = dyn_cast(&I)) { if (At->getPointerAddressSpace() == FlatAddrspace) { ++FlatAddrspaceAccesses; remarkFlatAddrspaceAccess(ORE, F, I); } } } } static void remarkProperty(OptimizationRemarkEmitter &ORE, const Function &F, StringRef Name, int64_t Value) { ORE.emit([&] { OptimizationRemark R(DEBUG_TYPE, Name, &F); R << "in "; identifyFunction(R, F); R << ", " << Name << " = " << itostr(Value); return R; }); } static std::optional parseFnAttrAsInteger(Function &F, StringRef Name) { if (!F.hasFnAttribute(Name)) return std::nullopt; return F.getFnAttributeAsParsedInteger(Name); } void KernelInfo::emitKernelInfo(Function &F, FunctionAnalysisManager &FAM, TargetMachine *TM) { KernelInfo KI; TargetTransformInfo &TheTTI = FAM.getResult(F); KI.FlatAddrspace = TheTTI.getFlatAddressSpace(); // Record function properties. KI.ExternalNotKernel = F.hasExternalLinkage() && !F.hasKernelCallingConv(); for (StringRef Name : {"omp_target_num_teams", "omp_target_thread_limit"}) { if (auto Val = parseFnAttrAsInteger(F, Name)) KI.LaunchBounds.push_back({Name, *Val}); } TheTTI.collectKernelLaunchBounds(F, KI.LaunchBounds); auto &ORE = FAM.getResult(F); for (const auto &BB : F) KI.updateForBB(BB, ORE); #define REMARK_PROPERTY(PROP_NAME) \ remarkProperty(ORE, F, #PROP_NAME, KI.PROP_NAME) REMARK_PROPERTY(ExternalNotKernel); for (auto LB : KI.LaunchBounds) remarkProperty(ORE, F, LB.first, LB.second); REMARK_PROPERTY(Allocas); REMARK_PROPERTY(AllocasStaticSizeSum); REMARK_PROPERTY(AllocasDyn); REMARK_PROPERTY(DirectCalls); REMARK_PROPERTY(IndirectCalls); REMARK_PROPERTY(DirectCallsToDefinedFunctions); REMARK_PROPERTY(InlineAssemblyCalls); REMARK_PROPERTY(Invokes); REMARK_PROPERTY(FlatAddrspaceAccesses); #undef REMARK_PROPERTY return; } PreservedAnalyses KernelInfoPrinter::run(Function &F, FunctionAnalysisManager &AM) { // Skip it if remarks are not enabled as it will do nothing useful. if (F.getContext().getDiagHandlerPtr()->isPassedOptRemarkEnabled(DEBUG_TYPE)) KernelInfo::emitKernelInfo(F, AM, TM); return PreservedAnalyses::all(); }