10b57cec5SDimitry Andric //===-- AMDGPUAlwaysInlinePass.cpp - Promote Allocas ----------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric /// \file 100b57cec5SDimitry Andric /// This pass marks all internal functions as always_inline and creates 110b57cec5SDimitry Andric /// duplicates of all other functions and marks the duplicates as always_inline. 120b57cec5SDimitry Andric // 130b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 140b57cec5SDimitry Andric 150b57cec5SDimitry Andric #include "AMDGPU.h" 160b57cec5SDimitry Andric #include "AMDGPUTargetMachine.h" 170b57cec5SDimitry Andric #include "Utils/AMDGPUBaseInfo.h" 18349cc55cSDimitry Andric #include "llvm/CodeGen/CommandFlags.h" 190b57cec5SDimitry Andric #include "llvm/IR/Module.h" 20e8d8bef9SDimitry Andric #include "llvm/Pass.h" 21e8d8bef9SDimitry Andric #include "llvm/Support/CommandLine.h" 220b57cec5SDimitry Andric 230b57cec5SDimitry Andric using namespace llvm; 240b57cec5SDimitry Andric 250b57cec5SDimitry Andric namespace { 260b57cec5SDimitry Andric 270b57cec5SDimitry Andric static cl::opt<bool> StressCalls( 280b57cec5SDimitry Andric "amdgpu-stress-function-calls", 290b57cec5SDimitry Andric cl::Hidden, 300b57cec5SDimitry Andric cl::desc("Force all functions to be noinline"), 310b57cec5SDimitry Andric cl::init(false)); 320b57cec5SDimitry Andric 330b57cec5SDimitry Andric class AMDGPUAlwaysInline : public ModulePass { 340b57cec5SDimitry Andric bool GlobalOpt; 350b57cec5SDimitry Andric 360b57cec5SDimitry Andric public: 370b57cec5SDimitry Andric static char ID; 380b57cec5SDimitry Andric 390b57cec5SDimitry Andric AMDGPUAlwaysInline(bool GlobalOpt = false) : 400b57cec5SDimitry Andric ModulePass(ID), GlobalOpt(GlobalOpt) { } 410b57cec5SDimitry Andric bool runOnModule(Module &M) override; 420b57cec5SDimitry Andric 430b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 440b57cec5SDimitry Andric AU.setPreservesAll(); 450b57cec5SDimitry Andric } 460b57cec5SDimitry Andric }; 470b57cec5SDimitry Andric 480b57cec5SDimitry Andric } // End anonymous namespace 490b57cec5SDimitry Andric 500b57cec5SDimitry Andric INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline", 510b57cec5SDimitry Andric "AMDGPU Inline All Functions", false, false) 520b57cec5SDimitry Andric 530b57cec5SDimitry Andric char AMDGPUAlwaysInline::ID = 0; 540b57cec5SDimitry Andric 55e8d8bef9SDimitry Andric static void 56e8d8bef9SDimitry Andric recursivelyVisitUsers(GlobalValue &GV, 570b57cec5SDimitry Andric SmallPtrSetImpl<Function *> &FuncsToAlwaysInline) { 58e8d8bef9SDimitry Andric SmallVector<User *, 16> Stack(GV.users()); 590b57cec5SDimitry Andric 600b57cec5SDimitry Andric SmallPtrSet<const Value *, 8> Visited; 610b57cec5SDimitry Andric 620b57cec5SDimitry Andric while (!Stack.empty()) { 630b57cec5SDimitry Andric User *U = Stack.pop_back_val(); 640b57cec5SDimitry Andric if (!Visited.insert(U).second) 650b57cec5SDimitry Andric continue; 660b57cec5SDimitry Andric 670b57cec5SDimitry Andric if (Instruction *I = dyn_cast<Instruction>(U)) { 680b57cec5SDimitry Andric Function *F = I->getParent()->getParent(); 690b57cec5SDimitry Andric if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) { 705ffd83dbSDimitry Andric // FIXME: This is a horrible hack. We should always respect noinline, 715ffd83dbSDimitry Andric // and just let us hit the error when we can't handle this. 725ffd83dbSDimitry Andric // 735ffd83dbSDimitry Andric // Unfortunately, clang adds noinline to all functions at -O0. We have 74349cc55cSDimitry Andric // to override this here until that's fixed. 755ffd83dbSDimitry Andric F->removeFnAttr(Attribute::NoInline); 765ffd83dbSDimitry Andric 770b57cec5SDimitry Andric FuncsToAlwaysInline.insert(F); 780b57cec5SDimitry Andric Stack.push_back(F); 790b57cec5SDimitry Andric } 800b57cec5SDimitry Andric 810b57cec5SDimitry Andric // No need to look at further users, but we do need to inline any callers. 820b57cec5SDimitry Andric continue; 830b57cec5SDimitry Andric } 840b57cec5SDimitry Andric 85e8d8bef9SDimitry Andric append_range(Stack, U->users()); 860b57cec5SDimitry Andric } 870b57cec5SDimitry Andric } 880b57cec5SDimitry Andric 89e8d8bef9SDimitry Andric static bool alwaysInlineImpl(Module &M, bool GlobalOpt) { 900b57cec5SDimitry Andric std::vector<GlobalAlias*> AliasesToRemove; 910b57cec5SDimitry Andric 92*0fca6ea1SDimitry Andric bool Changed = false; 930b57cec5SDimitry Andric SmallPtrSet<Function *, 8> FuncsToAlwaysInline; 940b57cec5SDimitry Andric SmallPtrSet<Function *, 8> FuncsToNoInline; 95349cc55cSDimitry Andric Triple TT(M.getTargetTriple()); 960b57cec5SDimitry Andric 970b57cec5SDimitry Andric for (GlobalAlias &A : M.aliases()) { 980b57cec5SDimitry Andric if (Function* F = dyn_cast<Function>(A.getAliasee())) { 99349cc55cSDimitry Andric if (TT.getArch() == Triple::amdgcn && 100349cc55cSDimitry Andric A.getLinkage() != GlobalValue::InternalLinkage) 101349cc55cSDimitry Andric continue; 102*0fca6ea1SDimitry Andric Changed = true; 1030b57cec5SDimitry Andric A.replaceAllUsesWith(F); 1040b57cec5SDimitry Andric AliasesToRemove.push_back(&A); 1050b57cec5SDimitry Andric } 1060b57cec5SDimitry Andric 1070b57cec5SDimitry Andric // FIXME: If the aliasee isn't a function, it's some kind of constant expr 1080b57cec5SDimitry Andric // cast that won't be inlined through. 1090b57cec5SDimitry Andric } 1100b57cec5SDimitry Andric 1110b57cec5SDimitry Andric if (GlobalOpt) { 1120b57cec5SDimitry Andric for (GlobalAlias* A : AliasesToRemove) { 1130b57cec5SDimitry Andric A->eraseFromParent(); 1140b57cec5SDimitry Andric } 1150b57cec5SDimitry Andric } 1160b57cec5SDimitry Andric 1170b57cec5SDimitry Andric // Always force inlining of any function that uses an LDS global address. This 1180b57cec5SDimitry Andric // is something of a workaround because we don't have a way of supporting LDS 1190b57cec5SDimitry Andric // objects defined in functions. LDS is always allocated by a kernel, and it 1200b57cec5SDimitry Andric // is difficult to manage LDS usage if a function may be used by multiple 1210b57cec5SDimitry Andric // kernels. 1220b57cec5SDimitry Andric // 1230b57cec5SDimitry Andric // OpenCL doesn't allow declaring LDS in non-kernels, so in practice this 1240b57cec5SDimitry Andric // should only appear when IPO passes manages to move LDs defined in a kernel 1250b57cec5SDimitry Andric // into a single user function. 1260b57cec5SDimitry Andric 1270b57cec5SDimitry Andric for (GlobalVariable &GV : M.globals()) { 1280b57cec5SDimitry Andric // TODO: Region address 129480093f4SDimitry Andric unsigned AS = GV.getAddressSpace(); 130fe6060f1SDimitry Andric if ((AS == AMDGPUAS::REGION_ADDRESS) || 131fe6060f1SDimitry Andric (AS == AMDGPUAS::LOCAL_ADDRESS && 13206c3fb27SDimitry Andric (!AMDGPUTargetMachine::EnableLowerModuleLDS))) 1330b57cec5SDimitry Andric recursivelyVisitUsers(GV, FuncsToAlwaysInline); 1340b57cec5SDimitry Andric } 1350b57cec5SDimitry Andric 1360b57cec5SDimitry Andric if (!AMDGPUTargetMachine::EnableFunctionCalls || StressCalls) { 1370b57cec5SDimitry Andric auto IncompatAttr 1380b57cec5SDimitry Andric = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline; 1390b57cec5SDimitry Andric 1400b57cec5SDimitry Andric for (Function &F : M) { 1410b57cec5SDimitry Andric if (!F.isDeclaration() && !F.use_empty() && 1420b57cec5SDimitry Andric !F.hasFnAttribute(IncompatAttr)) { 1430b57cec5SDimitry Andric if (StressCalls) { 1440b57cec5SDimitry Andric if (!FuncsToAlwaysInline.count(&F)) 1450b57cec5SDimitry Andric FuncsToNoInline.insert(&F); 1460b57cec5SDimitry Andric } else 1470b57cec5SDimitry Andric FuncsToAlwaysInline.insert(&F); 1480b57cec5SDimitry Andric } 1490b57cec5SDimitry Andric } 1500b57cec5SDimitry Andric } 1510b57cec5SDimitry Andric 1520b57cec5SDimitry Andric for (Function *F : FuncsToAlwaysInline) 1530b57cec5SDimitry Andric F->addFnAttr(Attribute::AlwaysInline); 1540b57cec5SDimitry Andric 1550b57cec5SDimitry Andric for (Function *F : FuncsToNoInline) 1560b57cec5SDimitry Andric F->addFnAttr(Attribute::NoInline); 1570b57cec5SDimitry Andric 158*0fca6ea1SDimitry Andric return Changed || !FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty(); 1590b57cec5SDimitry Andric } 1600b57cec5SDimitry Andric 161e8d8bef9SDimitry Andric bool AMDGPUAlwaysInline::runOnModule(Module &M) { 162e8d8bef9SDimitry Andric return alwaysInlineImpl(M, GlobalOpt); 163e8d8bef9SDimitry Andric } 164e8d8bef9SDimitry Andric 1650b57cec5SDimitry Andric ModulePass *llvm::createAMDGPUAlwaysInlinePass(bool GlobalOpt) { 1660b57cec5SDimitry Andric return new AMDGPUAlwaysInline(GlobalOpt); 1670b57cec5SDimitry Andric } 1680b57cec5SDimitry Andric 169e8d8bef9SDimitry Andric PreservedAnalyses AMDGPUAlwaysInlinePass::run(Module &M, 170e8d8bef9SDimitry Andric ModuleAnalysisManager &AM) { 171*0fca6ea1SDimitry Andric const bool Changed = alwaysInlineImpl(M, GlobalOpt); 172*0fca6ea1SDimitry Andric return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); 173e8d8bef9SDimitry Andric } 174