xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===-- AMDGPUAlwaysInlinePass.cpp - Inline All Functions -----------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric /// \file
100b57cec5SDimitry Andric /// This pass marks functions that have to be inlined as always_inline: users
110b57cec5SDimitry Andric /// of LDS globals and, when function calls are disabled, every used function.
120b57cec5SDimitry Andric //
130b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
140b57cec5SDimitry Andric 
150b57cec5SDimitry Andric #include "AMDGPU.h"
160b57cec5SDimitry Andric #include "AMDGPUTargetMachine.h"
170b57cec5SDimitry Andric #include "Utils/AMDGPUBaseInfo.h"
18349cc55cSDimitry Andric #include "llvm/CodeGen/CommandFlags.h"
190b57cec5SDimitry Andric #include "llvm/IR/Module.h"
20e8d8bef9SDimitry Andric #include "llvm/Pass.h"
21e8d8bef9SDimitry Andric #include "llvm/Support/CommandLine.h"
220b57cec5SDimitry Andric 
230b57cec5SDimitry Andric using namespace llvm;
240b57cec5SDimitry Andric 
250b57cec5SDimitry Andric namespace {
260b57cec5SDimitry Andric 
270b57cec5SDimitry Andric static cl::opt<bool> StressCalls(
280b57cec5SDimitry Andric   "amdgpu-stress-function-calls",
290b57cec5SDimitry Andric   cl::Hidden,
300b57cec5SDimitry Andric   cl::desc("Force all functions to be noinline"),
310b57cec5SDimitry Andric   cl::init(false));
320b57cec5SDimitry Andric 
330b57cec5SDimitry Andric class AMDGPUAlwaysInline : public ModulePass {
340b57cec5SDimitry Andric   bool GlobalOpt;
350b57cec5SDimitry Andric 
360b57cec5SDimitry Andric public:
370b57cec5SDimitry Andric   static char ID;
380b57cec5SDimitry Andric 
390b57cec5SDimitry Andric   AMDGPUAlwaysInline(bool GlobalOpt = false) :
400b57cec5SDimitry Andric     ModulePass(ID), GlobalOpt(GlobalOpt) { }
410b57cec5SDimitry Andric   bool runOnModule(Module &M) override;
420b57cec5SDimitry Andric 
430b57cec5SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
440b57cec5SDimitry Andric     AU.setPreservesAll();
450b57cec5SDimitry Andric   }
460b57cec5SDimitry Andric };
470b57cec5SDimitry Andric 
480b57cec5SDimitry Andric } // End anonymous namespace
490b57cec5SDimitry Andric 
500b57cec5SDimitry Andric INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline",
510b57cec5SDimitry Andric                 "AMDGPU Inline All Functions", false, false)
520b57cec5SDimitry Andric 
530b57cec5SDimitry Andric char AMDGPUAlwaysInline::ID = 0;
540b57cec5SDimitry Andric 
55e8d8bef9SDimitry Andric static void
56e8d8bef9SDimitry Andric recursivelyVisitUsers(GlobalValue &GV,
570b57cec5SDimitry Andric                       SmallPtrSetImpl<Function *> &FuncsToAlwaysInline) {
58e8d8bef9SDimitry Andric   SmallVector<User *, 16> Stack(GV.users());
590b57cec5SDimitry Andric 
600b57cec5SDimitry Andric   SmallPtrSet<const Value *, 8> Visited;
610b57cec5SDimitry Andric 
620b57cec5SDimitry Andric   while (!Stack.empty()) {
630b57cec5SDimitry Andric     User *U = Stack.pop_back_val();
640b57cec5SDimitry Andric     if (!Visited.insert(U).second)
650b57cec5SDimitry Andric       continue;
660b57cec5SDimitry Andric 
670b57cec5SDimitry Andric     if (Instruction *I = dyn_cast<Instruction>(U)) {
680b57cec5SDimitry Andric       Function *F = I->getParent()->getParent();
690b57cec5SDimitry Andric       if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
705ffd83dbSDimitry Andric         // FIXME: This is a horrible hack. We should always respect noinline,
715ffd83dbSDimitry Andric         // and just let us hit the error when we can't handle this.
725ffd83dbSDimitry Andric         //
735ffd83dbSDimitry Andric         // Unfortunately, clang adds noinline to all functions at -O0. We have
74349cc55cSDimitry Andric         // to override this here until that's fixed.
755ffd83dbSDimitry Andric         F->removeFnAttr(Attribute::NoInline);
765ffd83dbSDimitry Andric 
770b57cec5SDimitry Andric         FuncsToAlwaysInline.insert(F);
780b57cec5SDimitry Andric         Stack.push_back(F);
790b57cec5SDimitry Andric       }
800b57cec5SDimitry Andric 
810b57cec5SDimitry Andric       // No need to look at further users, but we do need to inline any callers.
820b57cec5SDimitry Andric       continue;
830b57cec5SDimitry Andric     }
840b57cec5SDimitry Andric 
85e8d8bef9SDimitry Andric     append_range(Stack, U->users());
860b57cec5SDimitry Andric   }
870b57cec5SDimitry Andric }
880b57cec5SDimitry Andric 
89e8d8bef9SDimitry Andric static bool alwaysInlineImpl(Module &M, bool GlobalOpt) {
900b57cec5SDimitry Andric   std::vector<GlobalAlias*> AliasesToRemove;
910b57cec5SDimitry Andric 
92*0fca6ea1SDimitry Andric   bool Changed = false;
930b57cec5SDimitry Andric   SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
940b57cec5SDimitry Andric   SmallPtrSet<Function *, 8> FuncsToNoInline;
95349cc55cSDimitry Andric   Triple TT(M.getTargetTriple());
960b57cec5SDimitry Andric 
970b57cec5SDimitry Andric   for (GlobalAlias &A : M.aliases()) {
980b57cec5SDimitry Andric     if (Function* F = dyn_cast<Function>(A.getAliasee())) {
99349cc55cSDimitry Andric       if (TT.getArch() == Triple::amdgcn &&
100349cc55cSDimitry Andric           A.getLinkage() != GlobalValue::InternalLinkage)
101349cc55cSDimitry Andric         continue;
102*0fca6ea1SDimitry Andric       Changed = true;
1030b57cec5SDimitry Andric       A.replaceAllUsesWith(F);
1040b57cec5SDimitry Andric       AliasesToRemove.push_back(&A);
1050b57cec5SDimitry Andric     }
1060b57cec5SDimitry Andric 
1070b57cec5SDimitry Andric     // FIXME: If the aliasee isn't a function, it's some kind of constant expr
1080b57cec5SDimitry Andric     // cast that won't be inlined through.
1090b57cec5SDimitry Andric   }
1100b57cec5SDimitry Andric 
1110b57cec5SDimitry Andric   if (GlobalOpt) {
1120b57cec5SDimitry Andric     for (GlobalAlias* A : AliasesToRemove) {
1130b57cec5SDimitry Andric       A->eraseFromParent();
1140b57cec5SDimitry Andric     }
1150b57cec5SDimitry Andric   }
1160b57cec5SDimitry Andric 
1170b57cec5SDimitry Andric   // Always force inlining of any function that uses an LDS global address. This
1180b57cec5SDimitry Andric   // is something of a workaround because we don't have a way of supporting LDS
1190b57cec5SDimitry Andric   // objects defined in functions. LDS is always allocated by a kernel, and it
1200b57cec5SDimitry Andric   // is difficult to manage LDS usage if a function may be used by multiple
1210b57cec5SDimitry Andric   // kernels.
1220b57cec5SDimitry Andric   //
1230b57cec5SDimitry Andric   // OpenCL doesn't allow declaring LDS in non-kernels, so in practice this
1240b57cec5SDimitry Andric   // should only appear when IPO passes manages to move LDs defined in a kernel
1250b57cec5SDimitry Andric   // into a single user function.
1260b57cec5SDimitry Andric 
1270b57cec5SDimitry Andric   for (GlobalVariable &GV : M.globals()) {
1280b57cec5SDimitry Andric     // TODO: Region address
129480093f4SDimitry Andric     unsigned AS = GV.getAddressSpace();
130fe6060f1SDimitry Andric     if ((AS == AMDGPUAS::REGION_ADDRESS) ||
131fe6060f1SDimitry Andric         (AS == AMDGPUAS::LOCAL_ADDRESS &&
13206c3fb27SDimitry Andric          (!AMDGPUTargetMachine::EnableLowerModuleLDS)))
1330b57cec5SDimitry Andric       recursivelyVisitUsers(GV, FuncsToAlwaysInline);
1340b57cec5SDimitry Andric   }
1350b57cec5SDimitry Andric 
1360b57cec5SDimitry Andric   if (!AMDGPUTargetMachine::EnableFunctionCalls || StressCalls) {
1370b57cec5SDimitry Andric     auto IncompatAttr
1380b57cec5SDimitry Andric       = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline;
1390b57cec5SDimitry Andric 
1400b57cec5SDimitry Andric     for (Function &F : M) {
1410b57cec5SDimitry Andric       if (!F.isDeclaration() && !F.use_empty() &&
1420b57cec5SDimitry Andric           !F.hasFnAttribute(IncompatAttr)) {
1430b57cec5SDimitry Andric         if (StressCalls) {
1440b57cec5SDimitry Andric           if (!FuncsToAlwaysInline.count(&F))
1450b57cec5SDimitry Andric             FuncsToNoInline.insert(&F);
1460b57cec5SDimitry Andric         } else
1470b57cec5SDimitry Andric           FuncsToAlwaysInline.insert(&F);
1480b57cec5SDimitry Andric       }
1490b57cec5SDimitry Andric     }
1500b57cec5SDimitry Andric   }
1510b57cec5SDimitry Andric 
1520b57cec5SDimitry Andric   for (Function *F : FuncsToAlwaysInline)
1530b57cec5SDimitry Andric     F->addFnAttr(Attribute::AlwaysInline);
1540b57cec5SDimitry Andric 
1550b57cec5SDimitry Andric   for (Function *F : FuncsToNoInline)
1560b57cec5SDimitry Andric     F->addFnAttr(Attribute::NoInline);
1570b57cec5SDimitry Andric 
158*0fca6ea1SDimitry Andric   return Changed || !FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty();
1590b57cec5SDimitry Andric }
1600b57cec5SDimitry Andric 
161e8d8bef9SDimitry Andric bool AMDGPUAlwaysInline::runOnModule(Module &M) {
162e8d8bef9SDimitry Andric   return alwaysInlineImpl(M, GlobalOpt);
163e8d8bef9SDimitry Andric }
164e8d8bef9SDimitry Andric 
1650b57cec5SDimitry Andric ModulePass *llvm::createAMDGPUAlwaysInlinePass(bool GlobalOpt) {
1660b57cec5SDimitry Andric   return new AMDGPUAlwaysInline(GlobalOpt);
1670b57cec5SDimitry Andric }
1680b57cec5SDimitry Andric 
169e8d8bef9SDimitry Andric PreservedAnalyses AMDGPUAlwaysInlinePass::run(Module &M,
170e8d8bef9SDimitry Andric                                               ModuleAnalysisManager &AM) {
171*0fca6ea1SDimitry Andric   const bool Changed = alwaysInlineImpl(M, GlobalOpt);
172*0fca6ea1SDimitry Andric   return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
173e8d8bef9SDimitry Andric }
174