xref: /openbsd-src/gnu/llvm/llvm/lib/Transforms/IPO/Internalize.cpp (revision 73471bf04ceb096474c7f0fa83b1b65c70a787a1)
109467b48Spatrick //===-- Internalize.cpp - Mark functions internal -------------------------===//
209467b48Spatrick //
309467b48Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
409467b48Spatrick // See https://llvm.org/LICENSE.txt for license information.
509467b48Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
609467b48Spatrick //
709467b48Spatrick //===----------------------------------------------------------------------===//
809467b48Spatrick //
909467b48Spatrick // This pass loops over all of the functions and variables in the input module.
1009467b48Spatrick // If the function or variable does not need to be preserved according to the
1109467b48Spatrick // client supplied callback, it is marked as internal.
1209467b48Spatrick //
1309467b48Spatrick // This transformation would not be legal in a regular compilation, but it gets
1409467b48Spatrick // extra information from the linker about what is safe.
1509467b48Spatrick //
1609467b48Spatrick // For example: Internalizing a function with external linkage. Only if we are
1709467b48Spatrick // told it is only used from within this module, it is safe to do it.
1809467b48Spatrick //
1909467b48Spatrick //===----------------------------------------------------------------------===//
2009467b48Spatrick 
2109467b48Spatrick #include "llvm/Transforms/IPO/Internalize.h"
2209467b48Spatrick #include "llvm/ADT/SmallPtrSet.h"
2309467b48Spatrick #include "llvm/ADT/Statistic.h"
2409467b48Spatrick #include "llvm/ADT/StringSet.h"
25*73471bf0Spatrick #include "llvm/ADT/Triple.h"
2609467b48Spatrick #include "llvm/Analysis/CallGraph.h"
2709467b48Spatrick #include "llvm/IR/Module.h"
2809467b48Spatrick #include "llvm/InitializePasses.h"
2909467b48Spatrick #include "llvm/Pass.h"
3009467b48Spatrick #include "llvm/Support/CommandLine.h"
3109467b48Spatrick #include "llvm/Support/Debug.h"
3209467b48Spatrick #include "llvm/Support/LineIterator.h"
3309467b48Spatrick #include "llvm/Support/MemoryBuffer.h"
3409467b48Spatrick #include "llvm/Support/raw_ostream.h"
3509467b48Spatrick #include "llvm/Transforms/IPO.h"
3609467b48Spatrick #include "llvm/Transforms/Utils/GlobalStatus.h"
37*73471bf0Spatrick #include "llvm/Transforms/Utils/ModuleUtils.h"
3809467b48Spatrick using namespace llvm;
3909467b48Spatrick 
4009467b48Spatrick #define DEBUG_TYPE "internalize"
4109467b48Spatrick 
4209467b48Spatrick STATISTIC(NumAliases, "Number of aliases internalized");
4309467b48Spatrick STATISTIC(NumFunctions, "Number of functions internalized");
4409467b48Spatrick STATISTIC(NumGlobals, "Number of global vars internalized");
4509467b48Spatrick 
4609467b48Spatrick // APIFile - A file which contains a list of symbols that should not be marked
4709467b48Spatrick // external.
4809467b48Spatrick static cl::opt<std::string>
4909467b48Spatrick     APIFile("internalize-public-api-file", cl::value_desc("filename"),
5009467b48Spatrick             cl::desc("A file containing list of symbol names to preserve"));
5109467b48Spatrick 
5209467b48Spatrick // APIList - A list of symbols that should not be marked internal.
5309467b48Spatrick static cl::list<std::string>
5409467b48Spatrick     APIList("internalize-public-api-list", cl::value_desc("list"),
5509467b48Spatrick             cl::desc("A list of symbol names to preserve"), cl::CommaSeparated);
5609467b48Spatrick 
5709467b48Spatrick namespace {
5809467b48Spatrick // Helper to load an API list to preserve from file and expose it as a functor
5909467b48Spatrick // for internalization.
6009467b48Spatrick class PreserveAPIList {
6109467b48Spatrick public:
6209467b48Spatrick   PreserveAPIList() {
6309467b48Spatrick     if (!APIFile.empty())
6409467b48Spatrick       LoadFile(APIFile);
6509467b48Spatrick     ExternalNames.insert(APIList.begin(), APIList.end());
6609467b48Spatrick   }
6709467b48Spatrick 
6809467b48Spatrick   bool operator()(const GlobalValue &GV) {
6909467b48Spatrick     return ExternalNames.count(GV.getName());
7009467b48Spatrick   }
7109467b48Spatrick 
7209467b48Spatrick private:
7309467b48Spatrick   // Contains the set of symbols loaded from file
7409467b48Spatrick   StringSet<> ExternalNames;
7509467b48Spatrick 
7609467b48Spatrick   void LoadFile(StringRef Filename) {
7709467b48Spatrick     // Load the APIFile...
7809467b48Spatrick     ErrorOr<std::unique_ptr<MemoryBuffer>> Buf =
7909467b48Spatrick         MemoryBuffer::getFile(Filename);
8009467b48Spatrick     if (!Buf) {
8109467b48Spatrick       errs() << "WARNING: Internalize couldn't load file '" << Filename
8209467b48Spatrick              << "'! Continuing as if it's empty.\n";
8309467b48Spatrick       return; // Just continue as if the file were empty
8409467b48Spatrick     }
8509467b48Spatrick     for (line_iterator I(*Buf->get(), true), E; I != E; ++I)
8609467b48Spatrick       ExternalNames.insert(*I);
8709467b48Spatrick   }
8809467b48Spatrick };
8909467b48Spatrick } // end anonymous namespace
9009467b48Spatrick 
9109467b48Spatrick bool InternalizePass::shouldPreserveGV(const GlobalValue &GV) {
9209467b48Spatrick   // Function must be defined here
9309467b48Spatrick   if (GV.isDeclaration())
9409467b48Spatrick     return true;
9509467b48Spatrick 
9609467b48Spatrick   // Available externally is really just a "declaration with a body".
9709467b48Spatrick   if (GV.hasAvailableExternallyLinkage())
9809467b48Spatrick     return true;
9909467b48Spatrick 
10009467b48Spatrick   // Assume that dllexported symbols are referenced elsewhere
10109467b48Spatrick   if (GV.hasDLLExportStorageClass())
10209467b48Spatrick     return true;
10309467b48Spatrick 
104*73471bf0Spatrick   // As the name suggests, externally initialized variables need preserving as
105*73471bf0Spatrick   // they would be initialized elsewhere externally.
106*73471bf0Spatrick   if (const auto *G = dyn_cast<GlobalVariable>(&GV))
107*73471bf0Spatrick     if (G->isExternallyInitialized())
108*73471bf0Spatrick       return true;
109*73471bf0Spatrick 
11009467b48Spatrick   // Already local, has nothing to do.
11109467b48Spatrick   if (GV.hasLocalLinkage())
11209467b48Spatrick     return false;
11309467b48Spatrick 
11409467b48Spatrick   // Check some special cases
11509467b48Spatrick   if (AlwaysPreserved.count(GV.getName()))
11609467b48Spatrick     return true;
11709467b48Spatrick 
11809467b48Spatrick   return MustPreserveGV(GV);
11909467b48Spatrick }
12009467b48Spatrick 
12109467b48Spatrick bool InternalizePass::maybeInternalize(
122*73471bf0Spatrick     GlobalValue &GV, DenseMap<const Comdat *, ComdatInfo> &ComdatMap) {
123*73471bf0Spatrick   SmallString<0> ComdatName;
12409467b48Spatrick   if (Comdat *C = GV.getComdat()) {
125*73471bf0Spatrick     // For GlobalAlias, C is the aliasee object's comdat which may have been
126*73471bf0Spatrick     // redirected. So ComdatMap may not contain C.
127*73471bf0Spatrick     if (ComdatMap.lookup(C).External)
12809467b48Spatrick       return false;
12909467b48Spatrick 
130*73471bf0Spatrick     if (auto *GO = dyn_cast<GlobalObject>(&GV)) {
131*73471bf0Spatrick       // If a comdat with one member is not externally visible, we can drop it.
132*73471bf0Spatrick       // Otherwise, the comdat can be used to establish dependencies among the
133*73471bf0Spatrick       // group of sections. Thus we have to keep the comdat but switch it to
134*73471bf0Spatrick       // nodeduplicate.
135*73471bf0Spatrick       // Note: nodeduplicate is not necessary for COFF. wasm doesn't support
136*73471bf0Spatrick       // nodeduplicate.
137*73471bf0Spatrick       ComdatInfo &Info = ComdatMap.find(C)->second;
138*73471bf0Spatrick       if (Info.Size == 1)
13909467b48Spatrick         GO->setComdat(nullptr);
140*73471bf0Spatrick       else if (!IsWasm)
141*73471bf0Spatrick         C->setSelectionKind(Comdat::NoDeduplicate);
142*73471bf0Spatrick     }
14309467b48Spatrick 
14409467b48Spatrick     if (GV.hasLocalLinkage())
14509467b48Spatrick       return false;
14609467b48Spatrick   } else {
14709467b48Spatrick     if (GV.hasLocalLinkage())
14809467b48Spatrick       return false;
14909467b48Spatrick 
15009467b48Spatrick     if (shouldPreserveGV(GV))
15109467b48Spatrick       return false;
15209467b48Spatrick   }
15309467b48Spatrick 
15409467b48Spatrick   GV.setVisibility(GlobalValue::DefaultVisibility);
15509467b48Spatrick   GV.setLinkage(GlobalValue::InternalLinkage);
15609467b48Spatrick   return true;
15709467b48Spatrick }
15809467b48Spatrick 
159*73471bf0Spatrick // If GV is part of a comdat and is externally visible, update the comdat size
160*73471bf0Spatrick // and keep track of its comdat so that we don't internalize any of its members.
161*73471bf0Spatrick void InternalizePass::checkComdat(
162*73471bf0Spatrick     GlobalValue &GV, DenseMap<const Comdat *, ComdatInfo> &ComdatMap) {
16309467b48Spatrick   Comdat *C = GV.getComdat();
16409467b48Spatrick   if (!C)
16509467b48Spatrick     return;
16609467b48Spatrick 
167*73471bf0Spatrick   ComdatInfo &Info = ComdatMap.try_emplace(C).first->second;
168*73471bf0Spatrick   ++Info.Size;
16909467b48Spatrick   if (shouldPreserveGV(GV))
170*73471bf0Spatrick     Info.External = true;
17109467b48Spatrick }
17209467b48Spatrick 
17309467b48Spatrick bool InternalizePass::internalizeModule(Module &M, CallGraph *CG) {
17409467b48Spatrick   bool Changed = false;
17509467b48Spatrick   CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr;
17609467b48Spatrick 
177*73471bf0Spatrick   SmallVector<GlobalValue *, 4> Used;
17809467b48Spatrick   collectUsedGlobalVariables(M, Used, false);
17909467b48Spatrick 
180*73471bf0Spatrick   // Collect comdat size and visiblity information for the module.
181*73471bf0Spatrick   DenseMap<const Comdat *, ComdatInfo> ComdatMap;
18209467b48Spatrick   if (!M.getComdatSymbolTable().empty()) {
18309467b48Spatrick     for (Function &F : M)
184*73471bf0Spatrick       checkComdat(F, ComdatMap);
18509467b48Spatrick     for (GlobalVariable &GV : M.globals())
186*73471bf0Spatrick       checkComdat(GV, ComdatMap);
18709467b48Spatrick     for (GlobalAlias &GA : M.aliases())
188*73471bf0Spatrick       checkComdat(GA, ComdatMap);
18909467b48Spatrick   }
19009467b48Spatrick 
19109467b48Spatrick   // We must assume that globals in llvm.used have a reference that not even
19209467b48Spatrick   // the linker can see, so we don't internalize them.
19309467b48Spatrick   // For llvm.compiler.used the situation is a bit fuzzy. The assembler and
19409467b48Spatrick   // linker can drop those symbols. If this pass is running as part of LTO,
19509467b48Spatrick   // one might think that it could just drop llvm.compiler.used. The problem
19609467b48Spatrick   // is that even in LTO llvm doesn't see every reference. For example,
19709467b48Spatrick   // we don't see references from function local inline assembly. To be
19809467b48Spatrick   // conservative, we internalize symbols in llvm.compiler.used, but we
19909467b48Spatrick   // keep llvm.compiler.used so that the symbol is not deleted by llvm.
20009467b48Spatrick   for (GlobalValue *V : Used) {
20109467b48Spatrick     AlwaysPreserved.insert(V->getName());
20209467b48Spatrick   }
20309467b48Spatrick 
20409467b48Spatrick   // Mark all functions not in the api as internal.
205*73471bf0Spatrick   IsWasm = Triple(M.getTargetTriple()).isOSBinFormatWasm();
20609467b48Spatrick   for (Function &I : M) {
207*73471bf0Spatrick     if (!maybeInternalize(I, ComdatMap))
20809467b48Spatrick       continue;
20909467b48Spatrick     Changed = true;
21009467b48Spatrick 
21109467b48Spatrick     if (ExternalNode)
21209467b48Spatrick       // Remove a callgraph edge from the external node to this function.
21309467b48Spatrick       ExternalNode->removeOneAbstractEdgeTo((*CG)[&I]);
21409467b48Spatrick 
21509467b48Spatrick     ++NumFunctions;
21609467b48Spatrick     LLVM_DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n");
21709467b48Spatrick   }
21809467b48Spatrick 
21909467b48Spatrick   // Never internalize the llvm.used symbol.  It is used to implement
22009467b48Spatrick   // attribute((used)).
22109467b48Spatrick   // FIXME: Shouldn't this just filter on llvm.metadata section??
22209467b48Spatrick   AlwaysPreserved.insert("llvm.used");
22309467b48Spatrick   AlwaysPreserved.insert("llvm.compiler.used");
22409467b48Spatrick 
22509467b48Spatrick   // Never internalize anchors used by the machine module info, else the info
22609467b48Spatrick   // won't find them.  (see MachineModuleInfo.)
22709467b48Spatrick   AlwaysPreserved.insert("llvm.global_ctors");
22809467b48Spatrick   AlwaysPreserved.insert("llvm.global_dtors");
22909467b48Spatrick   AlwaysPreserved.insert("llvm.global.annotations");
23009467b48Spatrick 
23109467b48Spatrick   // Never internalize symbols code-gen inserts.
23209467b48Spatrick   // FIXME: We should probably add this (and the __stack_chk_guard) via some
23309467b48Spatrick   // type of call-back in CodeGen.
23409467b48Spatrick   AlwaysPreserved.insert("__stack_chk_fail");
235*73471bf0Spatrick   if (Triple(M.getTargetTriple()).isOSAIX())
236*73471bf0Spatrick     AlwaysPreserved.insert("__ssp_canary_word");
237*73471bf0Spatrick   else
23809467b48Spatrick     AlwaysPreserved.insert("__stack_chk_guard");
23909467b48Spatrick 
24009467b48Spatrick   // Mark all global variables with initializers that are not in the api as
24109467b48Spatrick   // internal as well.
24209467b48Spatrick   for (auto &GV : M.globals()) {
243*73471bf0Spatrick     if (!maybeInternalize(GV, ComdatMap))
24409467b48Spatrick       continue;
24509467b48Spatrick     Changed = true;
24609467b48Spatrick 
24709467b48Spatrick     ++NumGlobals;
24809467b48Spatrick     LLVM_DEBUG(dbgs() << "Internalized gvar " << GV.getName() << "\n");
24909467b48Spatrick   }
25009467b48Spatrick 
25109467b48Spatrick   // Mark all aliases that are not in the api as internal as well.
25209467b48Spatrick   for (auto &GA : M.aliases()) {
253*73471bf0Spatrick     if (!maybeInternalize(GA, ComdatMap))
25409467b48Spatrick       continue;
25509467b48Spatrick     Changed = true;
25609467b48Spatrick 
25709467b48Spatrick     ++NumAliases;
25809467b48Spatrick     LLVM_DEBUG(dbgs() << "Internalized alias " << GA.getName() << "\n");
25909467b48Spatrick   }
26009467b48Spatrick 
26109467b48Spatrick   return Changed;
26209467b48Spatrick }
26309467b48Spatrick 
26409467b48Spatrick InternalizePass::InternalizePass() : MustPreserveGV(PreserveAPIList()) {}
26509467b48Spatrick 
26609467b48Spatrick PreservedAnalyses InternalizePass::run(Module &M, ModuleAnalysisManager &AM) {
26709467b48Spatrick   if (!internalizeModule(M, AM.getCachedResult<CallGraphAnalysis>(M)))
26809467b48Spatrick     return PreservedAnalyses::all();
26909467b48Spatrick 
27009467b48Spatrick   PreservedAnalyses PA;
27109467b48Spatrick   PA.preserve<CallGraphAnalysis>();
27209467b48Spatrick   return PA;
27309467b48Spatrick }
27409467b48Spatrick 
27509467b48Spatrick namespace {
27609467b48Spatrick class InternalizeLegacyPass : public ModulePass {
27709467b48Spatrick   // Client supplied callback to control wheter a symbol must be preserved.
27809467b48Spatrick   std::function<bool(const GlobalValue &)> MustPreserveGV;
27909467b48Spatrick 
28009467b48Spatrick public:
28109467b48Spatrick   static char ID; // Pass identification, replacement for typeid
28209467b48Spatrick 
28309467b48Spatrick   InternalizeLegacyPass() : ModulePass(ID), MustPreserveGV(PreserveAPIList()) {}
28409467b48Spatrick 
28509467b48Spatrick   InternalizeLegacyPass(std::function<bool(const GlobalValue &)> MustPreserveGV)
28609467b48Spatrick       : ModulePass(ID), MustPreserveGV(std::move(MustPreserveGV)) {
28709467b48Spatrick     initializeInternalizeLegacyPassPass(*PassRegistry::getPassRegistry());
28809467b48Spatrick   }
28909467b48Spatrick 
29009467b48Spatrick   bool runOnModule(Module &M) override {
29109467b48Spatrick     if (skipModule(M))
29209467b48Spatrick       return false;
29309467b48Spatrick 
29409467b48Spatrick     CallGraphWrapperPass *CGPass =
29509467b48Spatrick         getAnalysisIfAvailable<CallGraphWrapperPass>();
29609467b48Spatrick     CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr;
29709467b48Spatrick     return internalizeModule(M, MustPreserveGV, CG);
29809467b48Spatrick   }
29909467b48Spatrick 
30009467b48Spatrick   void getAnalysisUsage(AnalysisUsage &AU) const override {
30109467b48Spatrick     AU.setPreservesCFG();
30209467b48Spatrick     AU.addPreserved<CallGraphWrapperPass>();
30309467b48Spatrick   }
30409467b48Spatrick };
30509467b48Spatrick }
30609467b48Spatrick 
30709467b48Spatrick char InternalizeLegacyPass::ID = 0;
30809467b48Spatrick INITIALIZE_PASS(InternalizeLegacyPass, "internalize",
30909467b48Spatrick                 "Internalize Global Symbols", false, false)
31009467b48Spatrick 
31109467b48Spatrick ModulePass *llvm::createInternalizePass() {
31209467b48Spatrick   return new InternalizeLegacyPass();
31309467b48Spatrick }
31409467b48Spatrick 
31509467b48Spatrick ModulePass *llvm::createInternalizePass(
31609467b48Spatrick     std::function<bool(const GlobalValue &)> MustPreserveGV) {
31709467b48Spatrick   return new InternalizeLegacyPass(std::move(MustPreserveGV));
31809467b48Spatrick }
319