1*09467b48Spatrick //===-- Internalize.cpp - Mark functions internal -------------------------===// 2*09467b48Spatrick // 3*09467b48Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*09467b48Spatrick // See https://llvm.org/LICENSE.txt for license information. 5*09467b48Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*09467b48Spatrick // 7*09467b48Spatrick //===----------------------------------------------------------------------===// 8*09467b48Spatrick // 9*09467b48Spatrick // This pass loops over all of the functions and variables in the input module. 10*09467b48Spatrick // If the function or variable does not need to be preserved according to the 11*09467b48Spatrick // client supplied callback, it is marked as internal. 12*09467b48Spatrick // 13*09467b48Spatrick // This transformation would not be legal in a regular compilation, but it gets 14*09467b48Spatrick // extra information from the linker about what is safe. 15*09467b48Spatrick // 16*09467b48Spatrick // For example: Internalizing a function with external linkage. Only if we are 17*09467b48Spatrick // told it is only used from within this module, it is safe to do it. 18*09467b48Spatrick // 19*09467b48Spatrick //===----------------------------------------------------------------------===// 20*09467b48Spatrick 21*09467b48Spatrick #include "llvm/Transforms/IPO/Internalize.h" 22*09467b48Spatrick #include "llvm/ADT/SmallPtrSet.h" 23*09467b48Spatrick #include "llvm/ADT/Statistic.h" 24*09467b48Spatrick #include "llvm/ADT/StringSet.h" 25*09467b48Spatrick #include "llvm/Analysis/CallGraph.h" 26*09467b48Spatrick #include "llvm/IR/Module.h" 27*09467b48Spatrick #include "llvm/InitializePasses.h" 28*09467b48Spatrick #include "llvm/Pass.h" 29*09467b48Spatrick #include "llvm/Support/CommandLine.h" 30*09467b48Spatrick #include "llvm/Support/Debug.h" 31*09467b48Spatrick #include "llvm/Support/LineIterator.h" 32*09467b48Spatrick #include "llvm/Support/MemoryBuffer.h" 33*09467b48Spatrick #include "llvm/Support/raw_ostream.h" 34*09467b48Spatrick #include "llvm/Transforms/IPO.h" 35*09467b48Spatrick #include "llvm/Transforms/Utils/GlobalStatus.h" 36*09467b48Spatrick using namespace llvm; 37*09467b48Spatrick 38*09467b48Spatrick #define DEBUG_TYPE "internalize" 39*09467b48Spatrick 40*09467b48Spatrick STATISTIC(NumAliases, "Number of aliases internalized"); 41*09467b48Spatrick STATISTIC(NumFunctions, "Number of functions internalized"); 42*09467b48Spatrick STATISTIC(NumGlobals, "Number of global vars internalized"); 43*09467b48Spatrick 44*09467b48Spatrick // APIFile - A file which contains a list of symbols that should not be marked 45*09467b48Spatrick // external. 46*09467b48Spatrick static cl::opt<std::string> 47*09467b48Spatrick APIFile("internalize-public-api-file", cl::value_desc("filename"), 48*09467b48Spatrick cl::desc("A file containing list of symbol names to preserve")); 49*09467b48Spatrick 50*09467b48Spatrick // APIList - A list of symbols that should not be marked internal. 51*09467b48Spatrick static cl::list<std::string> 52*09467b48Spatrick APIList("internalize-public-api-list", cl::value_desc("list"), 53*09467b48Spatrick cl::desc("A list of symbol names to preserve"), cl::CommaSeparated); 54*09467b48Spatrick 55*09467b48Spatrick namespace { 56*09467b48Spatrick // Helper to load an API list to preserve from file and expose it as a functor 57*09467b48Spatrick // for internalization. 58*09467b48Spatrick class PreserveAPIList { 59*09467b48Spatrick public: 60*09467b48Spatrick PreserveAPIList() { 61*09467b48Spatrick if (!APIFile.empty()) 62*09467b48Spatrick LoadFile(APIFile); 63*09467b48Spatrick ExternalNames.insert(APIList.begin(), APIList.end()); 64*09467b48Spatrick } 65*09467b48Spatrick 66*09467b48Spatrick bool operator()(const GlobalValue &GV) { 67*09467b48Spatrick return ExternalNames.count(GV.getName()); 68*09467b48Spatrick } 69*09467b48Spatrick 70*09467b48Spatrick private: 71*09467b48Spatrick // Contains the set of symbols loaded from file 72*09467b48Spatrick StringSet<> ExternalNames; 73*09467b48Spatrick 74*09467b48Spatrick void LoadFile(StringRef Filename) { 75*09467b48Spatrick // Load the APIFile... 76*09467b48Spatrick ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = 77*09467b48Spatrick MemoryBuffer::getFile(Filename); 78*09467b48Spatrick if (!Buf) { 79*09467b48Spatrick errs() << "WARNING: Internalize couldn't load file '" << Filename 80*09467b48Spatrick << "'! Continuing as if it's empty.\n"; 81*09467b48Spatrick return; // Just continue as if the file were empty 82*09467b48Spatrick } 83*09467b48Spatrick for (line_iterator I(*Buf->get(), true), E; I != E; ++I) 84*09467b48Spatrick ExternalNames.insert(*I); 85*09467b48Spatrick } 86*09467b48Spatrick }; 87*09467b48Spatrick } // end anonymous namespace 88*09467b48Spatrick 89*09467b48Spatrick bool InternalizePass::shouldPreserveGV(const GlobalValue &GV) { 90*09467b48Spatrick // Function must be defined here 91*09467b48Spatrick if (GV.isDeclaration()) 92*09467b48Spatrick return true; 93*09467b48Spatrick 94*09467b48Spatrick // Available externally is really just a "declaration with a body". 95*09467b48Spatrick if (GV.hasAvailableExternallyLinkage()) 96*09467b48Spatrick return true; 97*09467b48Spatrick 98*09467b48Spatrick // Assume that dllexported symbols are referenced elsewhere 99*09467b48Spatrick if (GV.hasDLLExportStorageClass()) 100*09467b48Spatrick return true; 101*09467b48Spatrick 102*09467b48Spatrick // Already local, has nothing to do. 103*09467b48Spatrick if (GV.hasLocalLinkage()) 104*09467b48Spatrick return false; 105*09467b48Spatrick 106*09467b48Spatrick // Check some special cases 107*09467b48Spatrick if (AlwaysPreserved.count(GV.getName())) 108*09467b48Spatrick return true; 109*09467b48Spatrick 110*09467b48Spatrick return MustPreserveGV(GV); 111*09467b48Spatrick } 112*09467b48Spatrick 113*09467b48Spatrick bool InternalizePass::maybeInternalize( 114*09467b48Spatrick GlobalValue &GV, const DenseSet<const Comdat *> &ExternalComdats) { 115*09467b48Spatrick if (Comdat *C = GV.getComdat()) { 116*09467b48Spatrick if (ExternalComdats.count(C)) 117*09467b48Spatrick return false; 118*09467b48Spatrick 119*09467b48Spatrick // If a comdat is not externally visible we can drop it. 120*09467b48Spatrick if (auto GO = dyn_cast<GlobalObject>(&GV)) 121*09467b48Spatrick GO->setComdat(nullptr); 122*09467b48Spatrick 123*09467b48Spatrick if (GV.hasLocalLinkage()) 124*09467b48Spatrick return false; 125*09467b48Spatrick } else { 126*09467b48Spatrick if (GV.hasLocalLinkage()) 127*09467b48Spatrick return false; 128*09467b48Spatrick 129*09467b48Spatrick if (shouldPreserveGV(GV)) 130*09467b48Spatrick return false; 131*09467b48Spatrick } 132*09467b48Spatrick 133*09467b48Spatrick GV.setVisibility(GlobalValue::DefaultVisibility); 134*09467b48Spatrick GV.setLinkage(GlobalValue::InternalLinkage); 135*09467b48Spatrick return true; 136*09467b48Spatrick } 137*09467b48Spatrick 138*09467b48Spatrick // If GV is part of a comdat and is externally visible, keep track of its 139*09467b48Spatrick // comdat so that we don't internalize any of its members. 140*09467b48Spatrick void InternalizePass::checkComdatVisibility( 141*09467b48Spatrick GlobalValue &GV, DenseSet<const Comdat *> &ExternalComdats) { 142*09467b48Spatrick Comdat *C = GV.getComdat(); 143*09467b48Spatrick if (!C) 144*09467b48Spatrick return; 145*09467b48Spatrick 146*09467b48Spatrick if (shouldPreserveGV(GV)) 147*09467b48Spatrick ExternalComdats.insert(C); 148*09467b48Spatrick } 149*09467b48Spatrick 150*09467b48Spatrick bool InternalizePass::internalizeModule(Module &M, CallGraph *CG) { 151*09467b48Spatrick bool Changed = false; 152*09467b48Spatrick CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr; 153*09467b48Spatrick 154*09467b48Spatrick SmallPtrSet<GlobalValue *, 8> Used; 155*09467b48Spatrick collectUsedGlobalVariables(M, Used, false); 156*09467b48Spatrick 157*09467b48Spatrick // Collect comdat visiblity information for the module. 158*09467b48Spatrick DenseSet<const Comdat *> ExternalComdats; 159*09467b48Spatrick if (!M.getComdatSymbolTable().empty()) { 160*09467b48Spatrick for (Function &F : M) 161*09467b48Spatrick checkComdatVisibility(F, ExternalComdats); 162*09467b48Spatrick for (GlobalVariable &GV : M.globals()) 163*09467b48Spatrick checkComdatVisibility(GV, ExternalComdats); 164*09467b48Spatrick for (GlobalAlias &GA : M.aliases()) 165*09467b48Spatrick checkComdatVisibility(GA, ExternalComdats); 166*09467b48Spatrick } 167*09467b48Spatrick 168*09467b48Spatrick // We must assume that globals in llvm.used have a reference that not even 169*09467b48Spatrick // the linker can see, so we don't internalize them. 170*09467b48Spatrick // For llvm.compiler.used the situation is a bit fuzzy. The assembler and 171*09467b48Spatrick // linker can drop those symbols. If this pass is running as part of LTO, 172*09467b48Spatrick // one might think that it could just drop llvm.compiler.used. The problem 173*09467b48Spatrick // is that even in LTO llvm doesn't see every reference. For example, 174*09467b48Spatrick // we don't see references from function local inline assembly. To be 175*09467b48Spatrick // conservative, we internalize symbols in llvm.compiler.used, but we 176*09467b48Spatrick // keep llvm.compiler.used so that the symbol is not deleted by llvm. 177*09467b48Spatrick for (GlobalValue *V : Used) { 178*09467b48Spatrick AlwaysPreserved.insert(V->getName()); 179*09467b48Spatrick } 180*09467b48Spatrick 181*09467b48Spatrick // Mark all functions not in the api as internal. 182*09467b48Spatrick for (Function &I : M) { 183*09467b48Spatrick if (!maybeInternalize(I, ExternalComdats)) 184*09467b48Spatrick continue; 185*09467b48Spatrick Changed = true; 186*09467b48Spatrick 187*09467b48Spatrick if (ExternalNode) 188*09467b48Spatrick // Remove a callgraph edge from the external node to this function. 189*09467b48Spatrick ExternalNode->removeOneAbstractEdgeTo((*CG)[&I]); 190*09467b48Spatrick 191*09467b48Spatrick ++NumFunctions; 192*09467b48Spatrick LLVM_DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n"); 193*09467b48Spatrick } 194*09467b48Spatrick 195*09467b48Spatrick // Never internalize the llvm.used symbol. It is used to implement 196*09467b48Spatrick // attribute((used)). 197*09467b48Spatrick // FIXME: Shouldn't this just filter on llvm.metadata section?? 198*09467b48Spatrick AlwaysPreserved.insert("llvm.used"); 199*09467b48Spatrick AlwaysPreserved.insert("llvm.compiler.used"); 200*09467b48Spatrick 201*09467b48Spatrick // Never internalize anchors used by the machine module info, else the info 202*09467b48Spatrick // won't find them. (see MachineModuleInfo.) 203*09467b48Spatrick AlwaysPreserved.insert("llvm.global_ctors"); 204*09467b48Spatrick AlwaysPreserved.insert("llvm.global_dtors"); 205*09467b48Spatrick AlwaysPreserved.insert("llvm.global.annotations"); 206*09467b48Spatrick 207*09467b48Spatrick // Never internalize symbols code-gen inserts. 208*09467b48Spatrick // FIXME: We should probably add this (and the __stack_chk_guard) via some 209*09467b48Spatrick // type of call-back in CodeGen. 210*09467b48Spatrick AlwaysPreserved.insert("__stack_chk_fail"); 211*09467b48Spatrick AlwaysPreserved.insert("__stack_chk_guard"); 212*09467b48Spatrick 213*09467b48Spatrick // Mark all global variables with initializers that are not in the api as 214*09467b48Spatrick // internal as well. 215*09467b48Spatrick for (auto &GV : M.globals()) { 216*09467b48Spatrick if (!maybeInternalize(GV, ExternalComdats)) 217*09467b48Spatrick continue; 218*09467b48Spatrick Changed = true; 219*09467b48Spatrick 220*09467b48Spatrick ++NumGlobals; 221*09467b48Spatrick LLVM_DEBUG(dbgs() << "Internalized gvar " << GV.getName() << "\n"); 222*09467b48Spatrick } 223*09467b48Spatrick 224*09467b48Spatrick // Mark all aliases that are not in the api as internal as well. 225*09467b48Spatrick for (auto &GA : M.aliases()) { 226*09467b48Spatrick if (!maybeInternalize(GA, ExternalComdats)) 227*09467b48Spatrick continue; 228*09467b48Spatrick Changed = true; 229*09467b48Spatrick 230*09467b48Spatrick ++NumAliases; 231*09467b48Spatrick LLVM_DEBUG(dbgs() << "Internalized alias " << GA.getName() << "\n"); 232*09467b48Spatrick } 233*09467b48Spatrick 234*09467b48Spatrick return Changed; 235*09467b48Spatrick } 236*09467b48Spatrick 237*09467b48Spatrick InternalizePass::InternalizePass() : MustPreserveGV(PreserveAPIList()) {} 238*09467b48Spatrick 239*09467b48Spatrick PreservedAnalyses InternalizePass::run(Module &M, ModuleAnalysisManager &AM) { 240*09467b48Spatrick if (!internalizeModule(M, AM.getCachedResult<CallGraphAnalysis>(M))) 241*09467b48Spatrick return PreservedAnalyses::all(); 242*09467b48Spatrick 243*09467b48Spatrick PreservedAnalyses PA; 244*09467b48Spatrick PA.preserve<CallGraphAnalysis>(); 245*09467b48Spatrick return PA; 246*09467b48Spatrick } 247*09467b48Spatrick 248*09467b48Spatrick namespace { 249*09467b48Spatrick class InternalizeLegacyPass : public ModulePass { 250*09467b48Spatrick // Client supplied callback to control wheter a symbol must be preserved. 251*09467b48Spatrick std::function<bool(const GlobalValue &)> MustPreserveGV; 252*09467b48Spatrick 253*09467b48Spatrick public: 254*09467b48Spatrick static char ID; // Pass identification, replacement for typeid 255*09467b48Spatrick 256*09467b48Spatrick InternalizeLegacyPass() : ModulePass(ID), MustPreserveGV(PreserveAPIList()) {} 257*09467b48Spatrick 258*09467b48Spatrick InternalizeLegacyPass(std::function<bool(const GlobalValue &)> MustPreserveGV) 259*09467b48Spatrick : ModulePass(ID), MustPreserveGV(std::move(MustPreserveGV)) { 260*09467b48Spatrick initializeInternalizeLegacyPassPass(*PassRegistry::getPassRegistry()); 261*09467b48Spatrick } 262*09467b48Spatrick 263*09467b48Spatrick bool runOnModule(Module &M) override { 264*09467b48Spatrick if (skipModule(M)) 265*09467b48Spatrick return false; 266*09467b48Spatrick 267*09467b48Spatrick CallGraphWrapperPass *CGPass = 268*09467b48Spatrick getAnalysisIfAvailable<CallGraphWrapperPass>(); 269*09467b48Spatrick CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr; 270*09467b48Spatrick return internalizeModule(M, MustPreserveGV, CG); 271*09467b48Spatrick } 272*09467b48Spatrick 273*09467b48Spatrick void getAnalysisUsage(AnalysisUsage &AU) const override { 274*09467b48Spatrick AU.setPreservesCFG(); 275*09467b48Spatrick AU.addPreserved<CallGraphWrapperPass>(); 276*09467b48Spatrick } 277*09467b48Spatrick }; 278*09467b48Spatrick } 279*09467b48Spatrick 280*09467b48Spatrick char InternalizeLegacyPass::ID = 0; 281*09467b48Spatrick INITIALIZE_PASS(InternalizeLegacyPass, "internalize", 282*09467b48Spatrick "Internalize Global Symbols", false, false) 283*09467b48Spatrick 284*09467b48Spatrick ModulePass *llvm::createInternalizePass() { 285*09467b48Spatrick return new InternalizeLegacyPass(); 286*09467b48Spatrick } 287*09467b48Spatrick 288*09467b48Spatrick ModulePass *llvm::createInternalizePass( 289*09467b48Spatrick std::function<bool(const GlobalValue &)> MustPreserveGV) { 290*09467b48Spatrick return new InternalizeLegacyPass(std::move(MustPreserveGV)); 291*09467b48Spatrick } 292