10b57cec5SDimitry Andric //===- ModuleSymbolTable.cpp - symbol table for in-memory IR --------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This class represents a symbol table built from in-memory IR. It provides 100b57cec5SDimitry Andric // access to GlobalValues and should only be used if such access is required 110b57cec5SDimitry Andric // (e.g. in the LTO implementation). 120b57cec5SDimitry Andric // 130b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 140b57cec5SDimitry Andric 150b57cec5SDimitry Andric #include "llvm/Object/ModuleSymbolTable.h" 160b57cec5SDimitry Andric #include "RecordStreamer.h" 170b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h" 180b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h" 19cb14a3feSDimitry Andric #include "llvm/IR/DiagnosticInfo.h" 200b57cec5SDimitry Andric #include "llvm/IR/Function.h" 210b57cec5SDimitry Andric #include "llvm/IR/GlobalAlias.h" 220b57cec5SDimitry Andric #include "llvm/IR/GlobalValue.h" 230b57cec5SDimitry Andric #include "llvm/IR/GlobalVariable.h" 245ffd83dbSDimitry Andric #include "llvm/IR/InlineAsm.h" 250b57cec5SDimitry Andric #include "llvm/IR/Module.h" 260b57cec5SDimitry Andric #include "llvm/MC/MCAsmInfo.h" 270b57cec5SDimitry Andric #include "llvm/MC/MCContext.h" 280b57cec5SDimitry Andric #include "llvm/MC/MCInstrInfo.h" 290b57cec5SDimitry Andric #include "llvm/MC/MCObjectFileInfo.h" 300b57cec5SDimitry Andric #include "llvm/MC/MCParser/MCAsmParser.h" 310b57cec5SDimitry Andric #include "llvm/MC/MCParser/MCTargetAsmParser.h" 320b57cec5SDimitry Andric #include "llvm/MC/MCRegisterInfo.h" 330b57cec5SDimitry Andric #include "llvm/MC/MCSubtargetInfo.h" 340b57cec5SDimitry Andric #include "llvm/MC/MCSymbol.h" 350b57cec5SDimitry Andric #include "llvm/MC/MCTargetOptions.h" 36349cc55cSDimitry Andric #include "llvm/MC/TargetRegistry.h" 370b57cec5SDimitry Andric #include "llvm/Object/SymbolicFile.h" 380b57cec5SDimitry Andric #include "llvm/Support/Casting.h" 390b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h" 400b57cec5SDimitry Andric #include "llvm/Support/MemoryBuffer.h" 410b57cec5SDimitry Andric #include "llvm/Support/SMLoc.h" 420b57cec5SDimitry Andric #include "llvm/Support/SourceMgr.h" 430b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 4406c3fb27SDimitry Andric #include "llvm/TargetParser/Triple.h" 450b57cec5SDimitry Andric #include <algorithm> 460b57cec5SDimitry Andric #include <cassert> 470b57cec5SDimitry Andric #include <cstdint> 480b57cec5SDimitry Andric #include <memory> 490b57cec5SDimitry Andric #include <string> 500b57cec5SDimitry Andric 510b57cec5SDimitry Andric using namespace llvm; 520b57cec5SDimitry Andric using namespace object; 530b57cec5SDimitry Andric 540b57cec5SDimitry Andric void ModuleSymbolTable::addModule(Module *M) { 550b57cec5SDimitry Andric if (FirstMod) 560b57cec5SDimitry Andric assert(FirstMod->getTargetTriple() == M->getTargetTriple()); 570b57cec5SDimitry Andric else 580b57cec5SDimitry Andric FirstMod = M; 590b57cec5SDimitry Andric 600b57cec5SDimitry Andric for (GlobalValue &GV : M->global_values()) 610b57cec5SDimitry Andric SymTab.push_back(&GV); 620b57cec5SDimitry Andric 630b57cec5SDimitry Andric CollectAsmSymbols(*M, [this](StringRef Name, BasicSymbolRef::Flags Flags) { 645ffd83dbSDimitry Andric SymTab.push_back(new (AsmSymbols.Allocate()) 655ffd83dbSDimitry Andric AsmSymbol(std::string(Name), Flags)); 660b57cec5SDimitry Andric }); 670b57cec5SDimitry Andric } 680b57cec5SDimitry Andric 690b57cec5SDimitry Andric static void 700b57cec5SDimitry Andric initializeRecordStreamer(const Module &M, 710b57cec5SDimitry Andric function_ref<void(RecordStreamer &)> Init) { 72cb14a3feSDimitry Andric // This function may be called twice, once for ModuleSummaryIndexAnalysis and 73cb14a3feSDimitry Andric // the other when writing the IR symbol table. If parsing inline assembly has 74cb14a3feSDimitry Andric // caused errors in the first run, suppress the second run. 75cb14a3feSDimitry Andric if (M.getContext().getDiagHandlerPtr()->HasErrors) 76cb14a3feSDimitry Andric return; 770b57cec5SDimitry Andric StringRef InlineAsm = M.getModuleInlineAsm(); 780b57cec5SDimitry Andric if (InlineAsm.empty()) 790b57cec5SDimitry Andric return; 800b57cec5SDimitry Andric 810b57cec5SDimitry Andric std::string Err; 820b57cec5SDimitry Andric const Triple TT(M.getTargetTriple()); 830b57cec5SDimitry Andric const Target *T = TargetRegistry::lookupTarget(TT.str(), Err); 840b57cec5SDimitry Andric assert(T && T->hasMCAsmParser()); 850b57cec5SDimitry Andric 860b57cec5SDimitry Andric std::unique_ptr<MCRegisterInfo> MRI(T->createMCRegInfo(TT.str())); 870b57cec5SDimitry Andric if (!MRI) 880b57cec5SDimitry Andric return; 890b57cec5SDimitry Andric 90480093f4SDimitry Andric MCTargetOptions MCOptions; 91480093f4SDimitry Andric std::unique_ptr<MCAsmInfo> MAI(T->createMCAsmInfo(*MRI, TT.str(), MCOptions)); 920b57cec5SDimitry Andric if (!MAI) 930b57cec5SDimitry Andric return; 940b57cec5SDimitry Andric 950b57cec5SDimitry Andric std::unique_ptr<MCSubtargetInfo> STI( 960b57cec5SDimitry Andric T->createMCSubtargetInfo(TT.str(), "", "")); 970b57cec5SDimitry Andric if (!STI) 980b57cec5SDimitry Andric return; 990b57cec5SDimitry Andric 1000b57cec5SDimitry Andric std::unique_ptr<MCInstrInfo> MCII(T->createMCInstrInfo()); 1010b57cec5SDimitry Andric if (!MCII) 1020b57cec5SDimitry Andric return; 1030b57cec5SDimitry Andric 104cb14a3feSDimitry Andric std::unique_ptr<MemoryBuffer> Buffer( 105cb14a3feSDimitry Andric MemoryBuffer::getMemBuffer(InlineAsm, "<inline asm>")); 1060b57cec5SDimitry Andric SourceMgr SrcMgr; 1070b57cec5SDimitry Andric SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc()); 108fe6060f1SDimitry Andric 109fe6060f1SDimitry Andric MCContext MCCtx(TT, MAI.get(), MRI.get(), STI.get(), &SrcMgr); 110fe6060f1SDimitry Andric std::unique_ptr<MCObjectFileInfo> MOFI( 111fe6060f1SDimitry Andric T->createMCObjectFileInfo(MCCtx, /*PIC=*/false)); 112fe6060f1SDimitry Andric MOFI->setSDKVersion(M.getSDKVersion()); 113fe6060f1SDimitry Andric MCCtx.setObjectFileInfo(MOFI.get()); 114fe6060f1SDimitry Andric RecordStreamer Streamer(MCCtx, M); 115fe6060f1SDimitry Andric T->createNullTargetStreamer(Streamer); 116fe6060f1SDimitry Andric 1170b57cec5SDimitry Andric std::unique_ptr<MCAsmParser> Parser( 1180b57cec5SDimitry Andric createMCAsmParser(SrcMgr, MCCtx, Streamer, *MAI)); 1190b57cec5SDimitry Andric 1200b57cec5SDimitry Andric std::unique_ptr<MCTargetAsmParser> TAP( 1210b57cec5SDimitry Andric T->createMCAsmParser(*STI, *Parser, *MCII, MCOptions)); 1220b57cec5SDimitry Andric if (!TAP) 1230b57cec5SDimitry Andric return; 1240b57cec5SDimitry Andric 125cb14a3feSDimitry Andric MCCtx.setDiagnosticHandler([&](const SMDiagnostic &SMD, bool IsInlineAsm, 126cb14a3feSDimitry Andric const SourceMgr &SrcMgr, 127cb14a3feSDimitry Andric std::vector<const MDNode *> &LocInfos) { 128cb14a3feSDimitry Andric M.getContext().diagnose( 129cb14a3feSDimitry Andric DiagnosticInfoSrcMgr(SMD, M.getName(), IsInlineAsm, /*LocCookie=*/0)); 130cb14a3feSDimitry Andric }); 131cb14a3feSDimitry Andric 1325ffd83dbSDimitry Andric // Module-level inline asm is assumed to use At&t syntax (see 1335ffd83dbSDimitry Andric // AsmPrinter::doInitialization()). 1345ffd83dbSDimitry Andric Parser->setAssemblerDialect(InlineAsm::AD_ATT); 1355ffd83dbSDimitry Andric 1360b57cec5SDimitry Andric Parser->setTargetParser(*TAP); 1370b57cec5SDimitry Andric if (Parser->Run(false)) 1380b57cec5SDimitry Andric return; 1390b57cec5SDimitry Andric 1400b57cec5SDimitry Andric Init(Streamer); 1410b57cec5SDimitry Andric } 1420b57cec5SDimitry Andric 1430b57cec5SDimitry Andric void ModuleSymbolTable::CollectAsmSymbols( 1440b57cec5SDimitry Andric const Module &M, 1450b57cec5SDimitry Andric function_ref<void(StringRef, BasicSymbolRef::Flags)> AsmSymbol) { 1460b57cec5SDimitry Andric initializeRecordStreamer(M, [&](RecordStreamer &Streamer) { 1470b57cec5SDimitry Andric Streamer.flushSymverDirectives(); 1480b57cec5SDimitry Andric 1490b57cec5SDimitry Andric for (auto &KV : Streamer) { 1500b57cec5SDimitry Andric StringRef Key = KV.first(); 1510b57cec5SDimitry Andric RecordStreamer::State Value = KV.second; 1520b57cec5SDimitry Andric // FIXME: For now we just assume that all asm symbols are executable. 1530b57cec5SDimitry Andric uint32_t Res = BasicSymbolRef::SF_Executable; 1540b57cec5SDimitry Andric switch (Value) { 1550b57cec5SDimitry Andric case RecordStreamer::NeverSeen: 1560b57cec5SDimitry Andric llvm_unreachable("NeverSeen should have been replaced earlier"); 1570b57cec5SDimitry Andric case RecordStreamer::DefinedGlobal: 1580b57cec5SDimitry Andric Res |= BasicSymbolRef::SF_Global; 1590b57cec5SDimitry Andric break; 1600b57cec5SDimitry Andric case RecordStreamer::Defined: 1610b57cec5SDimitry Andric break; 1620b57cec5SDimitry Andric case RecordStreamer::Global: 1630b57cec5SDimitry Andric case RecordStreamer::Used: 1640b57cec5SDimitry Andric Res |= BasicSymbolRef::SF_Undefined; 1650b57cec5SDimitry Andric Res |= BasicSymbolRef::SF_Global; 1660b57cec5SDimitry Andric break; 1670b57cec5SDimitry Andric case RecordStreamer::DefinedWeak: 1680b57cec5SDimitry Andric Res |= BasicSymbolRef::SF_Weak; 1690b57cec5SDimitry Andric Res |= BasicSymbolRef::SF_Global; 1700b57cec5SDimitry Andric break; 1710b57cec5SDimitry Andric case RecordStreamer::UndefinedWeak: 1720b57cec5SDimitry Andric Res |= BasicSymbolRef::SF_Weak; 1730b57cec5SDimitry Andric Res |= BasicSymbolRef::SF_Undefined; 1740b57cec5SDimitry Andric } 1750b57cec5SDimitry Andric AsmSymbol(Key, BasicSymbolRef::Flags(Res)); 1760b57cec5SDimitry Andric } 1770b57cec5SDimitry Andric }); 178*0fca6ea1SDimitry Andric 179*0fca6ea1SDimitry Andric // In ELF, object code generated for x86-32 and some code models of x86-64 may 180*0fca6ea1SDimitry Andric // reference the special symbol _GLOBAL_OFFSET_TABLE_ that is not used in the 181*0fca6ea1SDimitry Andric // IR. Record it like inline asm symbols. 182*0fca6ea1SDimitry Andric Triple TT(M.getTargetTriple()); 183*0fca6ea1SDimitry Andric if (!TT.isOSBinFormatELF() || !TT.isX86()) 184*0fca6ea1SDimitry Andric return; 185*0fca6ea1SDimitry Andric auto CM = M.getCodeModel(); 186*0fca6ea1SDimitry Andric if (TT.getArch() == Triple::x86 || CM == CodeModel::Medium || 187*0fca6ea1SDimitry Andric CM == CodeModel::Large) { 188*0fca6ea1SDimitry Andric AsmSymbol("_GLOBAL_OFFSET_TABLE_", 189*0fca6ea1SDimitry Andric BasicSymbolRef::Flags(BasicSymbolRef::SF_Undefined | 190*0fca6ea1SDimitry Andric BasicSymbolRef::SF_Global)); 191*0fca6ea1SDimitry Andric } 1920b57cec5SDimitry Andric } 1930b57cec5SDimitry Andric 1940b57cec5SDimitry Andric void ModuleSymbolTable::CollectAsmSymvers( 1950b57cec5SDimitry Andric const Module &M, function_ref<void(StringRef, StringRef)> AsmSymver) { 1960b57cec5SDimitry Andric initializeRecordStreamer(M, [&](RecordStreamer &Streamer) { 1970b57cec5SDimitry Andric for (auto &KV : Streamer.symverAliases()) 1980b57cec5SDimitry Andric for (auto &Alias : KV.second) 1990b57cec5SDimitry Andric AsmSymver(KV.first->getName(), Alias); 2000b57cec5SDimitry Andric }); 2010b57cec5SDimitry Andric } 2020b57cec5SDimitry Andric 2030b57cec5SDimitry Andric void ModuleSymbolTable::printSymbolName(raw_ostream &OS, Symbol S) const { 20406c3fb27SDimitry Andric if (isa<AsmSymbol *>(S)) { 20506c3fb27SDimitry Andric OS << cast<AsmSymbol *>(S)->first; 2060b57cec5SDimitry Andric return; 2070b57cec5SDimitry Andric } 2080b57cec5SDimitry Andric 20906c3fb27SDimitry Andric auto *GV = cast<GlobalValue *>(S); 2100b57cec5SDimitry Andric if (GV->hasDLLImportStorageClass()) 2110b57cec5SDimitry Andric OS << "__imp_"; 2120b57cec5SDimitry Andric 2130b57cec5SDimitry Andric Mang.getNameWithPrefix(OS, GV, false); 2140b57cec5SDimitry Andric } 2150b57cec5SDimitry Andric 2160b57cec5SDimitry Andric uint32_t ModuleSymbolTable::getSymbolFlags(Symbol S) const { 21706c3fb27SDimitry Andric if (isa<AsmSymbol *>(S)) 21806c3fb27SDimitry Andric return cast<AsmSymbol *>(S)->second; 2190b57cec5SDimitry Andric 22006c3fb27SDimitry Andric auto *GV = cast<GlobalValue *>(S); 2210b57cec5SDimitry Andric 2220b57cec5SDimitry Andric uint32_t Res = BasicSymbolRef::SF_None; 2230b57cec5SDimitry Andric if (GV->isDeclarationForLinker()) 2240b57cec5SDimitry Andric Res |= BasicSymbolRef::SF_Undefined; 2250b57cec5SDimitry Andric else if (GV->hasHiddenVisibility() && !GV->hasLocalLinkage()) 2260b57cec5SDimitry Andric Res |= BasicSymbolRef::SF_Hidden; 2270b57cec5SDimitry Andric if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) { 2280b57cec5SDimitry Andric if (GVar->isConstant()) 2290b57cec5SDimitry Andric Res |= BasicSymbolRef::SF_Const; 2300b57cec5SDimitry Andric } 231349cc55cSDimitry Andric if (const GlobalObject *GO = GV->getAliaseeObject()) 232349cc55cSDimitry Andric if (isa<Function>(GO) || isa<GlobalIFunc>(GO)) 2330b57cec5SDimitry Andric Res |= BasicSymbolRef::SF_Executable; 2340b57cec5SDimitry Andric if (isa<GlobalAlias>(GV)) 2350b57cec5SDimitry Andric Res |= BasicSymbolRef::SF_Indirect; 2360b57cec5SDimitry Andric if (GV->hasPrivateLinkage()) 2370b57cec5SDimitry Andric Res |= BasicSymbolRef::SF_FormatSpecific; 2380b57cec5SDimitry Andric if (!GV->hasLocalLinkage()) 2390b57cec5SDimitry Andric Res |= BasicSymbolRef::SF_Global; 2400b57cec5SDimitry Andric if (GV->hasCommonLinkage()) 2410b57cec5SDimitry Andric Res |= BasicSymbolRef::SF_Common; 2420b57cec5SDimitry Andric if (GV->hasLinkOnceLinkage() || GV->hasWeakLinkage() || 2430b57cec5SDimitry Andric GV->hasExternalWeakLinkage()) 2440b57cec5SDimitry Andric Res |= BasicSymbolRef::SF_Weak; 2450b57cec5SDimitry Andric 2465f757f3fSDimitry Andric if (GV->getName().starts_with("llvm.")) 2470b57cec5SDimitry Andric Res |= BasicSymbolRef::SF_FormatSpecific; 2480b57cec5SDimitry Andric else if (auto *Var = dyn_cast<GlobalVariable>(GV)) { 2490b57cec5SDimitry Andric if (Var->getSection() == "llvm.metadata") 2500b57cec5SDimitry Andric Res |= BasicSymbolRef::SF_FormatSpecific; 2510b57cec5SDimitry Andric } 2520b57cec5SDimitry Andric 2530b57cec5SDimitry Andric return Res; 2540b57cec5SDimitry Andric } 255