1bdd1243dSDimitry Andric //===-- LVBinaryReader.h ----------------------------------------*- C++ -*-===// 2bdd1243dSDimitry Andric // 3bdd1243dSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4bdd1243dSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5bdd1243dSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6bdd1243dSDimitry Andric // 7bdd1243dSDimitry Andric //===----------------------------------------------------------------------===// 8bdd1243dSDimitry Andric // 9bdd1243dSDimitry Andric // This file defines the LVBinaryReader class, which is used to describe a 10bdd1243dSDimitry Andric // binary reader. 11bdd1243dSDimitry Andric // 12bdd1243dSDimitry Andric //===----------------------------------------------------------------------===// 13bdd1243dSDimitry Andric 14bdd1243dSDimitry Andric #ifndef LLVM_DEBUGINFO_LOGICALVIEW_READERS_LVBINARYREADER_H 15bdd1243dSDimitry Andric #define LLVM_DEBUGINFO_LOGICALVIEW_READERS_LVBINARYREADER_H 16bdd1243dSDimitry Andric 17bdd1243dSDimitry Andric #include "llvm/DebugInfo/LogicalView/Core/LVReader.h" 18bdd1243dSDimitry Andric #include "llvm/MC/MCAsmInfo.h" 19bdd1243dSDimitry Andric #include "llvm/MC/MCContext.h" 20bdd1243dSDimitry Andric #include "llvm/MC/MCDisassembler/MCDisassembler.h" 21bdd1243dSDimitry Andric #include "llvm/MC/MCInstPrinter.h" 22bdd1243dSDimitry Andric #include "llvm/MC/MCInstrInfo.h" 23bdd1243dSDimitry Andric #include "llvm/MC/MCObjectFileInfo.h" 24bdd1243dSDimitry Andric #include "llvm/MC/MCRegisterInfo.h" 25bdd1243dSDimitry Andric #include "llvm/MC/MCSubtargetInfo.h" 26bdd1243dSDimitry Andric #include "llvm/MC/TargetRegistry.h" 2706c3fb27SDimitry Andric #include "llvm/Object/COFF.h" 28bdd1243dSDimitry Andric #include "llvm/Object/ObjectFile.h" 29bdd1243dSDimitry Andric 30bdd1243dSDimitry Andric namespace llvm { 31bdd1243dSDimitry Andric namespace logicalview { 32bdd1243dSDimitry Andric 33bdd1243dSDimitry Andric constexpr bool UpdateHighAddress = false; 34bdd1243dSDimitry Andric 35bdd1243dSDimitry Andric // Logical scope, Section address, Section index, IsComdat. 36bdd1243dSDimitry Andric struct LVSymbolTableEntry final { 37bdd1243dSDimitry Andric LVScope *Scope = nullptr; 38bdd1243dSDimitry Andric LVAddress Address = 0; 39bdd1243dSDimitry Andric LVSectionIndex SectionIndex = 0; 40bdd1243dSDimitry Andric bool IsComdat = false; 41bdd1243dSDimitry Andric LVSymbolTableEntry() = default; 42bdd1243dSDimitry Andric LVSymbolTableEntry(LVScope *Scope, LVAddress Address, 43bdd1243dSDimitry Andric LVSectionIndex SectionIndex, bool IsComdat) 44bdd1243dSDimitry Andric : Scope(Scope), Address(Address), SectionIndex(SectionIndex), 45bdd1243dSDimitry Andric IsComdat(IsComdat) {} 46bdd1243dSDimitry Andric }; 47bdd1243dSDimitry Andric 48bdd1243dSDimitry Andric // Function names extracted from the object symbol table. 49bdd1243dSDimitry Andric class LVSymbolTable final { 50bdd1243dSDimitry Andric using LVSymbolNames = std::map<std::string, LVSymbolTableEntry>; 51bdd1243dSDimitry Andric LVSymbolNames SymbolNames; 52bdd1243dSDimitry Andric 53bdd1243dSDimitry Andric public: 54bdd1243dSDimitry Andric LVSymbolTable() = default; 55bdd1243dSDimitry Andric 56bdd1243dSDimitry Andric void add(StringRef Name, LVScope *Function, LVSectionIndex SectionIndex = 0); 57bdd1243dSDimitry Andric void add(StringRef Name, LVAddress Address, LVSectionIndex SectionIndex, 58bdd1243dSDimitry Andric bool IsComdat); 59bdd1243dSDimitry Andric LVSectionIndex update(LVScope *Function); 60bdd1243dSDimitry Andric 61bdd1243dSDimitry Andric const LVSymbolTableEntry &getEntry(StringRef Name); 62bdd1243dSDimitry Andric LVAddress getAddress(StringRef Name); 63bdd1243dSDimitry Andric LVSectionIndex getIndex(StringRef Name); 64bdd1243dSDimitry Andric bool getIsComdat(StringRef Name); 65bdd1243dSDimitry Andric 66bdd1243dSDimitry Andric void print(raw_ostream &OS); 67bdd1243dSDimitry Andric }; 68bdd1243dSDimitry Andric 69bdd1243dSDimitry Andric class LVBinaryReader : public LVReader { 70bdd1243dSDimitry Andric // Function names extracted from the object symbol table. 71bdd1243dSDimitry Andric LVSymbolTable SymbolTable; 72bdd1243dSDimitry Andric 7306c3fb27SDimitry Andric // It contains the LVLineDebug elements representing the inlined logical 7406c3fb27SDimitry Andric // lines for the current compile unit, created by parsing the CodeView 7506c3fb27SDimitry Andric // S_INLINESITE symbol annotation data. 7606c3fb27SDimitry Andric using LVInlineeLine = std::map<LVScope *, std::unique_ptr<LVLines>>; 7706c3fb27SDimitry Andric LVInlineeLine CUInlineeLines; 7806c3fb27SDimitry Andric 79bdd1243dSDimitry Andric // Instruction lines for a logical scope. These instructions are fetched 80bdd1243dSDimitry Andric // during its merge with the debug lines. 81bdd1243dSDimitry Andric LVDoubleMap<LVSectionIndex, LVScope *, LVLines *> ScopeInstructions; 82bdd1243dSDimitry Andric 83bdd1243dSDimitry Andric // Links the scope with its first assembler address line. 84bdd1243dSDimitry Andric LVDoubleMap<LVSectionIndex, LVAddress, LVScope *> AssemblerMappings; 85bdd1243dSDimitry Andric 86bdd1243dSDimitry Andric // Mapping from virtual address to section. 87bdd1243dSDimitry Andric // The virtual address refers to the address where the section is loaded. 88bdd1243dSDimitry Andric using LVSectionAddresses = std::map<LVSectionIndex, object::SectionRef>; 89bdd1243dSDimitry Andric LVSectionAddresses SectionAddresses; 90bdd1243dSDimitry Andric 91bdd1243dSDimitry Andric void addSectionAddress(const object::SectionRef &Section) { 92bdd1243dSDimitry Andric if (SectionAddresses.find(Section.getAddress()) == SectionAddresses.end()) 93bdd1243dSDimitry Andric SectionAddresses.emplace(Section.getAddress(), Section); 94bdd1243dSDimitry Andric } 95bdd1243dSDimitry Andric 96bdd1243dSDimitry Andric // Scopes with ranges for current compile unit. It is used to find a line 97bdd1243dSDimitry Andric // giving its exact or closest address. To support comdat functions, all 98bdd1243dSDimitry Andric // addresses for the same section are recorded in the same map. 9906c3fb27SDimitry Andric using LVSectionRanges = std::map<LVSectionIndex, std::unique_ptr<LVRange>>; 100bdd1243dSDimitry Andric LVSectionRanges SectionRanges; 101bdd1243dSDimitry Andric 102bdd1243dSDimitry Andric // Image base and virtual address for Executable file. 103bdd1243dSDimitry Andric uint64_t ImageBaseAddress = 0; 104bdd1243dSDimitry Andric uint64_t VirtualAddress = 0; 105bdd1243dSDimitry Andric 106bdd1243dSDimitry Andric // Object sections with machine code. 107bdd1243dSDimitry Andric using LVSections = std::map<LVSectionIndex, object::SectionRef>; 108bdd1243dSDimitry Andric LVSections Sections; 109bdd1243dSDimitry Andric 11006c3fb27SDimitry Andric std::vector<std::unique_ptr<LVLines>> DiscoveredLines; 11106c3fb27SDimitry Andric 112bdd1243dSDimitry Andric protected: 113bdd1243dSDimitry Andric // It contains the LVLineDebug elements representing the logical lines for 114bdd1243dSDimitry Andric // the current compile unit, created by parsing the debug line section. 115bdd1243dSDimitry Andric LVLines CULines; 116bdd1243dSDimitry Andric 117bdd1243dSDimitry Andric std::unique_ptr<const MCRegisterInfo> MRI; 118bdd1243dSDimitry Andric std::unique_ptr<const MCAsmInfo> MAI; 119bdd1243dSDimitry Andric std::unique_ptr<const MCSubtargetInfo> STI; 120bdd1243dSDimitry Andric std::unique_ptr<const MCInstrInfo> MII; 121bdd1243dSDimitry Andric std::unique_ptr<const MCDisassembler> MD; 122bdd1243dSDimitry Andric std::unique_ptr<MCContext> MC; 123bdd1243dSDimitry Andric std::unique_ptr<MCInstPrinter> MIP; 124bdd1243dSDimitry Andric 125*0fca6ea1SDimitry Andric // https://yurydelendik.github.io/webassembly-dwarf/ 126*0fca6ea1SDimitry Andric // 2. Consuming and Generating DWARF for WebAssembly Code 127*0fca6ea1SDimitry Andric // Note: Some DWARF constructs don't map one-to-one onto WebAssembly 128*0fca6ea1SDimitry Andric // constructs. We strive to enumerate and resolve any ambiguities here. 129*0fca6ea1SDimitry Andric // 130*0fca6ea1SDimitry Andric // 2.1. Code Addresses 131*0fca6ea1SDimitry Andric // Note: DWARF associates various bits of debug info 132*0fca6ea1SDimitry Andric // with particular locations in the program via its code address (instruction 133*0fca6ea1SDimitry Andric // pointer or PC). However, WebAssembly's linear memory address space does not 134*0fca6ea1SDimitry Andric // contain WebAssembly instructions. 135*0fca6ea1SDimitry Andric // 136*0fca6ea1SDimitry Andric // Wherever a code address (see 2.17 of [DWARF]) is used in DWARF for 137*0fca6ea1SDimitry Andric // WebAssembly, it must be the offset of an instruction relative within the 138*0fca6ea1SDimitry Andric // Code section of the WebAssembly file. The DWARF is considered malformed if 139*0fca6ea1SDimitry Andric // a PC offset is between instruction boundaries within the Code section. 140*0fca6ea1SDimitry Andric // 141*0fca6ea1SDimitry Andric // Note: It is expected that a DWARF consumer does not know how to decode 142*0fca6ea1SDimitry Andric // WebAssembly instructions. The instruction pointer is selected as the offset 143*0fca6ea1SDimitry Andric // in the binary file of the first byte of the instruction, and it is 144*0fca6ea1SDimitry Andric // consistent with the WebAssembly Web API conventions definition of the code 145*0fca6ea1SDimitry Andric // location. 146*0fca6ea1SDimitry Andric // 147*0fca6ea1SDimitry Andric // EXAMPLE: .DEBUG_LINE INSTRUCTION POINTERS 148*0fca6ea1SDimitry Andric // The .debug_line DWARF section maps instruction pointers to source 149*0fca6ea1SDimitry Andric // locations. With WebAssembly, the .debug_line section maps Code 150*0fca6ea1SDimitry Andric // section-relative instruction offsets to source locations. 151*0fca6ea1SDimitry Andric // 152*0fca6ea1SDimitry Andric // EXAMPLE: DW_AT_* ATTRIBUTES 153*0fca6ea1SDimitry Andric // For entities with a single associated code address, DWARF uses 154*0fca6ea1SDimitry Andric // the DW_AT_low_pc attribute to specify the associated code address value. 155*0fca6ea1SDimitry Andric // For WebAssembly, the DW_AT_low_pc's value is a Code section-relative 156*0fca6ea1SDimitry Andric // instruction offset. 157*0fca6ea1SDimitry Andric // 158*0fca6ea1SDimitry Andric // For entities with a single contiguous range of code, DWARF uses a 159*0fca6ea1SDimitry Andric // pair of DW_AT_low_pc and DW_AT_high_pc attributes to specify the associated 160*0fca6ea1SDimitry Andric // contiguous range of code address values. For WebAssembly, these attributes 161*0fca6ea1SDimitry Andric // are Code section-relative instruction offsets. 162*0fca6ea1SDimitry Andric // 163*0fca6ea1SDimitry Andric // For entities with multiple ranges of code, DWARF uses the DW_AT_ranges 164*0fca6ea1SDimitry Andric // attribute, which refers to the array located at the .debug_ranges section. 165*0fca6ea1SDimitry Andric LVAddress WasmCodeSectionOffset = 0; 166*0fca6ea1SDimitry Andric 167bdd1243dSDimitry Andric // Loads all info for the architecture of the provided object file. 168bdd1243dSDimitry Andric Error loadGenericTargetInfo(StringRef TheTriple, StringRef TheFeatures); 169bdd1243dSDimitry Andric 170bdd1243dSDimitry Andric virtual void mapRangeAddress(const object::ObjectFile &Obj) {} 171bdd1243dSDimitry Andric virtual void mapRangeAddress(const object::ObjectFile &Obj, 172bdd1243dSDimitry Andric const object::SectionRef &Section, 173bdd1243dSDimitry Andric bool IsComdat) {} 174bdd1243dSDimitry Andric 175bdd1243dSDimitry Andric // Create a mapping from virtual address to section. 176bdd1243dSDimitry Andric void mapVirtualAddress(const object::ObjectFile &Obj); 177bdd1243dSDimitry Andric void mapVirtualAddress(const object::COFFObjectFile &COFFObj); 178bdd1243dSDimitry Andric 179bdd1243dSDimitry Andric Expected<std::pair<LVSectionIndex, object::SectionRef>> 180bdd1243dSDimitry Andric getSection(LVScope *Scope, LVAddress Address, LVSectionIndex SectionIndex); 181bdd1243dSDimitry Andric 182bdd1243dSDimitry Andric void addSectionRange(LVSectionIndex SectionIndex, LVScope *Scope); 183bdd1243dSDimitry Andric void addSectionRange(LVSectionIndex SectionIndex, LVScope *Scope, 184bdd1243dSDimitry Andric LVAddress LowerAddress, LVAddress UpperAddress); 185bdd1243dSDimitry Andric LVRange *getSectionRanges(LVSectionIndex SectionIndex); 186bdd1243dSDimitry Andric 18706c3fb27SDimitry Andric void includeInlineeLines(LVSectionIndex SectionIndex, LVScope *Function); 18806c3fb27SDimitry Andric 189bdd1243dSDimitry Andric Error createInstructions(); 190bdd1243dSDimitry Andric Error createInstructions(LVScope *Function, LVSectionIndex SectionIndex); 191bdd1243dSDimitry Andric Error createInstructions(LVScope *Function, LVSectionIndex SectionIndex, 192bdd1243dSDimitry Andric const LVNameInfo &NameInfo); 193bdd1243dSDimitry Andric 194bdd1243dSDimitry Andric void processLines(LVLines *DebugLines, LVSectionIndex SectionIndex); 195bdd1243dSDimitry Andric void processLines(LVLines *DebugLines, LVSectionIndex SectionIndex, 196bdd1243dSDimitry Andric LVScope *Function); 197bdd1243dSDimitry Andric 198bdd1243dSDimitry Andric public: 199bdd1243dSDimitry Andric LVBinaryReader() = delete; 200bdd1243dSDimitry Andric LVBinaryReader(StringRef Filename, StringRef FileFormatName, ScopedPrinter &W, 201bdd1243dSDimitry Andric LVBinaryType BinaryType) 202bdd1243dSDimitry Andric : LVReader(Filename, FileFormatName, W, BinaryType) {} 203bdd1243dSDimitry Andric LVBinaryReader(const LVBinaryReader &) = delete; 204bdd1243dSDimitry Andric LVBinaryReader &operator=(const LVBinaryReader &) = delete; 20506c3fb27SDimitry Andric virtual ~LVBinaryReader() = default; 20606c3fb27SDimitry Andric 20706c3fb27SDimitry Andric void addInlineeLines(LVScope *Scope, LVLines &Lines) { 20806c3fb27SDimitry Andric CUInlineeLines.emplace(Scope, std::make_unique<LVLines>(std::move(Lines))); 20906c3fb27SDimitry Andric } 21006c3fb27SDimitry Andric 21106c3fb27SDimitry Andric // Convert Segment::Offset pair to absolute address. 21206c3fb27SDimitry Andric LVAddress linearAddress(uint16_t Segment, uint32_t Offset, 21306c3fb27SDimitry Andric LVAddress Addendum = 0) { 21406c3fb27SDimitry Andric return ImageBaseAddress + (Segment * VirtualAddress) + Offset + Addendum; 21506c3fb27SDimitry Andric } 216bdd1243dSDimitry Andric 217bdd1243dSDimitry Andric void addToSymbolTable(StringRef Name, LVScope *Function, 218bdd1243dSDimitry Andric LVSectionIndex SectionIndex = 0); 219bdd1243dSDimitry Andric void addToSymbolTable(StringRef Name, LVAddress Address, 220bdd1243dSDimitry Andric LVSectionIndex SectionIndex, bool IsComdat); 221bdd1243dSDimitry Andric LVSectionIndex updateSymbolTable(LVScope *Function); 222bdd1243dSDimitry Andric 223bdd1243dSDimitry Andric const LVSymbolTableEntry &getSymbolTableEntry(StringRef Name); 224bdd1243dSDimitry Andric LVAddress getSymbolTableAddress(StringRef Name); 225bdd1243dSDimitry Andric LVSectionIndex getSymbolTableIndex(StringRef Name); 226bdd1243dSDimitry Andric bool getSymbolTableIsComdat(StringRef Name); 227bdd1243dSDimitry Andric 228bdd1243dSDimitry Andric LVSectionIndex getSectionIndex(LVScope *Scope) override { 229bdd1243dSDimitry Andric return Scope ? getSymbolTableIndex(Scope->getLinkageName()) 230bdd1243dSDimitry Andric : DotTextSectionIndex; 231bdd1243dSDimitry Andric } 232bdd1243dSDimitry Andric 233bdd1243dSDimitry Andric void print(raw_ostream &OS) const; 234bdd1243dSDimitry Andric 235bdd1243dSDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 236bdd1243dSDimitry Andric void dump() const { print(dbgs()); } 237bdd1243dSDimitry Andric #endif 238bdd1243dSDimitry Andric }; 239bdd1243dSDimitry Andric 240bdd1243dSDimitry Andric } // end namespace logicalview 241bdd1243dSDimitry Andric } // end namespace llvm 242bdd1243dSDimitry Andric 243bdd1243dSDimitry Andric #endif // LLVM_DEBUGINFO_LOGICALVIEW_READERS_LVBINARYREADER_H 244