//===- MapFile.cpp --------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the -map option, which maps address ranges to their
// respective contents, plus the input file these contents were originally from.
// The contents (typically symbols) are listed in address order. Dead-stripped
// contents are included as well.
//
// # Path: test
// # Arch: x86_64
// # Object files:
// [  0] linker synthesized
// [  1] a.o
// # Sections:
// # Address    Size        Segment  Section
// 0x1000005C0  0x0000004C  __TEXT   __text
// # Symbols:
// # Address    Size        File  Name
// 0x1000005C0  0x00000001  [  1] _main
// # Dead Stripped Symbols:
// #            Size        File  Name
// <<dead>>     0x00000001  [  1] _foo
//
//===----------------------------------------------------------------------===//

#include "MapFile.h"
#include "ConcatOutputSection.h"
#include "Config.h"
34fe6060f1SDimitry Andric #include "InputFiles.h" 35fe6060f1SDimitry Andric #include "InputSection.h" 36fe6060f1SDimitry Andric #include "OutputSegment.h" 37fe6060f1SDimitry Andric #include "Symbols.h" 3881ad6265SDimitry Andric #include "SyntheticSections.h" 39fe6060f1SDimitry Andric #include "Target.h" 40bdd1243dSDimitry Andric #include "lld/Common/ErrorHandler.h" 41bdd1243dSDimitry Andric #include "llvm/ADT/DenseMap.h" 42fe6060f1SDimitry Andric #include "llvm/Support/Parallel.h" 43fe6060f1SDimitry Andric #include "llvm/Support/TimeProfiler.h" 44fe6060f1SDimitry Andric 45fe6060f1SDimitry Andric using namespace llvm; 46fe6060f1SDimitry Andric using namespace llvm::sys; 47fe6060f1SDimitry Andric using namespace lld; 48fe6060f1SDimitry Andric using namespace lld::macho; 49fe6060f1SDimitry Andric 50bdd1243dSDimitry Andric struct CStringInfo { 51bdd1243dSDimitry Andric uint32_t fileIndex; 52bdd1243dSDimitry Andric StringRef str; 53bdd1243dSDimitry Andric }; 54bdd1243dSDimitry Andric 55bdd1243dSDimitry Andric struct MapInfo { 56bdd1243dSDimitry Andric SmallVector<InputFile *> files; 57bdd1243dSDimitry Andric SmallVector<Defined *> deadSymbols; 58bdd1243dSDimitry Andric DenseMap<const OutputSection *, 59bdd1243dSDimitry Andric SmallVector<std::pair<uint64_t /*addr*/, CStringInfo>>> 60bdd1243dSDimitry Andric liveCStringsForSection; 61bdd1243dSDimitry Andric SmallVector<CStringInfo> deadCStrings; 62bdd1243dSDimitry Andric }; 63bdd1243dSDimitry Andric 64bdd1243dSDimitry Andric static MapInfo gatherMapInfo() { 65bdd1243dSDimitry Andric MapInfo info; 66bdd1243dSDimitry Andric for (InputFile *file : inputFiles) { 67bdd1243dSDimitry Andric bool isReferencedFile = false; 68bdd1243dSDimitry Andric 69bdd1243dSDimitry Andric if (isa<ObjFile>(file) || isa<BitcodeFile>(file)) { 70bdd1243dSDimitry Andric uint32_t fileIndex = info.files.size() + 1; 71bdd1243dSDimitry Andric 72bdd1243dSDimitry Andric // Gather the dead symbols. 
We don't have to bother with the live ones 73bdd1243dSDimitry Andric // because we will pick them up as we iterate over the OutputSections 74bdd1243dSDimitry Andric // later. 75bdd1243dSDimitry Andric for (Symbol *sym : file->symbols) { 76fe6060f1SDimitry Andric if (auto *d = dyn_cast_or_null<Defined>(sym)) 77bdd1243dSDimitry Andric // Only emit the prevailing definition of a symbol. Also, don't emit 78bdd1243dSDimitry Andric // the symbol if it is part of a cstring section (we use the literal 79bdd1243dSDimitry Andric // value instead, similar to ld64) 80*0fca6ea1SDimitry Andric if (d->isec() && d->getFile() == file && 81*0fca6ea1SDimitry Andric !isa<CStringInputSection>(d->isec())) { 82bdd1243dSDimitry Andric isReferencedFile = true; 83bdd1243dSDimitry Andric if (!d->isLive()) 84bdd1243dSDimitry Andric info.deadSymbols.push_back(d); 85bdd1243dSDimitry Andric } 86bdd1243dSDimitry Andric } 87bdd1243dSDimitry Andric 88bdd1243dSDimitry Andric // Gather all the cstrings (both live and dead). A CString(Output)Section 89bdd1243dSDimitry Andric // doesn't provide us a way of figuring out which InputSections its 90bdd1243dSDimitry Andric // cstring contents came from, so we need to build up that mapping here. 
91bdd1243dSDimitry Andric for (const Section *sec : file->sections) { 92bdd1243dSDimitry Andric for (const Subsection &subsec : sec->subsections) { 93bdd1243dSDimitry Andric if (auto isec = dyn_cast<CStringInputSection>(subsec.isec)) { 94bdd1243dSDimitry Andric auto &liveCStrings = info.liveCStringsForSection[isec->parent]; 95bdd1243dSDimitry Andric for (const auto &[i, piece] : llvm::enumerate(isec->pieces)) { 96bdd1243dSDimitry Andric if (piece.live) 97bdd1243dSDimitry Andric liveCStrings.push_back({isec->parent->addr + piece.outSecOff, 98bdd1243dSDimitry Andric {fileIndex, isec->getStringRef(i)}}); 99bdd1243dSDimitry Andric else 100bdd1243dSDimitry Andric info.deadCStrings.push_back({fileIndex, isec->getStringRef(i)}); 101bdd1243dSDimitry Andric isReferencedFile = true; 102bdd1243dSDimitry Andric } 1031fd87a68SDimitry Andric } else { 10481ad6265SDimitry Andric break; 10581ad6265SDimitry Andric } 10681ad6265SDimitry Andric } 107bdd1243dSDimitry Andric } 108bdd1243dSDimitry Andric } else if (const auto *dylibFile = dyn_cast<DylibFile>(file)) { 109bdd1243dSDimitry Andric isReferencedFile = dylibFile->isReferenced(); 110bdd1243dSDimitry Andric } 111fe6060f1SDimitry Andric 112bdd1243dSDimitry Andric if (isReferencedFile) 113bdd1243dSDimitry Andric info.files.push_back(file); 114bdd1243dSDimitry Andric } 115bdd1243dSDimitry Andric 116bdd1243dSDimitry Andric // cstrings are not stored in sorted order in their OutputSections, so we sort 117bdd1243dSDimitry Andric // them here. 
118bdd1243dSDimitry Andric for (auto &liveCStrings : info.liveCStringsForSection) 119bdd1243dSDimitry Andric parallelSort(liveCStrings.second, [](const auto &p1, const auto &p2) { 120bdd1243dSDimitry Andric return p1.first < p2.first; 121bdd1243dSDimitry Andric }); 122bdd1243dSDimitry Andric return info; 123bdd1243dSDimitry Andric } 124bdd1243dSDimitry Andric 12506c3fb27SDimitry Andric // We use this instead of `toString(const InputFile *)` as we don't want to 12606c3fb27SDimitry Andric // include the dylib install name in our output. 12706c3fb27SDimitry Andric static void printFileName(raw_fd_ostream &os, const InputFile *f) { 12806c3fb27SDimitry Andric if (f->archiveName.empty()) 12906c3fb27SDimitry Andric os << f->getName(); 13006c3fb27SDimitry Andric else 13106c3fb27SDimitry Andric os << f->archiveName << "(" << path::filename(f->getName()) + ")"; 13206c3fb27SDimitry Andric } 13306c3fb27SDimitry Andric 134bdd1243dSDimitry Andric // For printing the contents of the __stubs and __la_symbol_ptr sections. 
13506c3fb27SDimitry Andric static void printStubsEntries( 136bdd1243dSDimitry Andric raw_fd_ostream &os, 137bdd1243dSDimitry Andric const DenseMap<lld::macho::InputFile *, uint32_t> &readerToFileOrdinal, 138bdd1243dSDimitry Andric const OutputSection *osec, size_t entrySize) { 139bdd1243dSDimitry Andric for (const Symbol *sym : in.stubs->getEntries()) 140bdd1243dSDimitry Andric os << format("0x%08llX\t0x%08zX\t[%3u] %s\n", 141bdd1243dSDimitry Andric osec->addr + sym->stubsIndex * entrySize, entrySize, 142bdd1243dSDimitry Andric readerToFileOrdinal.lookup(sym->getFile()), 143bdd1243dSDimitry Andric sym->getName().str().data()); 144bdd1243dSDimitry Andric } 145bdd1243dSDimitry Andric 14606c3fb27SDimitry Andric static void printNonLazyPointerSection(raw_fd_ostream &os, 147bdd1243dSDimitry Andric NonLazyPointerSectionBase *osec) { 148bdd1243dSDimitry Andric // ld64 considers stubs to belong to particular files, but considers GOT 149bdd1243dSDimitry Andric // entries to be linker-synthesized. Not sure why they made that decision, but 150bdd1243dSDimitry Andric // I think we can follow suit unless there's demand for better symbol-to-file 151bdd1243dSDimitry Andric // associations. 
152bdd1243dSDimitry Andric for (const Symbol *sym : osec->getEntries()) 153bdd1243dSDimitry Andric os << format("0x%08llX\t0x%08zX\t[ 0] non-lazy-pointer-to-local: %s\n", 154bdd1243dSDimitry Andric osec->addr + sym->gotIndex * target->wordSize, 155bdd1243dSDimitry Andric target->wordSize, sym->getName().str().data()); 156fe6060f1SDimitry Andric } 157fe6060f1SDimitry Andric 158*0fca6ea1SDimitry Andric static uint64_t getSymSizeForMap(Defined *sym) { 159*0fca6ea1SDimitry Andric if (sym->wasIdenticalCodeFolded) 160*0fca6ea1SDimitry Andric return 0; 161*0fca6ea1SDimitry Andric return sym->size; 162*0fca6ea1SDimitry Andric } 163*0fca6ea1SDimitry Andric 164fe6060f1SDimitry Andric void macho::writeMapFile() { 165fe6060f1SDimitry Andric if (config->mapFile.empty()) 166fe6060f1SDimitry Andric return; 167fe6060f1SDimitry Andric 168fe6060f1SDimitry Andric TimeTraceScope timeScope("Write map file"); 169fe6060f1SDimitry Andric 170fe6060f1SDimitry Andric // Open a map file for writing. 171fe6060f1SDimitry Andric std::error_code ec; 172fe6060f1SDimitry Andric raw_fd_ostream os(config->mapFile, ec, sys::fs::OF_None); 173fe6060f1SDimitry Andric if (ec) { 174fe6060f1SDimitry Andric error("cannot open " + config->mapFile + ": " + ec.message()); 175fe6060f1SDimitry Andric return; 176fe6060f1SDimitry Andric } 177fe6060f1SDimitry Andric 178fe6060f1SDimitry Andric os << format("# Path: %s\n", config->outputFile.str().c_str()); 179fe6060f1SDimitry Andric os << format("# Arch: %s\n", 180fe6060f1SDimitry Andric getArchitectureName(config->arch()).str().c_str()); 181fe6060f1SDimitry Andric 182bdd1243dSDimitry Andric MapInfo info = gatherMapInfo(); 183bdd1243dSDimitry Andric 184fe6060f1SDimitry Andric os << "# Object files:\n"; 185fe6060f1SDimitry Andric os << format("[%3u] %s\n", 0, (const char *)"linker synthesized"); 186fe6060f1SDimitry Andric uint32_t fileIndex = 1; 187fe6060f1SDimitry Andric DenseMap<lld::macho::InputFile *, uint32_t> readerToFileOrdinal; 188bdd1243dSDimitry Andric for 
(InputFile *file : info.files) { 18906c3fb27SDimitry Andric os << format("[%3u] ", fileIndex); 19006c3fb27SDimitry Andric printFileName(os, file); 19106c3fb27SDimitry Andric os << "\n"; 192fe6060f1SDimitry Andric readerToFileOrdinal[file] = fileIndex++; 193fe6060f1SDimitry Andric } 194fe6060f1SDimitry Andric 195fe6060f1SDimitry Andric os << "# Sections:\n"; 196fe6060f1SDimitry Andric os << "# Address\tSize \tSegment\tSection\n"; 197fe6060f1SDimitry Andric for (OutputSegment *seg : outputSegments) 198fe6060f1SDimitry Andric for (OutputSection *osec : seg->getSections()) { 199fe6060f1SDimitry Andric if (osec->isHidden()) 200fe6060f1SDimitry Andric continue; 201fe6060f1SDimitry Andric 202fe6060f1SDimitry Andric os << format("0x%08llX\t0x%08llX\t%s\t%s\n", osec->addr, osec->getSize(), 203fe6060f1SDimitry Andric seg->name.str().c_str(), osec->name.str().c_str()); 204fe6060f1SDimitry Andric } 205fe6060f1SDimitry Andric 206*0fca6ea1SDimitry Andric // Shared function to print an array of symbols. 
207*0fca6ea1SDimitry Andric auto printIsecArrSyms = [&](const std::vector<ConcatInputSection *> &arr) { 208*0fca6ea1SDimitry Andric for (const ConcatInputSection *isec : arr) { 209*0fca6ea1SDimitry Andric for (Defined *sym : isec->symbols) { 210*0fca6ea1SDimitry Andric if (!(isPrivateLabel(sym->getName()) && getSymSizeForMap(sym) == 0)) 211*0fca6ea1SDimitry Andric os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(), 212*0fca6ea1SDimitry Andric getSymSizeForMap(sym), 213*0fca6ea1SDimitry Andric readerToFileOrdinal[sym->getFile()], 214*0fca6ea1SDimitry Andric sym->getName().str().data()); 215*0fca6ea1SDimitry Andric } 216*0fca6ea1SDimitry Andric } 217*0fca6ea1SDimitry Andric }; 218*0fca6ea1SDimitry Andric 219fe6060f1SDimitry Andric os << "# Symbols:\n"; 220bdd1243dSDimitry Andric os << "# Address\tSize \tFile Name\n"; 221bdd1243dSDimitry Andric for (const OutputSegment *seg : outputSegments) { 222bdd1243dSDimitry Andric for (const OutputSection *osec : seg->getSections()) { 223bdd1243dSDimitry Andric if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) { 224*0fca6ea1SDimitry Andric printIsecArrSyms(concatOsec->inputs); 225bdd1243dSDimitry Andric } else if (osec == in.cStringSection || osec == in.objcMethnameSection) { 226bdd1243dSDimitry Andric const auto &liveCStrings = info.liveCStringsForSection.lookup(osec); 227bdd1243dSDimitry Andric uint64_t lastAddr = 0; // strings will never start at address 0, so this 228bdd1243dSDimitry Andric // is a sentinel value 229bdd1243dSDimitry Andric for (const auto &[addr, info] : liveCStrings) { 230bdd1243dSDimitry Andric uint64_t size = 0; 231bdd1243dSDimitry Andric if (addr != lastAddr) 232bdd1243dSDimitry Andric size = info.str.size() + 1; // include null terminator 233bdd1243dSDimitry Andric lastAddr = addr; 234bdd1243dSDimitry Andric os << format("0x%08llX\t0x%08llX\t[%3u] literal string: ", addr, size, 235bdd1243dSDimitry Andric info.fileIndex); 236bdd1243dSDimitry Andric os.write_escaped(info.str) << "\n"; 
237bdd1243dSDimitry Andric } 238bdd1243dSDimitry Andric } else if (osec == (void *)in.unwindInfo) { 239bdd1243dSDimitry Andric os << format("0x%08llX\t0x%08llX\t[ 0] compact unwind info\n", 240bdd1243dSDimitry Andric osec->addr, osec->getSize()); 241bdd1243dSDimitry Andric } else if (osec == in.stubs) { 242bdd1243dSDimitry Andric printStubsEntries(os, readerToFileOrdinal, osec, target->stubSize); 243bdd1243dSDimitry Andric } else if (osec == in.lazyPointers) { 244bdd1243dSDimitry Andric printStubsEntries(os, readerToFileOrdinal, osec, target->wordSize); 245bdd1243dSDimitry Andric } else if (osec == in.stubHelper) { 246bdd1243dSDimitry Andric // yes, ld64 calls it "helper helper"... 247bdd1243dSDimitry Andric os << format("0x%08llX\t0x%08llX\t[ 0] helper helper\n", osec->addr, 248bdd1243dSDimitry Andric osec->getSize()); 249bdd1243dSDimitry Andric } else if (osec == in.got) { 250bdd1243dSDimitry Andric printNonLazyPointerSection(os, in.got); 251bdd1243dSDimitry Andric } else if (osec == in.tlvPointers) { 252bdd1243dSDimitry Andric printNonLazyPointerSection(os, in.tlvPointers); 253*0fca6ea1SDimitry Andric } else if (osec == in.objcMethList) { 254*0fca6ea1SDimitry Andric printIsecArrSyms(in.objcMethList->getInputs()); 255bdd1243dSDimitry Andric } 256bdd1243dSDimitry Andric // TODO print other synthetic sections 257bdd1243dSDimitry Andric } 258fe6060f1SDimitry Andric } 259fe6060f1SDimitry Andric 2601fd87a68SDimitry Andric if (config->deadStrip) { 2611fd87a68SDimitry Andric os << "# Dead Stripped Symbols:\n"; 262bdd1243dSDimitry Andric os << "# \tSize \tFile Name\n"; 263bdd1243dSDimitry Andric for (Defined *sym : info.deadSymbols) { 2641fd87a68SDimitry Andric assert(!sym->isLive()); 265*0fca6ea1SDimitry Andric os << format("<<dead>>\t0x%08llX\t[%3u] %s\n", getSymSizeForMap(sym), 266bdd1243dSDimitry Andric readerToFileOrdinal[sym->getFile()], 267bdd1243dSDimitry Andric sym->getName().str().data()); 268bdd1243dSDimitry Andric } 269bdd1243dSDimitry Andric for (CStringInfo 
&cstrInfo : info.deadCStrings) { 270bdd1243dSDimitry Andric os << format("<<dead>>\t0x%08zX\t[%3u] literal string: ", 271bdd1243dSDimitry Andric cstrInfo.str.size() + 1, cstrInfo.fileIndex); 272bdd1243dSDimitry Andric os.write_escaped(cstrInfo.str) << "\n"; 2731fd87a68SDimitry Andric } 2741fd87a68SDimitry Andric } 275fe6060f1SDimitry Andric } 276