15ffd83dbSDimitry Andric //===- InputSection.cpp ---------------------------------------------------===// 25ffd83dbSDimitry Andric // 35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 65ffd83dbSDimitry Andric // 75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 85ffd83dbSDimitry Andric 95ffd83dbSDimitry Andric #include "InputSection.h" 10fe6060f1SDimitry Andric #include "ConcatOutputSection.h" 11fe6060f1SDimitry Andric #include "Config.h" 12e8d8bef9SDimitry Andric #include "InputFiles.h" 135ffd83dbSDimitry Andric #include "OutputSegment.h" 145ffd83dbSDimitry Andric #include "Symbols.h" 15fe6060f1SDimitry Andric #include "SyntheticSections.h" 165ffd83dbSDimitry Andric #include "Target.h" 17fe6060f1SDimitry Andric #include "UnwindInfoSection.h" 18e8d8bef9SDimitry Andric #include "Writer.h" 195ffd83dbSDimitry Andric #include "lld/Common/Memory.h" 205ffd83dbSDimitry Andric #include "llvm/Support/Endian.h" 21fe6060f1SDimitry Andric #include "llvm/Support/xxhash.h" 225ffd83dbSDimitry Andric 235ffd83dbSDimitry Andric using namespace llvm; 245ffd83dbSDimitry Andric using namespace llvm::MachO; 255ffd83dbSDimitry Andric using namespace llvm::support; 265ffd83dbSDimitry Andric using namespace lld; 275ffd83dbSDimitry Andric using namespace lld::macho; 285ffd83dbSDimitry Andric 294824e7fdSDimitry Andric // Verify ConcatInputSection's size on 64-bit builds. The size of std::vector 304824e7fdSDimitry Andric // can differ based on STL debug levels (e.g. iterator debugging on MSVC's STL), 314824e7fdSDimitry Andric // so account for that. 3281ad6265SDimitry Andric static_assert(sizeof(void *) != 8 || sizeof(ConcatInputSection) == 3381ad6265SDimitry Andric sizeof(std::vector<Reloc>) + 104, 34349cc55cSDimitry Andric "Try to minimize ConcatInputSection's size, we create many " 35349cc55cSDimitry Andric "instances of it"); 36349cc55cSDimitry Andric 37fe6060f1SDimitry Andric std::vector<ConcatInputSection *> macho::inputSections; 385ffd83dbSDimitry Andric 39e8d8bef9SDimitry Andric uint64_t InputSection::getFileSize() const { 40fe6060f1SDimitry Andric return isZeroFill(getFlags()) ? 0 : getSize(); 41e8d8bef9SDimitry Andric } 42e8d8bef9SDimitry Andric 43fe6060f1SDimitry Andric uint64_t InputSection::getVA(uint64_t off) const { 44fe6060f1SDimitry Andric return parent->addr + getOffset(off); 45fe6060f1SDimitry Andric } 465ffd83dbSDimitry Andric 47fe6060f1SDimitry Andric static uint64_t resolveSymbolVA(const Symbol *sym, uint8_t type) { 48fe6060f1SDimitry Andric const RelocAttrs &relocAttrs = target->getRelocAttrs(type); 49fe6060f1SDimitry Andric if (relocAttrs.hasAttr(RelocAttrBits::BRANCH)) 50fe6060f1SDimitry Andric return sym->resolveBranchVA(); 51fe6060f1SDimitry Andric if (relocAttrs.hasAttr(RelocAttrBits::GOT)) 52fe6060f1SDimitry Andric return sym->resolveGotVA(); 53fe6060f1SDimitry Andric if (relocAttrs.hasAttr(RelocAttrBits::TLV)) 54fe6060f1SDimitry Andric return sym->resolveTlvVA(); 55fe6060f1SDimitry Andric return sym->getVA(); 56fe6060f1SDimitry Andric } 57fe6060f1SDimitry Andric 5881ad6265SDimitry Andric const Defined *InputSection::getContainingSymbol(uint64_t off) const { 5981ad6265SDimitry Andric auto *nextSym = llvm::upper_bound( 6081ad6265SDimitry Andric symbols, off, [](uint64_t a, const Defined *b) { return a < b->value; }); 6181ad6265SDimitry Andric if (nextSym == symbols.begin()) 6281ad6265SDimitry Andric return nullptr; 6381ad6265SDimitry Andric return *std::prev(nextSym); 6481ad6265SDimitry Andric } 6581ad6265SDimitry Andric 6681ad6265SDimitry Andric std::string InputSection::getLocation(uint64_t off) const { 6781ad6265SDimitry Andric // First, try to find a symbol that's near the offset. Use it as a reference 6881ad6265SDimitry Andric // point. 6981ad6265SDimitry Andric if (auto *sym = getContainingSymbol(off)) 70*972a253aSDimitry Andric return (toString(getFile()) + ":(symbol " + toString(*sym) + "+0x" + 7181ad6265SDimitry Andric Twine::utohexstr(off - sym->value) + ")") 7281ad6265SDimitry Andric .str(); 7381ad6265SDimitry Andric 7481ad6265SDimitry Andric // If that fails, use the section itself as a reference point. 7581ad6265SDimitry Andric for (const Subsection &subsec : section.subsections) { 7681ad6265SDimitry Andric if (subsec.isec == this) { 7781ad6265SDimitry Andric off += subsec.offset; 7881ad6265SDimitry Andric break; 79fe6060f1SDimitry Andric } 80fe6060f1SDimitry Andric } 81fe6060f1SDimitry Andric 8281ad6265SDimitry Andric return (toString(getFile()) + ":(" + getName() + "+0x" + 8381ad6265SDimitry Andric Twine::utohexstr(off) + ")") 8481ad6265SDimitry Andric .str(); 8581ad6265SDimitry Andric } 8681ad6265SDimitry Andric 8781ad6265SDimitry Andric std::string InputSection::getSourceLocation(uint64_t off) const { 8881ad6265SDimitry Andric auto *obj = dyn_cast_or_null<ObjFile>(getFile()); 8981ad6265SDimitry Andric if (!obj) 9081ad6265SDimitry Andric return {}; 9181ad6265SDimitry Andric 9281ad6265SDimitry Andric DWARFCache *dwarf = obj->getDwarf(); 9381ad6265SDimitry Andric if (!dwarf) 9481ad6265SDimitry Andric return std::string(); 9581ad6265SDimitry Andric 9681ad6265SDimitry Andric for (const Subsection &subsec : section.subsections) { 9781ad6265SDimitry Andric if (subsec.isec == this) { 9881ad6265SDimitry Andric off += subsec.offset; 9981ad6265SDimitry Andric break; 10081ad6265SDimitry Andric } 10181ad6265SDimitry Andric } 10281ad6265SDimitry Andric 10381ad6265SDimitry Andric auto createMsg = [&](StringRef path, unsigned line) { 10481ad6265SDimitry Andric std::string filename = sys::path::filename(path).str(); 10581ad6265SDimitry Andric std::string lineStr = (":" + Twine(line)).str(); 10681ad6265SDimitry Andric if (filename == path) 10781ad6265SDimitry Andric return filename + lineStr; 10881ad6265SDimitry Andric return (filename + lineStr + " (" + path + lineStr + ")").str(); 10981ad6265SDimitry Andric }; 11081ad6265SDimitry Andric 11181ad6265SDimitry Andric // First, look up a function for a given offset. 11281ad6265SDimitry Andric if (Optional<DILineInfo> li = dwarf->getDILineInfo( 11381ad6265SDimitry Andric section.addr + off, object::SectionedAddress::UndefSection)) 11481ad6265SDimitry Andric return createMsg(li->FileName, li->Line); 11581ad6265SDimitry Andric 11681ad6265SDimitry Andric // If it failed, look up again as a variable. 11781ad6265SDimitry Andric if (const Defined *sym = getContainingSymbol(off)) { 11881ad6265SDimitry Andric // Symbols are generally prefixed with an underscore, which is not included 11981ad6265SDimitry Andric // in the debug information. 12081ad6265SDimitry Andric StringRef symName = sym->getName(); 12181ad6265SDimitry Andric if (!symName.empty() && symName[0] == '_') 12281ad6265SDimitry Andric symName = symName.substr(1); 12381ad6265SDimitry Andric 12481ad6265SDimitry Andric if (Optional<std::pair<std::string, unsigned>> fileLine = 12581ad6265SDimitry Andric dwarf->getVariableLoc(symName)) 12681ad6265SDimitry Andric return createMsg(fileLine->first, fileLine->second); 12781ad6265SDimitry Andric } 12881ad6265SDimitry Andric 12981ad6265SDimitry Andric // Try to get the source file's name from the DWARF information. 13081ad6265SDimitry Andric if (obj->compileUnit) 13181ad6265SDimitry Andric return obj->sourceFile(); 13281ad6265SDimitry Andric 13381ad6265SDimitry Andric return {}; 134fe6060f1SDimitry Andric } 135fe6060f1SDimitry Andric 136fe6060f1SDimitry Andric void ConcatInputSection::foldIdentical(ConcatInputSection *copy) { 137fe6060f1SDimitry Andric align = std::max(align, copy->align); 138fe6060f1SDimitry Andric copy->live = false; 139fe6060f1SDimitry Andric copy->wasCoalesced = true; 140fe6060f1SDimitry Andric copy->replacement = this; 14181ad6265SDimitry Andric for (auto ©Sym : copy->symbols) 14281ad6265SDimitry Andric copySym->wasIdenticalCodeFolded = true; 143349cc55cSDimitry Andric 144349cc55cSDimitry Andric // Merge the sorted vectors of symbols together. 145349cc55cSDimitry Andric auto it = symbols.begin(); 146349cc55cSDimitry Andric for (auto copyIt = copy->symbols.begin(); copyIt != copy->symbols.end();) { 147349cc55cSDimitry Andric if (it == symbols.end()) { 148349cc55cSDimitry Andric symbols.push_back(*copyIt++); 149349cc55cSDimitry Andric it = symbols.end(); 150349cc55cSDimitry Andric } else if ((*it)->value > (*copyIt)->value) { 151349cc55cSDimitry Andric std::swap(*it++, *copyIt); 152349cc55cSDimitry Andric } else { 153349cc55cSDimitry Andric ++it; 154349cc55cSDimitry Andric } 155349cc55cSDimitry Andric } 156349cc55cSDimitry Andric copy->symbols.clear(); 157349cc55cSDimitry Andric 158349cc55cSDimitry Andric // Remove duplicate compact unwind info for symbols at the same address. 159349cc55cSDimitry Andric if (symbols.empty()) 160349cc55cSDimitry Andric return; 161349cc55cSDimitry Andric it = symbols.begin(); 162349cc55cSDimitry Andric uint64_t v = (*it)->value; 163349cc55cSDimitry Andric for (++it; it != symbols.end(); ++it) { 164349cc55cSDimitry Andric Defined *d = *it; 165349cc55cSDimitry Andric if (d->value == v) 166349cc55cSDimitry Andric d->unwindEntry = nullptr; 167349cc55cSDimitry Andric else 168349cc55cSDimitry Andric v = d->value; 169349cc55cSDimitry Andric } 170fe6060f1SDimitry Andric } 171fe6060f1SDimitry Andric 172fe6060f1SDimitry Andric void ConcatInputSection::writeTo(uint8_t *buf) { 173fe6060f1SDimitry Andric assert(!shouldOmitFromOutput()); 174fe6060f1SDimitry Andric 1755ffd83dbSDimitry Andric if (getFileSize() == 0) 1765ffd83dbSDimitry Andric return; 1775ffd83dbSDimitry Andric 1785ffd83dbSDimitry Andric memcpy(buf, data.data(), data.size()); 1795ffd83dbSDimitry Andric 18081ad6265SDimitry Andric std::vector<uint64_t> relocTargets; 18181ad6265SDimitry Andric if (!optimizationHints.empty()) 18281ad6265SDimitry Andric relocTargets.reserve(relocs.size()); 18381ad6265SDimitry Andric 184fe6060f1SDimitry Andric for (size_t i = 0; i < relocs.size(); i++) { 185fe6060f1SDimitry Andric const Reloc &r = relocs[i]; 186fe6060f1SDimitry Andric uint8_t *loc = buf + r.offset; 187e8d8bef9SDimitry Andric uint64_t referentVA = 0; 188fe6060f1SDimitry Andric if (target->hasAttr(r.type, RelocAttrBits::SUBTRAHEND)) { 189fe6060f1SDimitry Andric const Symbol *fromSym = r.referent.get<Symbol *>(); 190fe6060f1SDimitry Andric const Reloc &minuend = relocs[++i]; 191fe6060f1SDimitry Andric uint64_t minuendVA; 192fe6060f1SDimitry Andric if (const Symbol *toSym = minuend.referent.dyn_cast<Symbol *>()) 193fe6060f1SDimitry Andric minuendVA = toSym->getVA() + minuend.addend; 194fe6060f1SDimitry Andric else { 195fe6060f1SDimitry Andric auto *referentIsec = minuend.referent.get<InputSection *>(); 196fe6060f1SDimitry Andric assert(!::shouldOmitFromOutput(referentIsec)); 197fe6060f1SDimitry Andric minuendVA = referentIsec->getVA(minuend.addend); 198fe6060f1SDimitry Andric } 199fe6060f1SDimitry Andric referentVA = minuendVA - fromSym->getVA(); 200fe6060f1SDimitry Andric } else if (auto *referentSym = r.referent.dyn_cast<Symbol *>()) { 201fe6060f1SDimitry Andric if (target->hasAttr(r.type, RelocAttrBits::LOAD) && 202fe6060f1SDimitry Andric !referentSym->isInGot()) 203fe6060f1SDimitry Andric target->relaxGotLoad(loc, r.type); 204753f127fSDimitry Andric // For dtrace symbols, do not handle them as normal undefined symbols 205753f127fSDimitry Andric if (referentSym->getName().startswith("___dtrace_")) { 206753f127fSDimitry Andric // Change dtrace call site to pre-defined instructions 207753f127fSDimitry Andric target->handleDtraceReloc(referentSym, r, loc); 208753f127fSDimitry Andric continue; 209753f127fSDimitry Andric } 210fe6060f1SDimitry Andric referentVA = resolveSymbolVA(referentSym, r.type) + r.addend; 2115ffd83dbSDimitry Andric 212fe6060f1SDimitry Andric if (isThreadLocalVariables(getFlags())) { 213e8d8bef9SDimitry Andric // References from thread-local variable sections are treated as offsets 214e8d8bef9SDimitry Andric // relative to the start of the thread-local data memory area, which 215e8d8bef9SDimitry Andric // is initialized via copying all the TLV data sections (which are all 216e8d8bef9SDimitry Andric // contiguous). 217e8d8bef9SDimitry Andric if (isa<Defined>(referentSym)) 218e8d8bef9SDimitry Andric referentVA -= firstTLVDataSection->addr; 2195ffd83dbSDimitry Andric } 220e8d8bef9SDimitry Andric } else if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) { 221fe6060f1SDimitry Andric assert(!::shouldOmitFromOutput(referentIsec)); 222fe6060f1SDimitry Andric referentVA = referentIsec->getVA(r.addend); 223e8d8bef9SDimitry Andric } 224fe6060f1SDimitry Andric target->relocateOne(loc, r, referentVA, getVA() + r.offset); 22581ad6265SDimitry Andric 22681ad6265SDimitry Andric if (!optimizationHints.empty()) 22781ad6265SDimitry Andric relocTargets.push_back(referentVA); 228e8d8bef9SDimitry Andric } 22981ad6265SDimitry Andric 23081ad6265SDimitry Andric if (!optimizationHints.empty()) 23181ad6265SDimitry Andric target->applyOptimizationHints(buf, this, relocTargets); 23281ad6265SDimitry Andric } 23381ad6265SDimitry Andric 23481ad6265SDimitry Andric ConcatInputSection *macho::makeSyntheticInputSection(StringRef segName, 23581ad6265SDimitry Andric StringRef sectName, 23681ad6265SDimitry Andric uint32_t flags, 23781ad6265SDimitry Andric ArrayRef<uint8_t> data, 23881ad6265SDimitry Andric uint32_t align) { 23981ad6265SDimitry Andric Section §ion = 24081ad6265SDimitry Andric *make<Section>(/*file=*/nullptr, segName, sectName, flags, /*addr=*/0); 24181ad6265SDimitry Andric auto isec = make<ConcatInputSection>(section, data, align); 24281ad6265SDimitry Andric section.subsections.push_back({0, isec}); 24381ad6265SDimitry Andric return isec; 244e8d8bef9SDimitry Andric } 245e8d8bef9SDimitry Andric 246fe6060f1SDimitry Andric void CStringInputSection::splitIntoPieces() { 247fe6060f1SDimitry Andric size_t off = 0; 248fe6060f1SDimitry Andric StringRef s = toStringRef(data); 249fe6060f1SDimitry Andric while (!s.empty()) { 250fe6060f1SDimitry Andric size_t end = s.find(0); 251fe6060f1SDimitry Andric if (end == StringRef::npos) 25281ad6265SDimitry Andric fatal(getLocation(off) + ": string is not null terminated"); 253fe6060f1SDimitry Andric size_t size = end + 1; 254fe6060f1SDimitry Andric uint32_t hash = config->dedupLiterals ? xxHash64(s.substr(0, size)) : 0; 255fe6060f1SDimitry Andric pieces.emplace_back(off, hash); 256fe6060f1SDimitry Andric s = s.substr(size); 257fe6060f1SDimitry Andric off += size; 258fe6060f1SDimitry Andric } 259fe6060f1SDimitry Andric } 260fe6060f1SDimitry Andric 261fe6060f1SDimitry Andric StringPiece &CStringInputSection::getStringPiece(uint64_t off) { 262fe6060f1SDimitry Andric if (off >= data.size()) 263fe6060f1SDimitry Andric fatal(toString(this) + ": offset is outside the section"); 264fe6060f1SDimitry Andric 265fe6060f1SDimitry Andric auto it = 266fe6060f1SDimitry Andric partition_point(pieces, [=](StringPiece p) { return p.inSecOff <= off; }); 267fe6060f1SDimitry Andric return it[-1]; 268fe6060f1SDimitry Andric } 269fe6060f1SDimitry Andric 270fe6060f1SDimitry Andric const StringPiece &CStringInputSection::getStringPiece(uint64_t off) const { 271fe6060f1SDimitry Andric return const_cast<CStringInputSection *>(this)->getStringPiece(off); 272fe6060f1SDimitry Andric } 273fe6060f1SDimitry Andric 274fe6060f1SDimitry Andric uint64_t CStringInputSection::getOffset(uint64_t off) const { 275fe6060f1SDimitry Andric const StringPiece &piece = getStringPiece(off); 276fe6060f1SDimitry Andric uint64_t addend = off - piece.inSecOff; 277fe6060f1SDimitry Andric return piece.outSecOff + addend; 278fe6060f1SDimitry Andric } 279fe6060f1SDimitry Andric 28081ad6265SDimitry Andric WordLiteralInputSection::WordLiteralInputSection(const Section §ion, 281fe6060f1SDimitry Andric ArrayRef<uint8_t> data, 28281ad6265SDimitry Andric uint32_t align) 28381ad6265SDimitry Andric : InputSection(WordLiteralKind, section, data, align) { 28481ad6265SDimitry Andric switch (sectionType(getFlags())) { 285fe6060f1SDimitry Andric case S_4BYTE_LITERALS: 286fe6060f1SDimitry Andric power2LiteralSize = 2; 287fe6060f1SDimitry Andric break; 288fe6060f1SDimitry Andric case S_8BYTE_LITERALS: 289fe6060f1SDimitry Andric power2LiteralSize = 3; 290fe6060f1SDimitry Andric break; 291fe6060f1SDimitry Andric case S_16BYTE_LITERALS: 292fe6060f1SDimitry Andric power2LiteralSize = 4; 293fe6060f1SDimitry Andric break; 294fe6060f1SDimitry Andric default: 295fe6060f1SDimitry Andric llvm_unreachable("invalid literal section type"); 296fe6060f1SDimitry Andric } 297fe6060f1SDimitry Andric 298fe6060f1SDimitry Andric live.resize(data.size() >> power2LiteralSize, !config->deadStrip); 299fe6060f1SDimitry Andric } 300fe6060f1SDimitry Andric 301fe6060f1SDimitry Andric uint64_t WordLiteralInputSection::getOffset(uint64_t off) const { 302fe6060f1SDimitry Andric auto *osec = cast<WordLiteralSection>(parent); 303349cc55cSDimitry Andric const uintptr_t buf = reinterpret_cast<uintptr_t>(data.data()); 304fe6060f1SDimitry Andric switch (sectionType(getFlags())) { 305fe6060f1SDimitry Andric case S_4BYTE_LITERALS: 306349cc55cSDimitry Andric return osec->getLiteral4Offset(buf + (off & ~3LLU)) | (off & 3); 307fe6060f1SDimitry Andric case S_8BYTE_LITERALS: 308349cc55cSDimitry Andric return osec->getLiteral8Offset(buf + (off & ~7LLU)) | (off & 7); 309fe6060f1SDimitry Andric case S_16BYTE_LITERALS: 310349cc55cSDimitry Andric return osec->getLiteral16Offset(buf + (off & ~15LLU)) | (off & 15); 311fe6060f1SDimitry Andric default: 312fe6060f1SDimitry Andric llvm_unreachable("invalid literal section type"); 313fe6060f1SDimitry Andric } 314fe6060f1SDimitry Andric } 315fe6060f1SDimitry Andric 316fe6060f1SDimitry Andric bool macho::isCodeSection(const InputSection *isec) { 317fe6060f1SDimitry Andric uint32_t type = sectionType(isec->getFlags()); 318e8d8bef9SDimitry Andric if (type != S_REGULAR && type != S_COALESCED) 319e8d8bef9SDimitry Andric return false; 320e8d8bef9SDimitry Andric 321fe6060f1SDimitry Andric uint32_t attr = isec->getFlags() & SECTION_ATTRIBUTES_USR; 322e8d8bef9SDimitry Andric if (attr == S_ATTR_PURE_INSTRUCTIONS) 323e8d8bef9SDimitry Andric return true; 324e8d8bef9SDimitry Andric 325fe6060f1SDimitry Andric if (isec->getSegName() == segment_names::text) 326fe6060f1SDimitry Andric return StringSwitch<bool>(isec->getName()) 327fe6060f1SDimitry Andric .Cases(section_names::textCoalNt, section_names::staticInit, true) 328e8d8bef9SDimitry Andric .Default(false); 329e8d8bef9SDimitry Andric 330e8d8bef9SDimitry Andric return false; 331e8d8bef9SDimitry Andric } 332e8d8bef9SDimitry Andric 333fe6060f1SDimitry Andric bool macho::isCfStringSection(const InputSection *isec) { 334fe6060f1SDimitry Andric return isec->getName() == section_names::cfString && 335fe6060f1SDimitry Andric isec->getSegName() == segment_names::data; 336fe6060f1SDimitry Andric } 337fe6060f1SDimitry Andric 33881ad6265SDimitry Andric bool macho::isClassRefsSection(const InputSection *isec) { 33981ad6265SDimitry Andric return isec->getName() == section_names::objcClassRefs && 34081ad6265SDimitry Andric isec->getSegName() == segment_names::data; 34181ad6265SDimitry Andric } 34281ad6265SDimitry Andric 34381ad6265SDimitry Andric bool macho::isEhFrameSection(const InputSection *isec) { 34481ad6265SDimitry Andric return isec->getName() == section_names::ehFrame && 34581ad6265SDimitry Andric isec->getSegName() == segment_names::text; 34681ad6265SDimitry Andric } 34781ad6265SDimitry Andric 348fcaf7f86SDimitry Andric bool macho::isGccExceptTabSection(const InputSection *isec) { 349fcaf7f86SDimitry Andric return isec->getName() == section_names::gccExceptTab && 350fcaf7f86SDimitry Andric isec->getSegName() == segment_names::text; 351fcaf7f86SDimitry Andric } 352fcaf7f86SDimitry Andric 353e8d8bef9SDimitry Andric std::string lld::toString(const InputSection *isec) { 354fe6060f1SDimitry Andric return (toString(isec->getFile()) + ":(" + isec->getName() + ")").str(); 3555ffd83dbSDimitry Andric } 356