//===- InputSection.cpp ---------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "InputSection.h"
#include "ConcatOutputSection.h"
#include "Config.h"
#include "InputFiles.h"
#include "OutputSegment.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "UnwindInfoSection.h"
#include "Writer.h"
#include "lld/Common/Memory.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/xxhash.h"

#include <algorithm>

using namespace llvm;
using namespace llvm::MachO;
using namespace llvm::support;
using namespace lld;
using namespace lld::macho;

// Verify ConcatInputSection's size on 64-bit builds. The size of std::vector
// can differ based on STL debug levels (e.g. iterator debugging on MSVC's STL),
// so account for that.
32*81ad6265SDimitry Andric static_assert(sizeof(void *) != 8 || sizeof(ConcatInputSection) == 33*81ad6265SDimitry Andric sizeof(std::vector<Reloc>) + 104, 34349cc55cSDimitry Andric "Try to minimize ConcatInputSection's size, we create many " 35349cc55cSDimitry Andric "instances of it"); 36349cc55cSDimitry Andric 37fe6060f1SDimitry Andric std::vector<ConcatInputSection *> macho::inputSections; 385ffd83dbSDimitry Andric 39e8d8bef9SDimitry Andric uint64_t InputSection::getFileSize() const { 40fe6060f1SDimitry Andric return isZeroFill(getFlags()) ? 0 : getSize(); 41e8d8bef9SDimitry Andric } 42e8d8bef9SDimitry Andric 43fe6060f1SDimitry Andric uint64_t InputSection::getVA(uint64_t off) const { 44fe6060f1SDimitry Andric return parent->addr + getOffset(off); 45fe6060f1SDimitry Andric } 465ffd83dbSDimitry Andric 47fe6060f1SDimitry Andric static uint64_t resolveSymbolVA(const Symbol *sym, uint8_t type) { 48fe6060f1SDimitry Andric const RelocAttrs &relocAttrs = target->getRelocAttrs(type); 49fe6060f1SDimitry Andric if (relocAttrs.hasAttr(RelocAttrBits::BRANCH)) 50fe6060f1SDimitry Andric return sym->resolveBranchVA(); 51fe6060f1SDimitry Andric if (relocAttrs.hasAttr(RelocAttrBits::GOT)) 52fe6060f1SDimitry Andric return sym->resolveGotVA(); 53fe6060f1SDimitry Andric if (relocAttrs.hasAttr(RelocAttrBits::TLV)) 54fe6060f1SDimitry Andric return sym->resolveTlvVA(); 55fe6060f1SDimitry Andric return sym->getVA(); 56fe6060f1SDimitry Andric } 57fe6060f1SDimitry Andric 58*81ad6265SDimitry Andric const Defined *InputSection::getContainingSymbol(uint64_t off) const { 59*81ad6265SDimitry Andric auto *nextSym = llvm::upper_bound( 60*81ad6265SDimitry Andric symbols, off, [](uint64_t a, const Defined *b) { return a < b->value; }); 61*81ad6265SDimitry Andric if (nextSym == symbols.begin()) 62*81ad6265SDimitry Andric return nullptr; 63*81ad6265SDimitry Andric return *std::prev(nextSym); 64*81ad6265SDimitry Andric } 65*81ad6265SDimitry Andric 66*81ad6265SDimitry Andric std::string 
InputSection::getLocation(uint64_t off) const { 67*81ad6265SDimitry Andric // First, try to find a symbol that's near the offset. Use it as a reference 68*81ad6265SDimitry Andric // point. 69*81ad6265SDimitry Andric if (auto *sym = getContainingSymbol(off)) 70*81ad6265SDimitry Andric return (toString(getFile()) + ":(symbol " + sym->getName() + "+0x" + 71*81ad6265SDimitry Andric Twine::utohexstr(off - sym->value) + ")") 72*81ad6265SDimitry Andric .str(); 73*81ad6265SDimitry Andric 74*81ad6265SDimitry Andric // If that fails, use the section itself as a reference point. 75*81ad6265SDimitry Andric for (const Subsection &subsec : section.subsections) { 76*81ad6265SDimitry Andric if (subsec.isec == this) { 77*81ad6265SDimitry Andric off += subsec.offset; 78*81ad6265SDimitry Andric break; 79fe6060f1SDimitry Andric } 80fe6060f1SDimitry Andric } 81fe6060f1SDimitry Andric 82*81ad6265SDimitry Andric return (toString(getFile()) + ":(" + getName() + "+0x" + 83*81ad6265SDimitry Andric Twine::utohexstr(off) + ")") 84*81ad6265SDimitry Andric .str(); 85*81ad6265SDimitry Andric } 86*81ad6265SDimitry Andric 87*81ad6265SDimitry Andric std::string InputSection::getSourceLocation(uint64_t off) const { 88*81ad6265SDimitry Andric auto *obj = dyn_cast_or_null<ObjFile>(getFile()); 89*81ad6265SDimitry Andric if (!obj) 90*81ad6265SDimitry Andric return {}; 91*81ad6265SDimitry Andric 92*81ad6265SDimitry Andric DWARFCache *dwarf = obj->getDwarf(); 93*81ad6265SDimitry Andric if (!dwarf) 94*81ad6265SDimitry Andric return std::string(); 95*81ad6265SDimitry Andric 96*81ad6265SDimitry Andric for (const Subsection &subsec : section.subsections) { 97*81ad6265SDimitry Andric if (subsec.isec == this) { 98*81ad6265SDimitry Andric off += subsec.offset; 99*81ad6265SDimitry Andric break; 100*81ad6265SDimitry Andric } 101*81ad6265SDimitry Andric } 102*81ad6265SDimitry Andric 103*81ad6265SDimitry Andric auto createMsg = [&](StringRef path, unsigned line) { 104*81ad6265SDimitry Andric std::string filename = 
sys::path::filename(path).str(); 105*81ad6265SDimitry Andric std::string lineStr = (":" + Twine(line)).str(); 106*81ad6265SDimitry Andric if (filename == path) 107*81ad6265SDimitry Andric return filename + lineStr; 108*81ad6265SDimitry Andric return (filename + lineStr + " (" + path + lineStr + ")").str(); 109*81ad6265SDimitry Andric }; 110*81ad6265SDimitry Andric 111*81ad6265SDimitry Andric // First, look up a function for a given offset. 112*81ad6265SDimitry Andric if (Optional<DILineInfo> li = dwarf->getDILineInfo( 113*81ad6265SDimitry Andric section.addr + off, object::SectionedAddress::UndefSection)) 114*81ad6265SDimitry Andric return createMsg(li->FileName, li->Line); 115*81ad6265SDimitry Andric 116*81ad6265SDimitry Andric // If it failed, look up again as a variable. 117*81ad6265SDimitry Andric if (const Defined *sym = getContainingSymbol(off)) { 118*81ad6265SDimitry Andric // Symbols are generally prefixed with an underscore, which is not included 119*81ad6265SDimitry Andric // in the debug information. 120*81ad6265SDimitry Andric StringRef symName = sym->getName(); 121*81ad6265SDimitry Andric if (!symName.empty() && symName[0] == '_') 122*81ad6265SDimitry Andric symName = symName.substr(1); 123*81ad6265SDimitry Andric 124*81ad6265SDimitry Andric if (Optional<std::pair<std::string, unsigned>> fileLine = 125*81ad6265SDimitry Andric dwarf->getVariableLoc(symName)) 126*81ad6265SDimitry Andric return createMsg(fileLine->first, fileLine->second); 127*81ad6265SDimitry Andric } 128*81ad6265SDimitry Andric 129*81ad6265SDimitry Andric // Try to get the source file's name from the DWARF information. 
130*81ad6265SDimitry Andric if (obj->compileUnit) 131*81ad6265SDimitry Andric return obj->sourceFile(); 132*81ad6265SDimitry Andric 133*81ad6265SDimitry Andric return {}; 134fe6060f1SDimitry Andric } 135fe6060f1SDimitry Andric 136fe6060f1SDimitry Andric void ConcatInputSection::foldIdentical(ConcatInputSection *copy) { 137fe6060f1SDimitry Andric align = std::max(align, copy->align); 138fe6060f1SDimitry Andric copy->live = false; 139fe6060f1SDimitry Andric copy->wasCoalesced = true; 140fe6060f1SDimitry Andric copy->replacement = this; 141*81ad6265SDimitry Andric for (auto ©Sym : copy->symbols) 142*81ad6265SDimitry Andric copySym->wasIdenticalCodeFolded = true; 143349cc55cSDimitry Andric 144349cc55cSDimitry Andric // Merge the sorted vectors of symbols together. 145349cc55cSDimitry Andric auto it = symbols.begin(); 146349cc55cSDimitry Andric for (auto copyIt = copy->symbols.begin(); copyIt != copy->symbols.end();) { 147349cc55cSDimitry Andric if (it == symbols.end()) { 148349cc55cSDimitry Andric symbols.push_back(*copyIt++); 149349cc55cSDimitry Andric it = symbols.end(); 150349cc55cSDimitry Andric } else if ((*it)->value > (*copyIt)->value) { 151349cc55cSDimitry Andric std::swap(*it++, *copyIt); 152349cc55cSDimitry Andric } else { 153349cc55cSDimitry Andric ++it; 154349cc55cSDimitry Andric } 155349cc55cSDimitry Andric } 156349cc55cSDimitry Andric copy->symbols.clear(); 157349cc55cSDimitry Andric 158349cc55cSDimitry Andric // Remove duplicate compact unwind info for symbols at the same address. 
159349cc55cSDimitry Andric if (symbols.empty()) 160349cc55cSDimitry Andric return; 161349cc55cSDimitry Andric it = symbols.begin(); 162349cc55cSDimitry Andric uint64_t v = (*it)->value; 163349cc55cSDimitry Andric for (++it; it != symbols.end(); ++it) { 164349cc55cSDimitry Andric Defined *d = *it; 165349cc55cSDimitry Andric if (d->value == v) 166349cc55cSDimitry Andric d->unwindEntry = nullptr; 167349cc55cSDimitry Andric else 168349cc55cSDimitry Andric v = d->value; 169349cc55cSDimitry Andric } 170fe6060f1SDimitry Andric } 171fe6060f1SDimitry Andric 172fe6060f1SDimitry Andric void ConcatInputSection::writeTo(uint8_t *buf) { 173fe6060f1SDimitry Andric assert(!shouldOmitFromOutput()); 174fe6060f1SDimitry Andric 1755ffd83dbSDimitry Andric if (getFileSize() == 0) 1765ffd83dbSDimitry Andric return; 1775ffd83dbSDimitry Andric 1785ffd83dbSDimitry Andric memcpy(buf, data.data(), data.size()); 1795ffd83dbSDimitry Andric 180*81ad6265SDimitry Andric std::vector<uint64_t> relocTargets; 181*81ad6265SDimitry Andric if (!optimizationHints.empty()) 182*81ad6265SDimitry Andric relocTargets.reserve(relocs.size()); 183*81ad6265SDimitry Andric 184fe6060f1SDimitry Andric for (size_t i = 0; i < relocs.size(); i++) { 185fe6060f1SDimitry Andric const Reloc &r = relocs[i]; 186fe6060f1SDimitry Andric uint8_t *loc = buf + r.offset; 187e8d8bef9SDimitry Andric uint64_t referentVA = 0; 188fe6060f1SDimitry Andric if (target->hasAttr(r.type, RelocAttrBits::SUBTRAHEND)) { 189fe6060f1SDimitry Andric const Symbol *fromSym = r.referent.get<Symbol *>(); 190fe6060f1SDimitry Andric const Reloc &minuend = relocs[++i]; 191fe6060f1SDimitry Andric uint64_t minuendVA; 192fe6060f1SDimitry Andric if (const Symbol *toSym = minuend.referent.dyn_cast<Symbol *>()) 193fe6060f1SDimitry Andric minuendVA = toSym->getVA() + minuend.addend; 194fe6060f1SDimitry Andric else { 195fe6060f1SDimitry Andric auto *referentIsec = minuend.referent.get<InputSection *>(); 196fe6060f1SDimitry Andric 
assert(!::shouldOmitFromOutput(referentIsec)); 197fe6060f1SDimitry Andric minuendVA = referentIsec->getVA(minuend.addend); 198fe6060f1SDimitry Andric } 199fe6060f1SDimitry Andric referentVA = minuendVA - fromSym->getVA(); 200fe6060f1SDimitry Andric } else if (auto *referentSym = r.referent.dyn_cast<Symbol *>()) { 201fe6060f1SDimitry Andric if (target->hasAttr(r.type, RelocAttrBits::LOAD) && 202fe6060f1SDimitry Andric !referentSym->isInGot()) 203fe6060f1SDimitry Andric target->relaxGotLoad(loc, r.type); 204fe6060f1SDimitry Andric referentVA = resolveSymbolVA(referentSym, r.type) + r.addend; 2055ffd83dbSDimitry Andric 206fe6060f1SDimitry Andric if (isThreadLocalVariables(getFlags())) { 207e8d8bef9SDimitry Andric // References from thread-local variable sections are treated as offsets 208e8d8bef9SDimitry Andric // relative to the start of the thread-local data memory area, which 209e8d8bef9SDimitry Andric // is initialized via copying all the TLV data sections (which are all 210e8d8bef9SDimitry Andric // contiguous). 
211e8d8bef9SDimitry Andric if (isa<Defined>(referentSym)) 212e8d8bef9SDimitry Andric referentVA -= firstTLVDataSection->addr; 2135ffd83dbSDimitry Andric } 214e8d8bef9SDimitry Andric } else if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) { 215fe6060f1SDimitry Andric assert(!::shouldOmitFromOutput(referentIsec)); 216fe6060f1SDimitry Andric referentVA = referentIsec->getVA(r.addend); 217e8d8bef9SDimitry Andric } 218fe6060f1SDimitry Andric target->relocateOne(loc, r, referentVA, getVA() + r.offset); 219*81ad6265SDimitry Andric 220*81ad6265SDimitry Andric if (!optimizationHints.empty()) 221*81ad6265SDimitry Andric relocTargets.push_back(referentVA); 222e8d8bef9SDimitry Andric } 223*81ad6265SDimitry Andric 224*81ad6265SDimitry Andric if (!optimizationHints.empty()) 225*81ad6265SDimitry Andric target->applyOptimizationHints(buf, this, relocTargets); 226*81ad6265SDimitry Andric } 227*81ad6265SDimitry Andric 228*81ad6265SDimitry Andric ConcatInputSection *macho::makeSyntheticInputSection(StringRef segName, 229*81ad6265SDimitry Andric StringRef sectName, 230*81ad6265SDimitry Andric uint32_t flags, 231*81ad6265SDimitry Andric ArrayRef<uint8_t> data, 232*81ad6265SDimitry Andric uint32_t align) { 233*81ad6265SDimitry Andric Section §ion = 234*81ad6265SDimitry Andric *make<Section>(/*file=*/nullptr, segName, sectName, flags, /*addr=*/0); 235*81ad6265SDimitry Andric auto isec = make<ConcatInputSection>(section, data, align); 236*81ad6265SDimitry Andric section.subsections.push_back({0, isec}); 237*81ad6265SDimitry Andric return isec; 238e8d8bef9SDimitry Andric } 239e8d8bef9SDimitry Andric 240fe6060f1SDimitry Andric void CStringInputSection::splitIntoPieces() { 241fe6060f1SDimitry Andric size_t off = 0; 242fe6060f1SDimitry Andric StringRef s = toStringRef(data); 243fe6060f1SDimitry Andric while (!s.empty()) { 244fe6060f1SDimitry Andric size_t end = s.find(0); 245fe6060f1SDimitry Andric if (end == StringRef::npos) 246*81ad6265SDimitry Andric fatal(getLocation(off) + 
": string is not null terminated"); 247fe6060f1SDimitry Andric size_t size = end + 1; 248fe6060f1SDimitry Andric uint32_t hash = config->dedupLiterals ? xxHash64(s.substr(0, size)) : 0; 249fe6060f1SDimitry Andric pieces.emplace_back(off, hash); 250fe6060f1SDimitry Andric s = s.substr(size); 251fe6060f1SDimitry Andric off += size; 252fe6060f1SDimitry Andric } 253fe6060f1SDimitry Andric } 254fe6060f1SDimitry Andric 255fe6060f1SDimitry Andric StringPiece &CStringInputSection::getStringPiece(uint64_t off) { 256fe6060f1SDimitry Andric if (off >= data.size()) 257fe6060f1SDimitry Andric fatal(toString(this) + ": offset is outside the section"); 258fe6060f1SDimitry Andric 259fe6060f1SDimitry Andric auto it = 260fe6060f1SDimitry Andric partition_point(pieces, [=](StringPiece p) { return p.inSecOff <= off; }); 261fe6060f1SDimitry Andric return it[-1]; 262fe6060f1SDimitry Andric } 263fe6060f1SDimitry Andric 264fe6060f1SDimitry Andric const StringPiece &CStringInputSection::getStringPiece(uint64_t off) const { 265fe6060f1SDimitry Andric return const_cast<CStringInputSection *>(this)->getStringPiece(off); 266fe6060f1SDimitry Andric } 267fe6060f1SDimitry Andric 268fe6060f1SDimitry Andric uint64_t CStringInputSection::getOffset(uint64_t off) const { 269fe6060f1SDimitry Andric const StringPiece &piece = getStringPiece(off); 270fe6060f1SDimitry Andric uint64_t addend = off - piece.inSecOff; 271fe6060f1SDimitry Andric return piece.outSecOff + addend; 272fe6060f1SDimitry Andric } 273fe6060f1SDimitry Andric 274*81ad6265SDimitry Andric WordLiteralInputSection::WordLiteralInputSection(const Section §ion, 275fe6060f1SDimitry Andric ArrayRef<uint8_t> data, 276*81ad6265SDimitry Andric uint32_t align) 277*81ad6265SDimitry Andric : InputSection(WordLiteralKind, section, data, align) { 278*81ad6265SDimitry Andric switch (sectionType(getFlags())) { 279fe6060f1SDimitry Andric case S_4BYTE_LITERALS: 280fe6060f1SDimitry Andric power2LiteralSize = 2; 281fe6060f1SDimitry Andric break; 
282fe6060f1SDimitry Andric case S_8BYTE_LITERALS: 283fe6060f1SDimitry Andric power2LiteralSize = 3; 284fe6060f1SDimitry Andric break; 285fe6060f1SDimitry Andric case S_16BYTE_LITERALS: 286fe6060f1SDimitry Andric power2LiteralSize = 4; 287fe6060f1SDimitry Andric break; 288fe6060f1SDimitry Andric default: 289fe6060f1SDimitry Andric llvm_unreachable("invalid literal section type"); 290fe6060f1SDimitry Andric } 291fe6060f1SDimitry Andric 292fe6060f1SDimitry Andric live.resize(data.size() >> power2LiteralSize, !config->deadStrip); 293fe6060f1SDimitry Andric } 294fe6060f1SDimitry Andric 295fe6060f1SDimitry Andric uint64_t WordLiteralInputSection::getOffset(uint64_t off) const { 296fe6060f1SDimitry Andric auto *osec = cast<WordLiteralSection>(parent); 297349cc55cSDimitry Andric const uintptr_t buf = reinterpret_cast<uintptr_t>(data.data()); 298fe6060f1SDimitry Andric switch (sectionType(getFlags())) { 299fe6060f1SDimitry Andric case S_4BYTE_LITERALS: 300349cc55cSDimitry Andric return osec->getLiteral4Offset(buf + (off & ~3LLU)) | (off & 3); 301fe6060f1SDimitry Andric case S_8BYTE_LITERALS: 302349cc55cSDimitry Andric return osec->getLiteral8Offset(buf + (off & ~7LLU)) | (off & 7); 303fe6060f1SDimitry Andric case S_16BYTE_LITERALS: 304349cc55cSDimitry Andric return osec->getLiteral16Offset(buf + (off & ~15LLU)) | (off & 15); 305fe6060f1SDimitry Andric default: 306fe6060f1SDimitry Andric llvm_unreachable("invalid literal section type"); 307fe6060f1SDimitry Andric } 308fe6060f1SDimitry Andric } 309fe6060f1SDimitry Andric 310fe6060f1SDimitry Andric bool macho::isCodeSection(const InputSection *isec) { 311fe6060f1SDimitry Andric uint32_t type = sectionType(isec->getFlags()); 312e8d8bef9SDimitry Andric if (type != S_REGULAR && type != S_COALESCED) 313e8d8bef9SDimitry Andric return false; 314e8d8bef9SDimitry Andric 315fe6060f1SDimitry Andric uint32_t attr = isec->getFlags() & SECTION_ATTRIBUTES_USR; 316e8d8bef9SDimitry Andric if (attr == S_ATTR_PURE_INSTRUCTIONS) 
317e8d8bef9SDimitry Andric return true; 318e8d8bef9SDimitry Andric 319fe6060f1SDimitry Andric if (isec->getSegName() == segment_names::text) 320fe6060f1SDimitry Andric return StringSwitch<bool>(isec->getName()) 321fe6060f1SDimitry Andric .Cases(section_names::textCoalNt, section_names::staticInit, true) 322e8d8bef9SDimitry Andric .Default(false); 323e8d8bef9SDimitry Andric 324e8d8bef9SDimitry Andric return false; 325e8d8bef9SDimitry Andric } 326e8d8bef9SDimitry Andric 327fe6060f1SDimitry Andric bool macho::isCfStringSection(const InputSection *isec) { 328fe6060f1SDimitry Andric return isec->getName() == section_names::cfString && 329fe6060f1SDimitry Andric isec->getSegName() == segment_names::data; 330fe6060f1SDimitry Andric } 331fe6060f1SDimitry Andric 332*81ad6265SDimitry Andric bool macho::isClassRefsSection(const InputSection *isec) { 333*81ad6265SDimitry Andric return isec->getName() == section_names::objcClassRefs && 334*81ad6265SDimitry Andric isec->getSegName() == segment_names::data; 335*81ad6265SDimitry Andric } 336*81ad6265SDimitry Andric 337*81ad6265SDimitry Andric bool macho::isEhFrameSection(const InputSection *isec) { 338*81ad6265SDimitry Andric return isec->getName() == section_names::ehFrame && 339*81ad6265SDimitry Andric isec->getSegName() == segment_names::text; 340*81ad6265SDimitry Andric } 341*81ad6265SDimitry Andric 342e8d8bef9SDimitry Andric std::string lld::toString(const InputSection *isec) { 343fe6060f1SDimitry Andric return (toString(isec->getFile()) + ":(" + isec->getName() + ")").str(); 3445ffd83dbSDimitry Andric } 345