//===- InputSection.cpp ---------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "InputSection.h"
#include "ConcatOutputSection.h"
#include "Config.h"
#include "InputFiles.h"
#include "OutputSegment.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "UnwindInfoSection.h"
#include "Writer.h"
#include "lld/Common/Memory.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/xxhash.h"

#include <algorithm>

using namespace llvm;
using namespace llvm::MachO;
using namespace llvm::support;
using namespace lld;
using namespace lld::macho;

// Verify ConcatInputSection's size on 64-bit builds. The size of std::vector
// can differ based on STL debug levels (e.g. iterator debugging on MSVC's STL),
// so account for that.
32*4824e7fdSDimitry Andric static_assert(sizeof(void *) != 8 || 33*4824e7fdSDimitry Andric sizeof(ConcatInputSection) == sizeof(std::vector<Reloc>) + 96, 34349cc55cSDimitry Andric "Try to minimize ConcatInputSection's size, we create many " 35349cc55cSDimitry Andric "instances of it"); 36349cc55cSDimitry Andric 37fe6060f1SDimitry Andric std::vector<ConcatInputSection *> macho::inputSections; 385ffd83dbSDimitry Andric 39e8d8bef9SDimitry Andric uint64_t InputSection::getFileSize() const { 40fe6060f1SDimitry Andric return isZeroFill(getFlags()) ? 0 : getSize(); 41e8d8bef9SDimitry Andric } 42e8d8bef9SDimitry Andric 43fe6060f1SDimitry Andric uint64_t InputSection::getVA(uint64_t off) const { 44fe6060f1SDimitry Andric return parent->addr + getOffset(off); 45fe6060f1SDimitry Andric } 465ffd83dbSDimitry Andric 47fe6060f1SDimitry Andric static uint64_t resolveSymbolVA(const Symbol *sym, uint8_t type) { 48fe6060f1SDimitry Andric const RelocAttrs &relocAttrs = target->getRelocAttrs(type); 49fe6060f1SDimitry Andric if (relocAttrs.hasAttr(RelocAttrBits::BRANCH)) 50fe6060f1SDimitry Andric return sym->resolveBranchVA(); 51fe6060f1SDimitry Andric if (relocAttrs.hasAttr(RelocAttrBits::GOT)) 52fe6060f1SDimitry Andric return sym->resolveGotVA(); 53fe6060f1SDimitry Andric if (relocAttrs.hasAttr(RelocAttrBits::TLV)) 54fe6060f1SDimitry Andric return sym->resolveTlvVA(); 55fe6060f1SDimitry Andric return sym->getVA(); 56fe6060f1SDimitry Andric } 57fe6060f1SDimitry Andric 58fe6060f1SDimitry Andric // ICF needs to hash any section that might potentially be duplicated so 59fe6060f1SDimitry Andric // that it can match on content rather than identity. 
60fe6060f1SDimitry Andric bool ConcatInputSection::isHashableForICF() const { 61fe6060f1SDimitry Andric switch (sectionType(getFlags())) { 62fe6060f1SDimitry Andric case S_REGULAR: 63fe6060f1SDimitry Andric return true; 64fe6060f1SDimitry Andric case S_CSTRING_LITERALS: 65fe6060f1SDimitry Andric case S_4BYTE_LITERALS: 66fe6060f1SDimitry Andric case S_8BYTE_LITERALS: 67fe6060f1SDimitry Andric case S_16BYTE_LITERALS: 68fe6060f1SDimitry Andric case S_LITERAL_POINTERS: 69fe6060f1SDimitry Andric llvm_unreachable("found unexpected literal type in ConcatInputSection"); 70fe6060f1SDimitry Andric case S_ZEROFILL: 71fe6060f1SDimitry Andric case S_GB_ZEROFILL: 72fe6060f1SDimitry Andric case S_NON_LAZY_SYMBOL_POINTERS: 73fe6060f1SDimitry Andric case S_LAZY_SYMBOL_POINTERS: 74fe6060f1SDimitry Andric case S_SYMBOL_STUBS: 75fe6060f1SDimitry Andric case S_MOD_INIT_FUNC_POINTERS: 76fe6060f1SDimitry Andric case S_MOD_TERM_FUNC_POINTERS: 77fe6060f1SDimitry Andric case S_COALESCED: 78fe6060f1SDimitry Andric case S_INTERPOSING: 79fe6060f1SDimitry Andric case S_DTRACE_DOF: 80fe6060f1SDimitry Andric case S_LAZY_DYLIB_SYMBOL_POINTERS: 81fe6060f1SDimitry Andric case S_THREAD_LOCAL_REGULAR: 82fe6060f1SDimitry Andric case S_THREAD_LOCAL_ZEROFILL: 83fe6060f1SDimitry Andric case S_THREAD_LOCAL_VARIABLES: 84fe6060f1SDimitry Andric case S_THREAD_LOCAL_VARIABLE_POINTERS: 85fe6060f1SDimitry Andric case S_THREAD_LOCAL_INIT_FUNCTION_POINTERS: 86fe6060f1SDimitry Andric return false; 87fe6060f1SDimitry Andric default: 88fe6060f1SDimitry Andric llvm_unreachable("Section type"); 89fe6060f1SDimitry Andric } 90fe6060f1SDimitry Andric } 91fe6060f1SDimitry Andric 92fe6060f1SDimitry Andric void ConcatInputSection::hashForICF() { 93fe6060f1SDimitry Andric assert(data.data()); // zeroFill section data has nullptr with non-zero size 94fe6060f1SDimitry Andric assert(icfEqClass[0] == 0); // don't overwrite a unique ID! 
95fe6060f1SDimitry Andric // Turn-on the top bit to guarantee that valid hashes have no collisions 96fe6060f1SDimitry Andric // with the small-integer unique IDs for ICF-ineligible sections 97fe6060f1SDimitry Andric icfEqClass[0] = xxHash64(data) | (1ull << 63); 98fe6060f1SDimitry Andric } 99fe6060f1SDimitry Andric 100fe6060f1SDimitry Andric void ConcatInputSection::foldIdentical(ConcatInputSection *copy) { 101fe6060f1SDimitry Andric align = std::max(align, copy->align); 102fe6060f1SDimitry Andric copy->live = false; 103fe6060f1SDimitry Andric copy->wasCoalesced = true; 104fe6060f1SDimitry Andric copy->replacement = this; 105349cc55cSDimitry Andric 106349cc55cSDimitry Andric // Merge the sorted vectors of symbols together. 107349cc55cSDimitry Andric auto it = symbols.begin(); 108349cc55cSDimitry Andric for (auto copyIt = copy->symbols.begin(); copyIt != copy->symbols.end();) { 109349cc55cSDimitry Andric if (it == symbols.end()) { 110349cc55cSDimitry Andric symbols.push_back(*copyIt++); 111349cc55cSDimitry Andric it = symbols.end(); 112349cc55cSDimitry Andric } else if ((*it)->value > (*copyIt)->value) { 113349cc55cSDimitry Andric std::swap(*it++, *copyIt); 114349cc55cSDimitry Andric } else { 115349cc55cSDimitry Andric ++it; 116349cc55cSDimitry Andric } 117349cc55cSDimitry Andric } 118349cc55cSDimitry Andric copy->symbols.clear(); 119349cc55cSDimitry Andric 120349cc55cSDimitry Andric // Remove duplicate compact unwind info for symbols at the same address. 
121349cc55cSDimitry Andric if (symbols.empty()) 122349cc55cSDimitry Andric return; 123349cc55cSDimitry Andric it = symbols.begin(); 124349cc55cSDimitry Andric uint64_t v = (*it)->value; 125349cc55cSDimitry Andric for (++it; it != symbols.end(); ++it) { 126349cc55cSDimitry Andric Defined *d = *it; 127349cc55cSDimitry Andric if (d->value == v) 128349cc55cSDimitry Andric d->unwindEntry = nullptr; 129349cc55cSDimitry Andric else 130349cc55cSDimitry Andric v = d->value; 131349cc55cSDimitry Andric } 132fe6060f1SDimitry Andric } 133fe6060f1SDimitry Andric 134fe6060f1SDimitry Andric void ConcatInputSection::writeTo(uint8_t *buf) { 135fe6060f1SDimitry Andric assert(!shouldOmitFromOutput()); 136fe6060f1SDimitry Andric 1375ffd83dbSDimitry Andric if (getFileSize() == 0) 1385ffd83dbSDimitry Andric return; 1395ffd83dbSDimitry Andric 1405ffd83dbSDimitry Andric memcpy(buf, data.data(), data.size()); 1415ffd83dbSDimitry Andric 142fe6060f1SDimitry Andric for (size_t i = 0; i < relocs.size(); i++) { 143fe6060f1SDimitry Andric const Reloc &r = relocs[i]; 144fe6060f1SDimitry Andric uint8_t *loc = buf + r.offset; 145e8d8bef9SDimitry Andric uint64_t referentVA = 0; 146fe6060f1SDimitry Andric if (target->hasAttr(r.type, RelocAttrBits::SUBTRAHEND)) { 147fe6060f1SDimitry Andric const Symbol *fromSym = r.referent.get<Symbol *>(); 148fe6060f1SDimitry Andric const Reloc &minuend = relocs[++i]; 149fe6060f1SDimitry Andric uint64_t minuendVA; 150fe6060f1SDimitry Andric if (const Symbol *toSym = minuend.referent.dyn_cast<Symbol *>()) 151fe6060f1SDimitry Andric minuendVA = toSym->getVA() + minuend.addend; 152fe6060f1SDimitry Andric else { 153fe6060f1SDimitry Andric auto *referentIsec = minuend.referent.get<InputSection *>(); 154fe6060f1SDimitry Andric assert(!::shouldOmitFromOutput(referentIsec)); 155fe6060f1SDimitry Andric minuendVA = referentIsec->getVA(minuend.addend); 156fe6060f1SDimitry Andric } 157fe6060f1SDimitry Andric referentVA = minuendVA - fromSym->getVA(); 158fe6060f1SDimitry Andric } 
else if (auto *referentSym = r.referent.dyn_cast<Symbol *>()) { 159fe6060f1SDimitry Andric if (target->hasAttr(r.type, RelocAttrBits::LOAD) && 160fe6060f1SDimitry Andric !referentSym->isInGot()) 161fe6060f1SDimitry Andric target->relaxGotLoad(loc, r.type); 162fe6060f1SDimitry Andric referentVA = resolveSymbolVA(referentSym, r.type) + r.addend; 1635ffd83dbSDimitry Andric 164fe6060f1SDimitry Andric if (isThreadLocalVariables(getFlags())) { 165e8d8bef9SDimitry Andric // References from thread-local variable sections are treated as offsets 166e8d8bef9SDimitry Andric // relative to the start of the thread-local data memory area, which 167e8d8bef9SDimitry Andric // is initialized via copying all the TLV data sections (which are all 168e8d8bef9SDimitry Andric // contiguous). 169e8d8bef9SDimitry Andric if (isa<Defined>(referentSym)) 170e8d8bef9SDimitry Andric referentVA -= firstTLVDataSection->addr; 1715ffd83dbSDimitry Andric } 172e8d8bef9SDimitry Andric } else if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) { 173fe6060f1SDimitry Andric assert(!::shouldOmitFromOutput(referentIsec)); 174fe6060f1SDimitry Andric referentVA = referentIsec->getVA(r.addend); 175e8d8bef9SDimitry Andric } 176fe6060f1SDimitry Andric target->relocateOne(loc, r, referentVA, getVA() + r.offset); 177e8d8bef9SDimitry Andric } 178e8d8bef9SDimitry Andric } 179e8d8bef9SDimitry Andric 180fe6060f1SDimitry Andric void CStringInputSection::splitIntoPieces() { 181fe6060f1SDimitry Andric size_t off = 0; 182fe6060f1SDimitry Andric StringRef s = toStringRef(data); 183fe6060f1SDimitry Andric while (!s.empty()) { 184fe6060f1SDimitry Andric size_t end = s.find(0); 185fe6060f1SDimitry Andric if (end == StringRef::npos) 186fe6060f1SDimitry Andric fatal(toString(this) + ": string is not null terminated"); 187fe6060f1SDimitry Andric size_t size = end + 1; 188fe6060f1SDimitry Andric uint32_t hash = config->dedupLiterals ? 
xxHash64(s.substr(0, size)) : 0; 189fe6060f1SDimitry Andric pieces.emplace_back(off, hash); 190fe6060f1SDimitry Andric s = s.substr(size); 191fe6060f1SDimitry Andric off += size; 192fe6060f1SDimitry Andric } 193fe6060f1SDimitry Andric } 194fe6060f1SDimitry Andric 195fe6060f1SDimitry Andric StringPiece &CStringInputSection::getStringPiece(uint64_t off) { 196fe6060f1SDimitry Andric if (off >= data.size()) 197fe6060f1SDimitry Andric fatal(toString(this) + ": offset is outside the section"); 198fe6060f1SDimitry Andric 199fe6060f1SDimitry Andric auto it = 200fe6060f1SDimitry Andric partition_point(pieces, [=](StringPiece p) { return p.inSecOff <= off; }); 201fe6060f1SDimitry Andric return it[-1]; 202fe6060f1SDimitry Andric } 203fe6060f1SDimitry Andric 204fe6060f1SDimitry Andric const StringPiece &CStringInputSection::getStringPiece(uint64_t off) const { 205fe6060f1SDimitry Andric return const_cast<CStringInputSection *>(this)->getStringPiece(off); 206fe6060f1SDimitry Andric } 207fe6060f1SDimitry Andric 208fe6060f1SDimitry Andric uint64_t CStringInputSection::getOffset(uint64_t off) const { 209fe6060f1SDimitry Andric const StringPiece &piece = getStringPiece(off); 210fe6060f1SDimitry Andric uint64_t addend = off - piece.inSecOff; 211fe6060f1SDimitry Andric return piece.outSecOff + addend; 212fe6060f1SDimitry Andric } 213fe6060f1SDimitry Andric 214fe6060f1SDimitry Andric WordLiteralInputSection::WordLiteralInputSection(StringRef segname, 215fe6060f1SDimitry Andric StringRef name, 216fe6060f1SDimitry Andric InputFile *file, 217fe6060f1SDimitry Andric ArrayRef<uint8_t> data, 218fe6060f1SDimitry Andric uint32_t align, uint32_t flags) 219fe6060f1SDimitry Andric : InputSection(WordLiteralKind, segname, name, file, data, align, flags) { 220fe6060f1SDimitry Andric switch (sectionType(flags)) { 221fe6060f1SDimitry Andric case S_4BYTE_LITERALS: 222fe6060f1SDimitry Andric power2LiteralSize = 2; 223fe6060f1SDimitry Andric break; 224fe6060f1SDimitry Andric case S_8BYTE_LITERALS: 
225fe6060f1SDimitry Andric power2LiteralSize = 3; 226fe6060f1SDimitry Andric break; 227fe6060f1SDimitry Andric case S_16BYTE_LITERALS: 228fe6060f1SDimitry Andric power2LiteralSize = 4; 229fe6060f1SDimitry Andric break; 230fe6060f1SDimitry Andric default: 231fe6060f1SDimitry Andric llvm_unreachable("invalid literal section type"); 232fe6060f1SDimitry Andric } 233fe6060f1SDimitry Andric 234fe6060f1SDimitry Andric live.resize(data.size() >> power2LiteralSize, !config->deadStrip); 235fe6060f1SDimitry Andric } 236fe6060f1SDimitry Andric 237fe6060f1SDimitry Andric uint64_t WordLiteralInputSection::getOffset(uint64_t off) const { 238fe6060f1SDimitry Andric auto *osec = cast<WordLiteralSection>(parent); 239349cc55cSDimitry Andric const uintptr_t buf = reinterpret_cast<uintptr_t>(data.data()); 240fe6060f1SDimitry Andric switch (sectionType(getFlags())) { 241fe6060f1SDimitry Andric case S_4BYTE_LITERALS: 242349cc55cSDimitry Andric return osec->getLiteral4Offset(buf + (off & ~3LLU)) | (off & 3); 243fe6060f1SDimitry Andric case S_8BYTE_LITERALS: 244349cc55cSDimitry Andric return osec->getLiteral8Offset(buf + (off & ~7LLU)) | (off & 7); 245fe6060f1SDimitry Andric case S_16BYTE_LITERALS: 246349cc55cSDimitry Andric return osec->getLiteral16Offset(buf + (off & ~15LLU)) | (off & 15); 247fe6060f1SDimitry Andric default: 248fe6060f1SDimitry Andric llvm_unreachable("invalid literal section type"); 249fe6060f1SDimitry Andric } 250fe6060f1SDimitry Andric } 251fe6060f1SDimitry Andric 252fe6060f1SDimitry Andric bool macho::isCodeSection(const InputSection *isec) { 253fe6060f1SDimitry Andric uint32_t type = sectionType(isec->getFlags()); 254e8d8bef9SDimitry Andric if (type != S_REGULAR && type != S_COALESCED) 255e8d8bef9SDimitry Andric return false; 256e8d8bef9SDimitry Andric 257fe6060f1SDimitry Andric uint32_t attr = isec->getFlags() & SECTION_ATTRIBUTES_USR; 258e8d8bef9SDimitry Andric if (attr == S_ATTR_PURE_INSTRUCTIONS) 259e8d8bef9SDimitry Andric return true; 260e8d8bef9SDimitry Andric 
261fe6060f1SDimitry Andric if (isec->getSegName() == segment_names::text) 262fe6060f1SDimitry Andric return StringSwitch<bool>(isec->getName()) 263fe6060f1SDimitry Andric .Cases(section_names::textCoalNt, section_names::staticInit, true) 264e8d8bef9SDimitry Andric .Default(false); 265e8d8bef9SDimitry Andric 266e8d8bef9SDimitry Andric return false; 267e8d8bef9SDimitry Andric } 268e8d8bef9SDimitry Andric 269fe6060f1SDimitry Andric bool macho::isCfStringSection(const InputSection *isec) { 270fe6060f1SDimitry Andric return isec->getName() == section_names::cfString && 271fe6060f1SDimitry Andric isec->getSegName() == segment_names::data; 272fe6060f1SDimitry Andric } 273fe6060f1SDimitry Andric 274e8d8bef9SDimitry Andric std::string lld::toString(const InputSection *isec) { 275fe6060f1SDimitry Andric return (toString(isec->getFile()) + ":(" + isec->getName() + ")").str(); 2765ffd83dbSDimitry Andric } 277