//===- InputSection.cpp ---------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "InputSection.h"
#include "ConcatOutputSection.h"
#include "Config.h"
#include "InputFiles.h"
#include "OutputSegment.h"
#include "Sections.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "UnwindInfoSection.h"
#include "Writer.h"

#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/xxhash.h"

using namespace llvm;
using namespace llvm::MachO;
using namespace llvm::support;
using namespace lld;
using namespace lld::macho;

// Verify ConcatInputSection's size on 64-bit builds. The size of std::vector
// can differ based on STL debug levels (e.g. iterator debugging on MSVC's STL),
// so account for that.
static_assert(sizeof(void *) != 8 ||
                  sizeof(ConcatInputSection) == sizeof(std::vector<Reloc>) + 88,
              "Try to minimize ConcatInputSection's size, we create many "
              "instances of it");

// ConcatInputSections registered through addInputSection() that were not
// diverted to a synthetic section.
std::vector<ConcatInputSection *> macho::inputSections;
// Running counter that addInputSection() hands out as an input-order index.
int macho::inputSectionsOrder = 0;

// Call this function to add a new InputSection and have it routed to the
// appropriate container. Depending on its type and current config, it will
// either be added to 'inputSections' vector or to a synthetic section.
46 void lld::macho::addInputSection(InputSection *inputSection) { 47 if (auto *isec = dyn_cast<ConcatInputSection>(inputSection)) { 48 if (isec->isCoalescedWeak()) 49 return; 50 if (config->emitRelativeMethodLists && 51 ObjCMethListSection::isMethodList(isec)) { 52 if (in.objcMethList->inputOrder == UnspecifiedInputOrder) 53 in.objcMethList->inputOrder = inputSectionsOrder++; 54 in.objcMethList->addInput(isec); 55 isec->parent = in.objcMethList; 56 return; 57 } 58 if (config->emitInitOffsets && 59 sectionType(isec->getFlags()) == S_MOD_INIT_FUNC_POINTERS) { 60 in.initOffsets->addInput(isec); 61 return; 62 } 63 isec->outSecOff = inputSectionsOrder++; 64 auto *osec = ConcatOutputSection::getOrCreateForInput(isec); 65 isec->parent = osec; 66 inputSections.push_back(isec); 67 } else if (auto *isec = dyn_cast<CStringInputSection>(inputSection)) { 68 if (isec->getName() == section_names::objcMethname) { 69 if (in.objcMethnameSection->inputOrder == UnspecifiedInputOrder) 70 in.objcMethnameSection->inputOrder = inputSectionsOrder++; 71 in.objcMethnameSection->addInput(isec); 72 } else { 73 if (in.cStringSection->inputOrder == UnspecifiedInputOrder) 74 in.cStringSection->inputOrder = inputSectionsOrder++; 75 in.cStringSection->addInput(isec); 76 } 77 } else if (auto *isec = dyn_cast<WordLiteralInputSection>(inputSection)) { 78 if (in.wordLiteralSection->inputOrder == UnspecifiedInputOrder) 79 in.wordLiteralSection->inputOrder = inputSectionsOrder++; 80 in.wordLiteralSection->addInput(isec); 81 } else { 82 llvm_unreachable("unexpected input section kind"); 83 } 84 85 assert(inputSectionsOrder <= UnspecifiedInputOrder); 86 } 87 88 uint64_t InputSection::getFileSize() const { 89 return isZeroFill(getFlags()) ? 
0 : getSize(); 90 } 91 92 uint64_t InputSection::getVA(uint64_t off) const { 93 return parent->addr + getOffset(off); 94 } 95 96 static uint64_t resolveSymbolVA(const Symbol *sym, uint8_t type) { 97 const RelocAttrs &relocAttrs = target->getRelocAttrs(type); 98 if (relocAttrs.hasAttr(RelocAttrBits::BRANCH)) 99 return sym->resolveBranchVA(); 100 if (relocAttrs.hasAttr(RelocAttrBits::GOT)) 101 return sym->resolveGotVA(); 102 if (relocAttrs.hasAttr(RelocAttrBits::TLV)) 103 return sym->resolveTlvVA(); 104 return sym->getVA(); 105 } 106 107 const Defined *InputSection::getContainingSymbol(uint64_t off) const { 108 auto *nextSym = llvm::upper_bound( 109 symbols, off, [](uint64_t a, const Defined *b) { return a < b->value; }); 110 if (nextSym == symbols.begin()) 111 return nullptr; 112 return *std::prev(nextSym); 113 } 114 115 std::string InputSection::getLocation(uint64_t off) const { 116 // First, try to find a symbol that's near the offset. Use it as a reference 117 // point. 118 if (auto *sym = getContainingSymbol(off)) 119 return (toString(getFile()) + ":(symbol " + toString(*sym) + "+0x" + 120 Twine::utohexstr(off - sym->value) + ")") 121 .str(); 122 123 // If that fails, use the section itself as a reference point. 
124 for (const Subsection &subsec : section.subsections) { 125 if (subsec.isec == this) { 126 off += subsec.offset; 127 break; 128 } 129 } 130 131 return (toString(getFile()) + ":(" + getName() + "+0x" + 132 Twine::utohexstr(off) + ")") 133 .str(); 134 } 135 136 std::string InputSection::getSourceLocation(uint64_t off) const { 137 auto *obj = dyn_cast_or_null<ObjFile>(getFile()); 138 if (!obj) 139 return {}; 140 141 DWARFCache *dwarf = obj->getDwarf(); 142 if (!dwarf) 143 return std::string(); 144 145 for (const Subsection &subsec : section.subsections) { 146 if (subsec.isec == this) { 147 off += subsec.offset; 148 break; 149 } 150 } 151 152 auto createMsg = [&](StringRef path, unsigned line) { 153 std::string filename = sys::path::filename(path).str(); 154 std::string lineStr = (":" + Twine(line)).str(); 155 if (filename == path) 156 return filename + lineStr; 157 return (filename + lineStr + " (" + path + lineStr + ")").str(); 158 }; 159 160 // First, look up a function for a given offset. 161 if (std::optional<DILineInfo> li = dwarf->getDILineInfo( 162 section.addr + off, object::SectionedAddress::UndefSection)) 163 return createMsg(li->FileName, li->Line); 164 165 // If it failed, look up again as a variable. 166 if (const Defined *sym = getContainingSymbol(off)) { 167 // Symbols are generally prefixed with an underscore, which is not included 168 // in the debug information. 169 StringRef symName = sym->getName(); 170 symName.consume_front("_"); 171 172 if (std::optional<std::pair<std::string, unsigned>> fileLine = 173 dwarf->getVariableLoc(symName)) 174 return createMsg(fileLine->first, fileLine->second); 175 } 176 177 // Try to get the source file's name from the DWARF information. 
178 if (obj->compileUnit) 179 return obj->sourceFile(); 180 181 return {}; 182 } 183 184 const Reloc *InputSection::getRelocAt(uint32_t off) const { 185 auto it = llvm::find_if( 186 relocs, [=](const macho::Reloc &r) { return r.offset == off; }); 187 if (it == relocs.end()) 188 return nullptr; 189 return &*it; 190 } 191 192 void ConcatInputSection::foldIdentical(ConcatInputSection *copy, 193 Symbol::ICFFoldKind foldKind) { 194 align = std::max(align, copy->align); 195 copy->live = false; 196 copy->wasCoalesced = true; 197 copy->replacement = this; 198 for (auto ©Sym : copy->symbols) 199 copySym->identicalCodeFoldingKind = foldKind; 200 201 symbols.insert(symbols.end(), copy->symbols.begin(), copy->symbols.end()); 202 copy->symbols.clear(); 203 204 // Remove duplicate compact unwind info for symbols at the same address. 205 if (symbols.empty()) 206 return; 207 for (auto it = symbols.begin() + 1; it != symbols.end(); ++it) { 208 assert((*it)->value == 0); 209 (*it)->originalUnwindEntry = nullptr; 210 } 211 } 212 213 void ConcatInputSection::writeTo(uint8_t *buf) { 214 assert(!shouldOmitFromOutput()); 215 216 if (getFileSize() == 0) 217 return; 218 219 memcpy(buf, data.data(), data.size()); 220 221 for (size_t i = 0; i < relocs.size(); i++) { 222 const Reloc &r = relocs[i]; 223 uint8_t *loc = buf + r.offset; 224 uint64_t referentVA = 0; 225 226 const bool needsFixup = config->emitChainedFixups && 227 target->hasAttr(r.type, RelocAttrBits::UNSIGNED); 228 if (target->hasAttr(r.type, RelocAttrBits::SUBTRAHEND)) { 229 const Symbol *fromSym = cast<Symbol *>(r.referent); 230 const Reloc &minuend = relocs[++i]; 231 uint64_t minuendVA; 232 if (const Symbol *toSym = minuend.referent.dyn_cast<Symbol *>()) 233 minuendVA = toSym->getVA() + minuend.addend; 234 else { 235 auto *referentIsec = cast<InputSection *>(minuend.referent); 236 assert(!::shouldOmitFromOutput(referentIsec)); 237 minuendVA = referentIsec->getVA(minuend.addend); 238 } 239 referentVA = minuendVA - 
fromSym->getVA(); 240 } else if (auto *referentSym = r.referent.dyn_cast<Symbol *>()) { 241 if (target->hasAttr(r.type, RelocAttrBits::LOAD) && 242 !referentSym->isInGot()) 243 target->relaxGotLoad(loc, r.type); 244 // For dtrace symbols, do not handle them as normal undefined symbols 245 if (referentSym->getName().starts_with("___dtrace_")) { 246 // Change dtrace call site to pre-defined instructions 247 target->handleDtraceReloc(referentSym, r, loc); 248 continue; 249 } 250 referentVA = resolveSymbolVA(referentSym, r.type) + r.addend; 251 252 if (isThreadLocalVariables(getFlags()) && isa<Defined>(referentSym)) { 253 // References from thread-local variable sections are treated as offsets 254 // relative to the start of the thread-local data memory area, which 255 // is initialized via copying all the TLV data sections (which are all 256 // contiguous). 257 referentVA -= firstTLVDataSection->addr; 258 } else if (needsFixup) { 259 writeChainedFixup(loc, referentSym, r.addend); 260 continue; 261 } 262 } else if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) { 263 assert(!::shouldOmitFromOutput(referentIsec)); 264 referentVA = referentIsec->getVA(r.addend); 265 266 if (needsFixup) { 267 writeChainedRebase(loc, referentVA); 268 continue; 269 } 270 } 271 target->relocateOne(loc, r, referentVA, getVA() + r.offset); 272 } 273 } 274 275 ConcatInputSection *macho::makeSyntheticInputSection(StringRef segName, 276 StringRef sectName, 277 uint32_t flags, 278 ArrayRef<uint8_t> data, 279 uint32_t align) { 280 Section §ion = 281 *make<Section>(/*file=*/nullptr, segName, sectName, flags, /*addr=*/0); 282 auto isec = make<ConcatInputSection>(section, data, align); 283 // Since this is an explicitly created 'fake' input section, 284 // it should not be dead stripped. 
285 isec->live = true; 286 section.subsections.push_back({0, isec}); 287 return isec; 288 } 289 290 void CStringInputSection::splitIntoPieces() { 291 size_t off = 0; 292 StringRef s = toStringRef(data); 293 while (!s.empty()) { 294 size_t end = s.find(0); 295 if (end == StringRef::npos) 296 fatal(getLocation(off) + ": string is not null terminated"); 297 uint32_t hash = deduplicateLiterals ? xxh3_64bits(s.take_front(end)) : 0; 298 pieces.emplace_back(off, hash); 299 size_t size = end + 1; // include null terminator 300 s = s.substr(size); 301 off += size; 302 } 303 } 304 305 StringPiece &CStringInputSection::getStringPiece(uint64_t off) { 306 if (off >= data.size()) 307 fatal(toString(this) + ": offset is outside the section"); 308 309 auto it = 310 partition_point(pieces, [=](StringPiece p) { return p.inSecOff <= off; }); 311 return it[-1]; 312 } 313 314 const StringPiece &CStringInputSection::getStringPiece(uint64_t off) const { 315 return const_cast<CStringInputSection *>(this)->getStringPiece(off); 316 } 317 318 size_t CStringInputSection::getStringPieceIndex(uint64_t off) const { 319 if (off >= data.size()) 320 fatal(toString(this) + ": offset is outside the section"); 321 322 auto it = 323 partition_point(pieces, [=](StringPiece p) { return p.inSecOff <= off; }); 324 return std::distance(pieces.begin(), it) - 1; 325 } 326 327 uint64_t CStringInputSection::getOffset(uint64_t off) const { 328 const StringPiece &piece = getStringPiece(off); 329 uint64_t addend = off - piece.inSecOff; 330 return piece.outSecOff + addend; 331 } 332 333 WordLiteralInputSection::WordLiteralInputSection(const Section §ion, 334 ArrayRef<uint8_t> data, 335 uint32_t align) 336 : InputSection(WordLiteralKind, section, data, align) { 337 switch (sectionType(getFlags())) { 338 case S_4BYTE_LITERALS: 339 power2LiteralSize = 2; 340 break; 341 case S_8BYTE_LITERALS: 342 power2LiteralSize = 3; 343 break; 344 case S_16BYTE_LITERALS: 345 power2LiteralSize = 4; 346 break; 347 default: 348 
llvm_unreachable("invalid literal section type"); 349 } 350 351 live.resize(data.size() >> power2LiteralSize, !config->deadStrip); 352 } 353 354 uint64_t WordLiteralInputSection::getOffset(uint64_t off) const { 355 auto *osec = cast<WordLiteralSection>(parent); 356 const uintptr_t buf = reinterpret_cast<uintptr_t>(data.data()); 357 switch (sectionType(getFlags())) { 358 case S_4BYTE_LITERALS: 359 return osec->getLiteral4Offset(buf + (off & ~3LLU)) | (off & 3); 360 case S_8BYTE_LITERALS: 361 return osec->getLiteral8Offset(buf + (off & ~7LLU)) | (off & 7); 362 case S_16BYTE_LITERALS: 363 return osec->getLiteral16Offset(buf + (off & ~15LLU)) | (off & 15); 364 default: 365 llvm_unreachable("invalid literal section type"); 366 } 367 } 368 369 bool macho::isCodeSection(const InputSection *isec) { 370 return sections::isCodeSection(isec->getName(), isec->getSegName(), 371 isec->getFlags()); 372 } 373 374 bool macho::isCfStringSection(const InputSection *isec) { 375 return isec->getName() == section_names::cfString && 376 isec->getSegName() == segment_names::data; 377 } 378 379 bool macho::isClassRefsSection(const InputSection *isec) { 380 return isec->getName() == section_names::objcClassRefs && 381 isec->getSegName() == segment_names::data; 382 } 383 384 bool macho::isSelRefsSection(const InputSection *isec) { 385 return isec->getName() == section_names::objcSelrefs && 386 isec->getSegName() == segment_names::data; 387 } 388 389 bool macho::isEhFrameSection(const InputSection *isec) { 390 return isec->getName() == section_names::ehFrame && 391 isec->getSegName() == segment_names::text; 392 } 393 394 bool macho::isGccExceptTabSection(const InputSection *isec) { 395 return isec->getName() == section_names::gccExceptTab && 396 isec->getSegName() == segment_names::text; 397 } 398 399 std::string lld::toString(const InputSection *isec) { 400 return (toString(isec->getFile()) + ":(" + isec->getName() + ")").str(); 401 } 402