1 //===- InputSection.h -------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_ELF_INPUT_SECTION_H 10 #define LLD_ELF_INPUT_SECTION_H 11 12 #include "Config.h" 13 #include "Relocations.h" 14 #include "lld/Common/CommonLinkerContext.h" 15 #include "lld/Common/LLVM.h" 16 #include "lld/Common/Memory.h" 17 #include "llvm/ADT/CachedHashString.h" 18 #include "llvm/ADT/DenseSet.h" 19 #include "llvm/ADT/StringExtras.h" 20 #include "llvm/ADT/TinyPtrVector.h" 21 #include "llvm/Object/ELF.h" 22 #include "llvm/Support/Compiler.h" 23 24 namespace lld { 25 namespace elf { 26 27 class InputFile; 28 class Symbol; 29 30 class Defined; 31 struct Partition; 32 class SyntheticSection; 33 template <class ELFT> class ObjFile; 34 class OutputSection; 35 36 // Returned by InputSectionBase::relsOrRelas. At least two members are empty. 37 template <class ELFT> struct RelsOrRelas { 38 Relocs<typename ELFT::Rel> rels; 39 Relocs<typename ELFT::Rela> relas; 40 Relocs<typename ELFT::Crel> crels; 41 bool areRelocsRel() const { return rels.size(); } 42 bool areRelocsCrel() const { return crels.size(); } 43 }; 44 45 #define invokeOnRelocs(sec, f, ...) \ 46 { \ 47 const RelsOrRelas<ELFT> rs = (sec).template relsOrRelas<ELFT>(); \ 48 if (rs.areRelocsCrel()) \ 49 f(__VA_ARGS__, rs.crels); \ 50 else if (rs.areRelocsRel()) \ 51 f(__VA_ARGS__, rs.rels); \ 52 else \ 53 f(__VA_ARGS__, rs.relas); \ 54 } 55 56 // This is the base class of all sections that lld handles. Some are sections in 57 // input files, some are sections in the produced output file and some exist 58 // just as a convenience for implementing special ways of combining some 59 // sections. 60 class SectionBase { 61 public: 62 enum Kind : uint8_t { 63 Regular, 64 Synthetic, 65 Spill, 66 EHFrame, 67 Merge, 68 Output, 69 Class, 70 }; 71 72 Kind kind() const { return sectionKind; } 73 74 // The file which contains this section. For InputSectionBase, its dynamic 75 // type is usually ObjFile<ELFT>, but may be an InputFile of InternalKind 76 // (for a synthetic section). 77 InputFile *file; 78 79 StringRef name; 80 81 // The 1-indexed partition that this section is assigned to by the garbage 82 // collector, or 0 if this section is dead. Normally there is only one 83 // partition, so this will either be 0 or 1. 84 elf::Partition &getPartition(Ctx &) const; 85 86 // These corresponds to the fields in Elf_Shdr. 87 uint64_t flags; 88 uint32_t type; 89 uint32_t link; 90 uint32_t info; 91 uint32_t addralign; 92 uint32_t entsize; 93 94 Kind sectionKind; 95 uint8_t partition = 1; 96 97 // The next two bit fields are only used by InputSectionBase, but we 98 // put them here so the struct packs better. 99 100 Ctx &getCtx() const; 101 OutputSection *getOutputSection(); 102 const OutputSection *getOutputSection() const { 103 return const_cast<SectionBase *>(this)->getOutputSection(); 104 } 105 106 // Translate an offset in the input section to an offset in the output 107 // section. 108 uint64_t getOffset(uint64_t offset) const; 109 110 uint64_t getVA(uint64_t offset = 0) const; 111 112 bool isLive() const { return partition != 0; } 113 void markLive() { partition = 1; } 114 void markDead() { partition = 0; } 115 116 protected: 117 constexpr SectionBase(Kind sectionKind, InputFile *file, StringRef name, 118 uint32_t type, uint64_t flags, uint32_t link, 119 uint32_t info, uint32_t addralign, uint32_t entsize) 120 : file(file), name(name), flags(flags), type(type), link(link), 121 info(info), addralign(addralign), entsize(entsize), 122 sectionKind(sectionKind) {} 123 }; 124 125 struct SymbolAnchor { 126 uint64_t offset; 127 Defined *d; 128 bool end; // true for the anchor of st_value+st_size 129 }; 130 131 struct RelaxAux { 132 // This records symbol start and end offsets which will be adjusted according 133 // to the nearest relocDeltas element. 134 SmallVector<SymbolAnchor, 0> anchors; 135 // For relocations[i], the actual offset is 136 // r_offset - (i ? relocDeltas[i-1] : 0). 137 std::unique_ptr<uint32_t[]> relocDeltas; 138 // For relocations[i], the actual type is relocTypes[i]. 139 std::unique_ptr<RelType[]> relocTypes; 140 SmallVector<uint32_t, 0> writes; 141 }; 142 143 // This corresponds to a section of an input file. 144 class InputSectionBase : public SectionBase { 145 public: 146 struct ObjMsg { 147 const InputSectionBase *sec; 148 uint64_t offset; 149 }; 150 struct SrcMsg { 151 const InputSectionBase &sec; 152 const Symbol &sym; 153 uint64_t offset; 154 }; 155 156 template <class ELFT> 157 InputSectionBase(ObjFile<ELFT> &file, const typename ELFT::Shdr &header, 158 StringRef name, Kind sectionKind); 159 160 InputSectionBase(InputFile *file, StringRef name, uint32_t type, 161 uint64_t flags, uint32_t link, uint32_t info, 162 uint32_t addralign, uint32_t entsize, ArrayRef<uint8_t> data, 163 Kind sectionKind); 164 165 static bool classof(const SectionBase *s) { 166 return s->kind() != Output && s->kind() != Class; 167 } 168 169 LLVM_PREFERRED_TYPE(bool) 170 uint8_t bss : 1; 171 172 // Whether this section is SHT_CREL and has been decoded to RELA by 173 // relsOrRelas. 174 LLVM_PREFERRED_TYPE(bool) 175 uint8_t decodedCrel : 1; 176 177 // Set for sections that should not be folded by ICF. 178 LLVM_PREFERRED_TYPE(bool) 179 uint8_t keepUnique : 1; 180 181 // Whether the section needs to be padded with a NOP filler due to 182 // deleteFallThruJmpInsn. 183 LLVM_PREFERRED_TYPE(bool) 184 uint8_t nopFiller : 1; 185 186 mutable bool compressed = false; 187 188 // Input sections are part of an output section. Special sections 189 // like .eh_frame and merge sections are first combined into a 190 // synthetic section that is then added to an output section. In all 191 // cases this points one level up. 192 SectionBase *parent = nullptr; 193 194 // Section index of the relocation section if exists. 195 uint32_t relSecIdx = 0; 196 197 // Getter when the dynamic type is ObjFile<ELFT>. 198 template <class ELFT> ObjFile<ELFT> *getFile() const { 199 return cast<ObjFile<ELFT>>(file); 200 } 201 202 // Used by --optimize-bb-jumps and RISC-V linker relaxation temporarily to 203 // indicate the number of bytes which is not counted in the size. This should 204 // be reset to zero after uses. 205 uint32_t bytesDropped = 0; 206 207 void drop_back(unsigned num) { 208 assert(bytesDropped + num < 256); 209 bytesDropped += num; 210 } 211 212 void push_back(uint64_t num) { 213 assert(bytesDropped >= num); 214 bytesDropped -= num; 215 } 216 217 mutable const uint8_t *content_; 218 uint64_t size; 219 220 void trim() { 221 if (bytesDropped) { 222 size -= bytesDropped; 223 bytesDropped = 0; 224 } 225 } 226 227 ArrayRef<uint8_t> content() const { 228 return ArrayRef<uint8_t>(content_, size); 229 } 230 ArrayRef<uint8_t> contentMaybeDecompress() const { 231 if (compressed) 232 decompress(); 233 return content(); 234 } 235 236 // The next member in the section group if this section is in a group. This is 237 // used by --gc-sections. 238 InputSectionBase *nextInSectionGroup = nullptr; 239 240 template <class ELFT> 241 RelsOrRelas<ELFT> relsOrRelas(bool supportsCrel = true) const; 242 243 // InputSections that are dependent on us (reverse dependency for GC) 244 llvm::TinyPtrVector<InputSection *> dependentSections; 245 246 // Returns the size of this section (even if this is a common or BSS.) 247 size_t getSize() const; 248 249 InputSection *getLinkOrderDep() const; 250 251 // Get a symbol that encloses this offset from within the section. If type is 252 // not zero, return a symbol with the specified type. 253 Defined *getEnclosingSymbol(uint64_t offset, uint8_t type = 0) const; 254 Defined *getEnclosingFunction(uint64_t offset) const { 255 return getEnclosingSymbol(offset, llvm::ELF::STT_FUNC); 256 } 257 258 // Returns a source location string. Used to construct an error message. 259 std::string getLocation(uint64_t offset) const; 260 ObjMsg getObjMsg(uint64_t offset) const { return {this, offset}; } 261 SrcMsg getSrcMsg(const Symbol &sym, uint64_t offset) const { 262 return {*this, sym, offset}; 263 } 264 265 // Each section knows how to relocate itself. These functions apply 266 // relocations, assuming that Buf points to this section's copy in 267 // the mmap'ed output buffer. 268 template <class ELFT> void relocate(Ctx &, uint8_t *buf, uint8_t *bufEnd); 269 uint64_t getRelocTargetVA(Ctx &, const Relocation &r, uint64_t p) const; 270 271 // The native ELF reloc data type is not very convenient to handle. 272 // So we convert ELF reloc records to our own records in Relocations.cpp. 273 // This vector contains such "cooked" relocations. 274 SmallVector<Relocation, 0> relocations; 275 276 void addReloc(const Relocation &r) { relocations.push_back(r); } 277 MutableArrayRef<Relocation> relocs() { return relocations; } 278 ArrayRef<Relocation> relocs() const { return relocations; } 279 280 union { 281 // These are modifiers to jump instructions that are necessary when basic 282 // block sections are enabled. Basic block sections creates opportunities 283 // to relax jump instructions at basic block boundaries after reordering the 284 // basic blocks. 285 JumpInstrMod *jumpInstrMod = nullptr; 286 287 // Auxiliary information for RISC-V and LoongArch linker relaxation. 288 // They do not use jumpInstrMod. 289 RelaxAux *relaxAux; 290 291 // The compressed content size when `compressed` is true. 292 size_t compressedSize; 293 }; 294 295 // A function compiled with -fsplit-stack calling a function 296 // compiled without -fsplit-stack needs its prologue adjusted. Find 297 // such functions and adjust their prologues. This is very similar 298 // to relocation. See https://gcc.gnu.org/wiki/SplitStacks for more 299 // information. 300 template <typename ELFT> 301 void adjustSplitStackFunctionPrologues(Ctx &, uint8_t *buf, uint8_t *end); 302 303 template <typename T> llvm::ArrayRef<T> getDataAs() const { 304 size_t s = content().size(); 305 assert(s % sizeof(T) == 0); 306 return llvm::ArrayRef<T>((const T *)content().data(), s / sizeof(T)); 307 } 308 309 protected: 310 template <typename ELFT> void parseCompressedHeader(Ctx &); 311 void decompress() const; 312 }; 313 314 // SectionPiece represents a piece of splittable section contents. 315 // We allocate a lot of these and binary search on them. This means that they 316 // have to be as compact as possible, which is why we don't store the size (can 317 // be found by looking at the next one). 318 struct SectionPiece { 319 SectionPiece() = default; 320 SectionPiece(size_t off, uint32_t hash, bool live) 321 : inputOff(off), live(live), hash(hash >> 1) {} 322 323 uint32_t inputOff; 324 LLVM_PREFERRED_TYPE(bool) 325 uint32_t live : 1; 326 uint32_t hash : 31; 327 uint64_t outputOff = 0; 328 }; 329 330 static_assert(sizeof(SectionPiece) == 16, "SectionPiece is too big"); 331 332 // This corresponds to a SHF_MERGE section of an input file. 333 class MergeInputSection : public InputSectionBase { 334 public: 335 template <class ELFT> 336 MergeInputSection(ObjFile<ELFT> &f, const typename ELFT::Shdr &header, 337 StringRef name); 338 MergeInputSection(Ctx &, StringRef name, uint32_t type, uint64_t flags, 339 uint64_t entsize, ArrayRef<uint8_t> data); 340 341 static bool classof(const SectionBase *s) { return s->kind() == Merge; } 342 void splitIntoPieces(); 343 344 // Translate an offset in the input section to an offset in the parent 345 // MergeSyntheticSection. 346 uint64_t getParentOffset(uint64_t offset) const; 347 348 // Splittable sections are handled as a sequence of data 349 // rather than a single large blob of data. 350 SmallVector<SectionPiece, 0> pieces; 351 352 // Returns I'th piece's data. This function is very hot when 353 // string merging is enabled, so we want to inline. 354 LLVM_ATTRIBUTE_ALWAYS_INLINE 355 llvm::CachedHashStringRef getData(size_t i) const { 356 size_t begin = pieces[i].inputOff; 357 size_t end = 358 (pieces.size() - 1 == i) ? content().size() : pieces[i + 1].inputOff; 359 return {toStringRef(content().slice(begin, end - begin)), pieces[i].hash}; 360 } 361 362 // Returns the SectionPiece at a given input section offset. 363 SectionPiece &getSectionPiece(uint64_t offset); 364 const SectionPiece &getSectionPiece(uint64_t offset) const { 365 return const_cast<MergeInputSection *>(this)->getSectionPiece(offset); 366 } 367 368 SyntheticSection *getParent() const { 369 return cast_or_null<SyntheticSection>(parent); 370 } 371 372 private: 373 void splitStrings(StringRef s, size_t size); 374 void splitNonStrings(ArrayRef<uint8_t> a, size_t size); 375 }; 376 377 struct EhSectionPiece { 378 EhSectionPiece(size_t off, InputSectionBase *sec, uint32_t size, 379 unsigned firstRelocation) 380 : inputOff(off), sec(sec), size(size), firstRelocation(firstRelocation) {} 381 382 ArrayRef<uint8_t> data() const { 383 return {sec->content().data() + this->inputOff, size}; 384 } 385 386 size_t inputOff; 387 ssize_t outputOff = -1; 388 InputSectionBase *sec; 389 uint32_t size; 390 unsigned firstRelocation; 391 }; 392 393 // This corresponds to a .eh_frame section of an input file. 394 class EhInputSection : public InputSectionBase { 395 public: 396 template <class ELFT> 397 EhInputSection(ObjFile<ELFT> &f, const typename ELFT::Shdr &header, 398 StringRef name); 399 static bool classof(const SectionBase *s) { return s->kind() == EHFrame; } 400 template <class ELFT> void split(); 401 template <class ELFT, class RelTy> void split(ArrayRef<RelTy> rels); 402 403 // Splittable sections are handled as a sequence of data 404 // rather than a single large blob of data. 405 SmallVector<EhSectionPiece, 0> cies, fdes; 406 407 SyntheticSection *getParent() const; 408 uint64_t getParentOffset(uint64_t offset) const; 409 }; 410 411 // This is a section that is added directly to an output section 412 // instead of needing special combination via a synthetic section. This 413 // includes all input sections with the exceptions of SHF_MERGE and 414 // .eh_frame. It also includes the synthetic sections themselves. 415 class InputSection : public InputSectionBase { 416 public: 417 InputSection(InputFile *f, StringRef name, uint32_t type, uint64_t flags, 418 uint32_t addralign, uint32_t entsize, ArrayRef<uint8_t> data, 419 Kind k = Regular); 420 template <class ELFT> 421 InputSection(ObjFile<ELFT> &f, const typename ELFT::Shdr &header, 422 StringRef name); 423 424 static bool classof(const SectionBase *s) { 425 return s->kind() == SectionBase::Regular || 426 s->kind() == SectionBase::Synthetic || 427 s->kind() == SectionBase::Spill; 428 } 429 430 // Write this section to a mmap'ed file, assuming Buf is pointing to 431 // beginning of the output section. 432 template <class ELFT> void writeTo(Ctx &, uint8_t *buf); 433 434 OutputSection *getParent() const { 435 return reinterpret_cast<OutputSection *>(parent); 436 } 437 438 // This variable has two usages. Initially, it represents an index in the 439 // OutputSection's InputSection list, and is used when ordering SHF_LINK_ORDER 440 // sections. After assignAddresses is called, it represents the offset from 441 // the beginning of the output section this section was assigned to. 442 uint64_t outSecOff = 0; 443 444 InputSectionBase *getRelocatedSection() const; 445 446 template <class ELFT, class RelTy> 447 void relocateNonAlloc(Ctx &, uint8_t *buf, Relocs<RelTy> rels); 448 449 // Points to the canonical section. If ICF folds two sections, repl pointer of 450 // one section points to the other. 451 InputSection *repl = this; 452 453 // Used by ICF. 454 uint32_t eqClass[2] = {0, 0}; 455 456 // Called by ICF to merge two input sections. 457 void replace(InputSection *other); 458 459 static InputSection discarded; 460 461 private: 462 template <class ELFT, class RelTy> void copyRelocations(Ctx &, uint8_t *buf); 463 464 template <class ELFT, class RelTy, class RelIt> 465 void copyRelocations(Ctx &, uint8_t *buf, llvm::iterator_range<RelIt> rels); 466 467 template <class ELFT> void copyShtGroup(uint8_t *buf); 468 }; 469 470 // A marker for a potential spill location for another input section. This 471 // broadly acts as if it were the original section until address assignment. 472 // Then it is either replaced with the real input section or removed. 473 class PotentialSpillSection : public InputSection { 474 public: 475 // The containing input section description; used to quickly replace this stub 476 // with the actual section. 477 InputSectionDescription *isd; 478 479 // Next potential spill location for the same source input section. 480 PotentialSpillSection *next = nullptr; 481 482 PotentialSpillSection(const InputSectionBase &source, 483 InputSectionDescription &isd); 484 485 static bool classof(const SectionBase *sec) { 486 return sec->kind() == InputSectionBase::Spill; 487 } 488 }; 489 490 #ifndef _WIN32 491 static_assert(sizeof(InputSection) <= 152, "InputSection is too big"); 492 #endif 493 494 class SyntheticSection : public InputSection { 495 public: 496 Ctx &ctx; 497 SyntheticSection(Ctx &ctx, StringRef name, uint32_t type, uint64_t flags, 498 uint32_t addralign) 499 : InputSection(ctx.internalFile, name, type, flags, addralign, 500 /*entsize=*/0, {}, InputSectionBase::Synthetic), 501 ctx(ctx) {} 502 503 virtual ~SyntheticSection() = default; 504 virtual size_t getSize() const = 0; 505 virtual bool updateAllocSize(Ctx &) { return false; } 506 // If the section has the SHF_ALLOC flag and the size may be changed if 507 // thunks are added, update the section size. 508 virtual bool isNeeded() const { return true; } 509 virtual void finalizeContents() {} 510 virtual void writeTo(uint8_t *buf) = 0; 511 512 static bool classof(const SectionBase *sec) { 513 return sec->kind() == InputSectionBase::Synthetic; 514 } 515 }; 516 517 inline bool isStaticRelSecType(uint32_t type) { 518 return type == llvm::ELF::SHT_RELA || type == llvm::ELF::SHT_CREL || 519 type == llvm::ELF::SHT_REL; 520 } 521 522 inline bool isDebugSection(const InputSectionBase &sec) { 523 return (sec.flags & llvm::ELF::SHF_ALLOC) == 0 && 524 sec.name.starts_with(".debug"); 525 } 526 527 std::string toStr(elf::Ctx &, const elf::InputSectionBase *); 528 const ELFSyncStream &operator<<(const ELFSyncStream &, 529 const InputSectionBase *); 530 const ELFSyncStream &operator<<(const ELFSyncStream &, 531 InputSectionBase::ObjMsg &&); 532 const ELFSyncStream &operator<<(const ELFSyncStream &, 533 InputSectionBase::SrcMsg &&); 534 } // namespace elf 535 } // namespace lld 536 537 #endif 538