1 //===- InputFiles.cpp -----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "InputFiles.h" 10 #include "Config.h" 11 #include "DWARF.h" 12 #include "Driver.h" 13 #include "InputSection.h" 14 #include "LinkerScript.h" 15 #include "SymbolTable.h" 16 #include "Symbols.h" 17 #include "SyntheticSections.h" 18 #include "Target.h" 19 #include "lld/Common/CommonLinkerContext.h" 20 #include "lld/Common/DWARF.h" 21 #include "llvm/ADT/CachedHashString.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/LTO/LTO.h" 24 #include "llvm/Object/IRObjectFile.h" 25 #include "llvm/Support/ARMAttributeParser.h" 26 #include "llvm/Support/ARMBuildAttributes.h" 27 #include "llvm/Support/Endian.h" 28 #include "llvm/Support/FileSystem.h" 29 #include "llvm/Support/Path.h" 30 #include "llvm/Support/RISCVAttributeParser.h" 31 #include "llvm/Support/TarWriter.h" 32 #include "llvm/Support/raw_ostream.h" 33 34 using namespace llvm; 35 using namespace llvm::ELF; 36 using namespace llvm::object; 37 using namespace llvm::sys; 38 using namespace llvm::sys::fs; 39 using namespace llvm::support::endian; 40 using namespace lld; 41 using namespace lld::elf; 42 43 bool InputFile::isInGroup; 44 uint32_t InputFile::nextGroupId; 45 46 std::unique_ptr<TarWriter> elf::tar; 47 48 DenseMap<StringRef, StringRef> elf::gnuWarnings; 49 50 // Returns "<internal>", "foo.a(bar.o)" or "baz.o". 51 std::string lld::toString(const InputFile *f) { 52 static std::mutex mu; 53 if (!f) 54 return "<internal>"; 55 56 { 57 std::lock_guard<std::mutex> lock(mu); 58 if (f->toStringCache.empty()) { 59 if (f->archiveName.empty()) 60 f->toStringCache = f->getName(); 61 else 62 (f->archiveName + "(" + f->getName() + ")").toVector(f->toStringCache); 63 } 64 } 65 return std::string(f->toStringCache); 66 } 67 68 // .gnu.warning.SYMBOL are treated as warning symbols for the given symbol 69 void lld::parseGNUWarning(StringRef name, ArrayRef<char> data, size_t size) { 70 if (!name.empty() && name.startswith(".gnu.warning.")) { 71 StringRef wsym = name.substr(13); 72 StringRef s(data.begin()); 73 StringRef wng(s.substr(0, size)); 74 symtab.insert(wsym)->gwarn = true; 75 gnuWarnings.insert({wsym, wng}); 76 } 77 } 78 79 static ELFKind getELFKind(MemoryBufferRef mb, StringRef archiveName) { 80 unsigned char size; 81 unsigned char endian; 82 std::tie(size, endian) = getElfArchType(mb.getBuffer()); 83 84 auto report = [&](StringRef msg) { 85 StringRef filename = mb.getBufferIdentifier(); 86 if (archiveName.empty()) 87 fatal(filename + ": " + msg); 88 else 89 fatal(archiveName + "(" + filename + "): " + msg); 90 }; 91 92 if (!mb.getBuffer().startswith(ElfMagic)) 93 report("not an ELF file"); 94 if (endian != ELFDATA2LSB && endian != ELFDATA2MSB) 95 report("corrupted ELF file: invalid data encoding"); 96 if (size != ELFCLASS32 && size != ELFCLASS64) 97 report("corrupted ELF file: invalid file class"); 98 99 size_t bufSize = mb.getBuffer().size(); 100 if ((size == ELFCLASS32 && bufSize < sizeof(Elf32_Ehdr)) || 101 (size == ELFCLASS64 && bufSize < sizeof(Elf64_Ehdr))) 102 report("corrupted ELF file: file is too short"); 103 104 if (size == ELFCLASS32) 105 return (endian == ELFDATA2LSB) ? ELF32LEKind : ELF32BEKind; 106 return (endian == ELFDATA2LSB) ? ELF64LEKind : ELF64BEKind; 107 } 108 109 // For ARM only, to set the EF_ARM_ABI_FLOAT_SOFT or EF_ARM_ABI_FLOAT_HARD 110 // flag in the ELF Header we need to look at Tag_ABI_VFP_args to find out how 111 // the input objects have been compiled. 112 static void updateARMVFPArgs(const ARMAttributeParser &attributes, 113 const InputFile *f) { 114 std::optional<unsigned> attr = 115 attributes.getAttributeValue(ARMBuildAttrs::ABI_VFP_args); 116 if (!attr) 117 // If an ABI tag isn't present then it is implicitly given the value of 0 118 // which maps to ARMBuildAttrs::BaseAAPCS. However many assembler files, 119 // including some in glibc that don't use FP args (and should have value 3) 120 // don't have the attribute so we do not consider an implicit value of 0 121 // as a clash. 122 return; 123 124 unsigned vfpArgs = *attr; 125 ARMVFPArgKind arg; 126 switch (vfpArgs) { 127 case ARMBuildAttrs::BaseAAPCS: 128 arg = ARMVFPArgKind::Base; 129 break; 130 case ARMBuildAttrs::HardFPAAPCS: 131 arg = ARMVFPArgKind::VFP; 132 break; 133 case ARMBuildAttrs::ToolChainFPPCS: 134 // Tool chain specific convention that conforms to neither AAPCS variant. 135 arg = ARMVFPArgKind::ToolChain; 136 break; 137 case ARMBuildAttrs::CompatibleFPAAPCS: 138 // Object compatible with all conventions. 139 return; 140 default: 141 error(toString(f) + ": unknown Tag_ABI_VFP_args value: " + Twine(vfpArgs)); 142 return; 143 } 144 // Follow ld.bfd and error if there is a mix of calling conventions. 145 if (config->armVFPArgs != arg && config->armVFPArgs != ARMVFPArgKind::Default) 146 error(toString(f) + ": incompatible Tag_ABI_VFP_args"); 147 else 148 config->armVFPArgs = arg; 149 } 150 151 // The ARM support in lld makes some use of instructions that are not available 152 // on all ARM architectures. Namely: 153 // - Use of BLX instruction for interworking between ARM and Thumb state. 154 // - Use of the extended Thumb branch encoding in relocation. 155 // - Use of the MOVT/MOVW instructions in Thumb Thunks. 156 // The ARM Attributes section contains information about the architecture chosen 157 // at compile time. We follow the convention that if at least one input object 158 // is compiled with an architecture that supports these features then lld is 159 // permitted to use them. 160 static void updateSupportedARMFeatures(const ARMAttributeParser &attributes) { 161 std::optional<unsigned> attr = 162 attributes.getAttributeValue(ARMBuildAttrs::CPU_arch); 163 if (!attr) 164 return; 165 auto arch = *attr; 166 switch (arch) { 167 case ARMBuildAttrs::Pre_v4: 168 case ARMBuildAttrs::v4: 169 case ARMBuildAttrs::v4T: 170 // Architectures prior to v5 do not support BLX instruction 171 break; 172 case ARMBuildAttrs::v5T: 173 case ARMBuildAttrs::v5TE: 174 case ARMBuildAttrs::v5TEJ: 175 case ARMBuildAttrs::v6: 176 case ARMBuildAttrs::v6KZ: 177 case ARMBuildAttrs::v6K: 178 config->armHasBlx = true; 179 // Architectures used in pre-Cortex processors do not support 180 // The J1 = 1 J2 = 1 Thumb branch range extension, with the exception 181 // of Architecture v6T2 (arm1156t2-s and arm1156t2f-s) that do. 182 break; 183 default: 184 // All other Architectures have BLX and extended branch encoding 185 config->armHasBlx = true; 186 config->armJ1J2BranchEncoding = true; 187 if (arch != ARMBuildAttrs::v6_M && arch != ARMBuildAttrs::v6S_M) 188 // All Architectures used in Cortex processors with the exception 189 // of v6-M and v6S-M have the MOVT and MOVW instructions. 190 config->armHasMovtMovw = true; 191 break; 192 } 193 } 194 195 InputFile::InputFile(Kind k, MemoryBufferRef m) 196 : mb(m), groupId(nextGroupId), fileKind(k) { 197 // All files within the same --{start,end}-group get the same group ID. 198 // Otherwise, a new file will get a new group ID. 199 if (!isInGroup) 200 ++nextGroupId; 201 } 202 203 std::optional<MemoryBufferRef> elf::readFile(StringRef path) { 204 llvm::TimeTraceScope timeScope("Load input files", path); 205 206 // The --chroot option changes our virtual root directory. 207 // This is useful when you are dealing with files created by --reproduce. 208 if (!config->chroot.empty() && path.startswith("/")) 209 path = saver().save(config->chroot + path); 210 211 log(path); 212 config->dependencyFiles.insert(llvm::CachedHashString(path)); 213 214 auto mbOrErr = MemoryBuffer::getFile(path, /*IsText=*/false, 215 /*RequiresNullTerminator=*/false); 216 if (auto ec = mbOrErr.getError()) { 217 error("cannot open " + path + ": " + ec.message()); 218 return std::nullopt; 219 } 220 221 MemoryBufferRef mbref = (*mbOrErr)->getMemBufferRef(); 222 ctx.memoryBuffers.push_back(std::move(*mbOrErr)); // take MB ownership 223 224 if (tar) 225 tar->append(relativeToRoot(path), mbref.getBuffer()); 226 return mbref; 227 } 228 229 // All input object files must be for the same architecture 230 // (e.g. it does not make sense to link x86 object files with 231 // MIPS object files.) This function checks for that error. 232 static bool isCompatible(InputFile *file) { 233 if (!file->isElf() && !isa<BitcodeFile>(file)) 234 return true; 235 236 if (file->ekind == config->ekind && file->emachine == config->emachine) { 237 if (config->emachine != EM_MIPS) 238 return true; 239 if (isMipsN32Abi(file) == config->mipsN32Abi) 240 return true; 241 } 242 243 StringRef target = 244 !config->bfdname.empty() ? config->bfdname : config->emulation; 245 if (!target.empty()) { 246 error(toString(file) + " is incompatible with " + target); 247 return false; 248 } 249 250 InputFile *existing = nullptr; 251 if (!ctx.objectFiles.empty()) 252 existing = ctx.objectFiles[0]; 253 else if (!ctx.sharedFiles.empty()) 254 existing = ctx.sharedFiles[0]; 255 else if (!ctx.bitcodeFiles.empty()) 256 existing = ctx.bitcodeFiles[0]; 257 std::string with; 258 if (existing) 259 with = " with " + toString(existing); 260 error(toString(file) + " is incompatible" + with); 261 return false; 262 } 263 264 template <class ELFT> static void doParseFile(InputFile *file) { 265 if (!isCompatible(file)) 266 return; 267 268 // Binary file 269 if (auto *f = dyn_cast<BinaryFile>(file)) { 270 ctx.binaryFiles.push_back(f); 271 f->parse(); 272 return; 273 } 274 275 // Lazy object file 276 if (file->lazy) { 277 if (auto *f = dyn_cast<BitcodeFile>(file)) { 278 ctx.lazyBitcodeFiles.push_back(f); 279 f->parseLazy(); 280 } else { 281 cast<ObjFile<ELFT>>(file)->parseLazy(); 282 } 283 return; 284 } 285 286 if (config->trace) 287 message(toString(file)); 288 289 // .so file 290 if (auto *f = dyn_cast<SharedFile>(file)) { 291 f->parse<ELFT>(); 292 return; 293 } 294 295 // LLVM bitcode file 296 if (auto *f = dyn_cast<BitcodeFile>(file)) { 297 ctx.bitcodeFiles.push_back(f); 298 f->parse(); 299 return; 300 } 301 302 // Regular object file 303 ctx.objectFiles.push_back(cast<ELFFileBase>(file)); 304 cast<ObjFile<ELFT>>(file)->parse(); 305 } 306 307 // Add symbols in File to the symbol table. 308 void elf::parseFile(InputFile *file) { invokeELFT(doParseFile, file); } 309 310 // Concatenates arguments to construct a string representing an error location. 311 static std::string createFileLineMsg(StringRef path, unsigned line) { 312 std::string filename = std::string(path::filename(path)); 313 std::string lineno = ":" + std::to_string(line); 314 if (filename == path) 315 return filename + lineno; 316 return filename + lineno + " (" + path.str() + lineno + ")"; 317 } 318 319 template <class ELFT> 320 static std::string getSrcMsgAux(ObjFile<ELFT> &file, const Symbol &sym, 321 InputSectionBase &sec, uint64_t offset) { 322 // In DWARF, functions and variables are stored to different places. 323 // First, look up a function for a given offset. 324 if (std::optional<DILineInfo> info = file.getDILineInfo(&sec, offset)) 325 return createFileLineMsg(info->FileName, info->Line); 326 327 // If it failed, look up again as a variable. 328 if (std::optional<std::pair<std::string, unsigned>> fileLine = 329 file.getVariableLoc(sym.getName())) 330 return createFileLineMsg(fileLine->first, fileLine->second); 331 332 // File.sourceFile contains STT_FILE symbol, and that is a last resort. 333 return std::string(file.sourceFile); 334 } 335 336 std::string InputFile::getSrcMsg(const Symbol &sym, InputSectionBase &sec, 337 uint64_t offset) { 338 if (kind() != ObjKind) 339 return ""; 340 switch (ekind) { 341 default: 342 llvm_unreachable("Invalid kind"); 343 case ELF32LEKind: 344 return getSrcMsgAux(cast<ObjFile<ELF32LE>>(*this), sym, sec, offset); 345 case ELF32BEKind: 346 return getSrcMsgAux(cast<ObjFile<ELF32BE>>(*this), sym, sec, offset); 347 case ELF64LEKind: 348 return getSrcMsgAux(cast<ObjFile<ELF64LE>>(*this), sym, sec, offset); 349 case ELF64BEKind: 350 return getSrcMsgAux(cast<ObjFile<ELF64BE>>(*this), sym, sec, offset); 351 } 352 } 353 354 StringRef InputFile::getNameForScript() const { 355 if (archiveName.empty()) 356 return getName(); 357 358 if (nameForScriptCache.empty()) 359 nameForScriptCache = (archiveName + Twine(':') + getName()).str(); 360 361 return nameForScriptCache; 362 } 363 364 // An ELF object file may contain a `.deplibs` section. If it exists, the 365 // section contains a list of library specifiers such as `m` for libm. This 366 // function resolves a given name by finding the first matching library checking 367 // the various ways that a library can be specified to LLD. This ELF extension 368 // is a form of autolinking and is called `dependent libraries`. It is currently 369 // unique to LLVM and lld. 370 static void addDependentLibrary(StringRef specifier, const InputFile *f) { 371 if (!config->dependentLibraries) 372 return; 373 if (std::optional<std::string> s = searchLibraryBaseName(specifier)) 374 ctx.driver.addFile(saver().save(*s), /*withLOption=*/true); 375 else if (std::optional<std::string> s = findFromSearchPaths(specifier)) 376 ctx.driver.addFile(saver().save(*s), /*withLOption=*/true); 377 else if (fs::exists(specifier)) 378 ctx.driver.addFile(specifier, /*withLOption=*/false); 379 else 380 error(toString(f) + 381 ": unable to find library from dependent library specifier: " + 382 specifier); 383 } 384 385 // Record the membership of a section group so that in the garbage collection 386 // pass, section group members are kept or discarded as a unit. 387 template <class ELFT> 388 static void handleSectionGroup(ArrayRef<InputSectionBase *> sections, 389 ArrayRef<typename ELFT::Word> entries) { 390 bool hasAlloc = false; 391 for (uint32_t index : entries.slice(1)) { 392 if (index >= sections.size()) 393 return; 394 if (InputSectionBase *s = sections[index]) 395 if (s != &InputSection::discarded && s->flags & SHF_ALLOC) 396 hasAlloc = true; 397 } 398 399 // If any member has the SHF_ALLOC flag, the whole group is subject to garbage 400 // collection. See the comment in markLive(). This rule retains .debug_types 401 // and .rela.debug_types. 402 if (!hasAlloc) 403 return; 404 405 // Connect the members in a circular doubly-linked list via 406 // nextInSectionGroup. 407 InputSectionBase *head; 408 InputSectionBase *prev = nullptr; 409 for (uint32_t index : entries.slice(1)) { 410 InputSectionBase *s = sections[index]; 411 if (!s || s == &InputSection::discarded) 412 continue; 413 if (prev) 414 prev->nextInSectionGroup = s; 415 else 416 head = s; 417 prev = s; 418 } 419 if (prev) 420 prev->nextInSectionGroup = head; 421 } 422 423 template <class ELFT> DWARFCache *ObjFile<ELFT>::getDwarf() { 424 llvm::call_once(initDwarf, [this]() { 425 dwarf = std::make_unique<DWARFCache>(std::make_unique<DWARFContext>( 426 std::make_unique<LLDDwarfObj<ELFT>>(this), "", 427 [&](Error err) { warn(getName() + ": " + toString(std::move(err))); }, 428 [&](Error warning) { 429 warn(getName() + ": " + toString(std::move(warning))); 430 })); 431 }); 432 433 return dwarf.get(); 434 } 435 436 // Returns the pair of file name and line number describing location of data 437 // object (variable, array, etc) definition. 438 template <class ELFT> 439 std::optional<std::pair<std::string, unsigned>> 440 ObjFile<ELFT>::getVariableLoc(StringRef name) { 441 return getDwarf()->getVariableLoc(name); 442 } 443 444 // Returns source line information for a given offset 445 // using DWARF debug info. 446 template <class ELFT> 447 std::optional<DILineInfo> ObjFile<ELFT>::getDILineInfo(InputSectionBase *s, 448 uint64_t offset) { 449 // Detect SectionIndex for specified section. 450 uint64_t sectionIndex = object::SectionedAddress::UndefSection; 451 ArrayRef<InputSectionBase *> sections = s->file->getSections(); 452 for (uint64_t curIndex = 0; curIndex < sections.size(); ++curIndex) { 453 if (s == sections[curIndex]) { 454 sectionIndex = curIndex; 455 break; 456 } 457 } 458 459 return getDwarf()->getDILineInfo(offset, sectionIndex); 460 } 461 462 ELFFileBase::ELFFileBase(Kind k, ELFKind ekind, MemoryBufferRef mb) 463 : InputFile(k, mb) { 464 this->ekind = ekind; 465 } 466 467 template <typename Elf_Shdr> 468 static const Elf_Shdr *findSection(ArrayRef<Elf_Shdr> sections, uint32_t type) { 469 for (const Elf_Shdr &sec : sections) 470 if (sec.sh_type == type) 471 return &sec; 472 return nullptr; 473 } 474 475 void ELFFileBase::init() { 476 switch (ekind) { 477 case ELF32LEKind: 478 init<ELF32LE>(fileKind); 479 break; 480 case ELF32BEKind: 481 init<ELF32BE>(fileKind); 482 break; 483 case ELF64LEKind: 484 init<ELF64LE>(fileKind); 485 break; 486 case ELF64BEKind: 487 init<ELF64BE>(fileKind); 488 break; 489 default: 490 llvm_unreachable("getELFKind"); 491 } 492 } 493 494 template <class ELFT> void ELFFileBase::init(InputFile::Kind k) { 495 using Elf_Shdr = typename ELFT::Shdr; 496 using Elf_Sym = typename ELFT::Sym; 497 498 // Initialize trivial attributes. 499 const ELFFile<ELFT> &obj = getObj<ELFT>(); 500 emachine = obj.getHeader().e_machine; 501 osabi = obj.getHeader().e_ident[llvm::ELF::EI_OSABI]; 502 abiVersion = obj.getHeader().e_ident[llvm::ELF::EI_ABIVERSION]; 503 504 ArrayRef<Elf_Shdr> sections = CHECK(obj.sections(), this); 505 elfShdrs = sections.data(); 506 numELFShdrs = sections.size(); 507 508 // Find a symbol table. 509 const Elf_Shdr *symtabSec = 510 findSection(sections, k == SharedKind ? SHT_DYNSYM : SHT_SYMTAB); 511 512 if (!symtabSec) 513 return; 514 515 // Initialize members corresponding to a symbol table. 516 firstGlobal = symtabSec->sh_info; 517 518 ArrayRef<Elf_Sym> eSyms = CHECK(obj.symbols(symtabSec), this); 519 if (firstGlobal == 0 || firstGlobal > eSyms.size()) 520 fatal(toString(this) + ": invalid sh_info in symbol table"); 521 522 elfSyms = reinterpret_cast<const void *>(eSyms.data()); 523 numELFSyms = uint32_t(eSyms.size()); 524 stringTable = CHECK(obj.getStringTableForSymtab(*symtabSec, sections), this); 525 } 526 527 template <class ELFT> 528 uint32_t ObjFile<ELFT>::getSectionIndex(const Elf_Sym &sym) const { 529 return CHECK( 530 this->getObj().getSectionIndex(sym, getELFSyms<ELFT>(), shndxTable), 531 this); 532 } 533 534 template <class ELFT> void ObjFile<ELFT>::parse(bool ignoreComdats) { 535 object::ELFFile<ELFT> obj = this->getObj(); 536 // Read a section table. justSymbols is usually false. 537 if (this->justSymbols) { 538 initializeJustSymbols(); 539 initializeSymbols(obj); 540 return; 541 } 542 543 // Handle dependent libraries and selection of section groups as these are not 544 // done in parallel. 545 ArrayRef<Elf_Shdr> objSections = getELFShdrs<ELFT>(); 546 StringRef shstrtab = CHECK(obj.getSectionStringTable(objSections), this); 547 uint64_t size = objSections.size(); 548 sections.resize(size); 549 for (size_t i = 0; i != size; ++i) { 550 const Elf_Shdr &sec = objSections[i]; 551 if (sec.sh_type == SHT_LLVM_DEPENDENT_LIBRARIES && !config->relocatable) { 552 StringRef name = check(obj.getSectionName(sec, shstrtab)); 553 ArrayRef<char> data = CHECK( 554 this->getObj().template getSectionContentsAsArray<char>(sec), this); 555 if (!data.empty() && data.back() != '\0') { 556 error( 557 toString(this) + 558 ": corrupted dependent libraries section (unterminated string): " + 559 name); 560 } else { 561 for (const char *d = data.begin(), *e = data.end(); d < e;) { 562 StringRef s(d); 563 addDependentLibrary(s, this); 564 d += s.size() + 1; 565 } 566 } 567 this->sections[i] = &InputSection::discarded; 568 continue; 569 } 570 571 if (sec.sh_type == SHT_ARM_ATTRIBUTES && config->emachine == EM_ARM) { 572 ARMAttributeParser attributes; 573 ArrayRef<uint8_t> contents = 574 check(this->getObj().getSectionContents(sec)); 575 StringRef name = check(obj.getSectionName(sec, shstrtab)); 576 this->sections[i] = &InputSection::discarded; 577 if (Error e = 578 attributes.parse(contents, ekind == ELF32LEKind ? support::little 579 : support::big)) { 580 InputSection isec(*this, sec, name); 581 warn(toString(&isec) + ": " + llvm::toString(std::move(e))); 582 } else { 583 updateSupportedARMFeatures(attributes); 584 updateARMVFPArgs(attributes, this); 585 586 // FIXME: Retain the first attribute section we see. The eglibc ARM 587 // dynamic loaders require the presence of an attribute section for 588 // dlopen to work. In a full implementation we would merge all attribute 589 // sections. 590 if (in.attributes == nullptr) { 591 in.attributes = std::make_unique<InputSection>(*this, sec, name); 592 this->sections[i] = in.attributes.get(); 593 } 594 } 595 } 596 597 if (sec.sh_type != SHT_GROUP) 598 continue; 599 StringRef signature = getShtGroupSignature(objSections, sec); 600 ArrayRef<Elf_Word> entries = 601 CHECK(obj.template getSectionContentsAsArray<Elf_Word>(sec), this); 602 if (entries.empty()) 603 fatal(toString(this) + ": empty SHT_GROUP"); 604 605 Elf_Word flag = entries[0]; 606 if (flag && flag != GRP_COMDAT) 607 fatal(toString(this) + ": unsupported SHT_GROUP format"); 608 609 bool keepGroup = 610 (flag & GRP_COMDAT) == 0 || ignoreComdats || 611 symtab.comdatGroups.try_emplace(CachedHashStringRef(signature), this) 612 .second; 613 if (keepGroup) { 614 if (config->relocatable) 615 this->sections[i] = createInputSection( 616 i, sec, check(obj.getSectionName(sec, shstrtab))); 617 continue; 618 } 619 620 // Otherwise, discard group members. 621 for (uint32_t secIndex : entries.slice(1)) { 622 if (secIndex >= size) 623 fatal(toString(this) + 624 ": invalid section index in group: " + Twine(secIndex)); 625 this->sections[secIndex] = &InputSection::discarded; 626 } 627 } 628 629 // Read a symbol table. 630 initializeSymbols(obj); 631 } 632 633 // Sections with SHT_GROUP and comdat bits define comdat section groups. 634 // They are identified and deduplicated by group name. This function 635 // returns a group name. 636 template <class ELFT> 637 StringRef ObjFile<ELFT>::getShtGroupSignature(ArrayRef<Elf_Shdr> sections, 638 const Elf_Shdr &sec) { 639 typename ELFT::SymRange symbols = this->getELFSyms<ELFT>(); 640 if (sec.sh_info >= symbols.size()) 641 fatal(toString(this) + ": invalid symbol index"); 642 const typename ELFT::Sym &sym = symbols[sec.sh_info]; 643 return CHECK(sym.getName(this->stringTable), this); 644 } 645 646 template <class ELFT> 647 bool ObjFile<ELFT>::shouldMerge(const Elf_Shdr &sec, StringRef name) { 648 // On a regular link we don't merge sections if -O0 (default is -O1). This 649 // sometimes makes the linker significantly faster, although the output will 650 // be bigger. 651 // 652 // Doing the same for -r would create a problem as it would combine sections 653 // with different sh_entsize. One option would be to just copy every SHF_MERGE 654 // section as is to the output. While this would produce a valid ELF file with 655 // usable SHF_MERGE sections, tools like (llvm-)?dwarfdump get confused when 656 // they see two .debug_str. We could have separate logic for combining 657 // SHF_MERGE sections based both on their name and sh_entsize, but that seems 658 // to be more trouble than it is worth. Instead, we just use the regular (-O1) 659 // logic for -r. 660 if (config->optimize == 0 && !config->relocatable) 661 return false; 662 663 // A mergeable section with size 0 is useless because they don't have 664 // any data to merge. A mergeable string section with size 0 can be 665 // argued as invalid because it doesn't end with a null character. 666 // We'll avoid a mess by handling them as if they were non-mergeable. 667 if (sec.sh_size == 0) 668 return false; 669 670 // Check for sh_entsize. The ELF spec is not clear about the zero 671 // sh_entsize. It says that "the member [sh_entsize] contains 0 if 672 // the section does not hold a table of fixed-size entries". We know 673 // that Rust 1.13 produces a string mergeable section with a zero 674 // sh_entsize. Here we just accept it rather than being picky about it. 675 uint64_t entSize = sec.sh_entsize; 676 if (entSize == 0) 677 return false; 678 if (sec.sh_size % entSize) 679 fatal(toString(this) + ":(" + name + "): SHF_MERGE section size (" + 680 Twine(sec.sh_size) + ") must be a multiple of sh_entsize (" + 681 Twine(entSize) + ")"); 682 683 if (sec.sh_flags & SHF_WRITE) 684 fatal(toString(this) + ":(" + name + 685 "): writable SHF_MERGE section is not supported"); 686 687 return true; 688 } 689 690 // This is for --just-symbols. 691 // 692 // --just-symbols is a very minor feature that allows you to link your 693 // output against other existing program, so that if you load both your 694 // program and the other program into memory, your output can refer the 695 // other program's symbols. 696 // 697 // When the option is given, we link "just symbols". The section table is 698 // initialized with null pointers. 699 template <class ELFT> void ObjFile<ELFT>::initializeJustSymbols() { 700 sections.resize(numELFShdrs); 701 } 702 703 template <class ELFT> 704 void ObjFile<ELFT>::initializeSections(bool ignoreComdats, 705 const llvm::object::ELFFile<ELFT> &obj) { 706 ArrayRef<Elf_Shdr> objSections = getELFShdrs<ELFT>(); 707 StringRef shstrtab = CHECK(obj.getSectionStringTable(objSections), this); 708 uint64_t size = objSections.size(); 709 SmallVector<ArrayRef<Elf_Word>, 0> selectedGroups; 710 for (size_t i = 0; i != size; ++i) { 711 if (this->sections[i] == &InputSection::discarded) 712 continue; 713 const Elf_Shdr &sec = objSections[i]; 714 715 // SHF_EXCLUDE'ed sections are discarded by the linker. However, 716 // if -r is given, we'll let the final link discard such sections. 717 // This is compatible with GNU. 718 if ((sec.sh_flags & SHF_EXCLUDE) && !config->relocatable) { 719 if (sec.sh_type == SHT_LLVM_CALL_GRAPH_PROFILE) 720 cgProfileSectionIndex = i; 721 if (sec.sh_type == SHT_LLVM_ADDRSIG) { 722 // We ignore the address-significance table if we know that the object 723 // file was created by objcopy or ld -r. This is because these tools 724 // will reorder the symbols in the symbol table, invalidating the data 725 // in the address-significance table, which refers to symbols by index. 726 if (sec.sh_link != 0) 727 this->addrsigSec = &sec; 728 else if (config->icf == ICFLevel::Safe) 729 warn(toString(this) + 730 ": --icf=safe conservatively ignores " 731 "SHT_LLVM_ADDRSIG [index " + 732 Twine(i) + 733 "] with sh_link=0 " 734 "(likely created using objcopy or ld -r)"); 735 } 736 this->sections[i] = &InputSection::discarded; 737 continue; 738 } 739 740 switch (sec.sh_type) { 741 case SHT_GROUP: { 742 if (!config->relocatable) 743 sections[i] = &InputSection::discarded; 744 StringRef signature = 745 cantFail(this->getELFSyms<ELFT>()[sec.sh_info].getName(stringTable)); 746 ArrayRef<Elf_Word> entries = 747 cantFail(obj.template getSectionContentsAsArray<Elf_Word>(sec)); 748 if ((entries[0] & GRP_COMDAT) == 0 || ignoreComdats || 749 symtab.comdatGroups.find(CachedHashStringRef(signature))->second == 750 this) 751 selectedGroups.push_back(entries); 752 break; 753 } 754 case SHT_SYMTAB_SHNDX: 755 shndxTable = CHECK(obj.getSHNDXTable(sec, objSections), this); 756 break; 757 case SHT_SYMTAB: 758 case SHT_STRTAB: 759 case SHT_REL: 760 case SHT_RELA: 761 case SHT_NULL: 762 break; 763 case SHT_PROGBITS: { 764 this->sections[i] = createInputSection(i, sec, check(obj.getSectionName(sec, shstrtab))); 765 StringRef name = check(obj.getSectionName(sec, shstrtab)); 766 ArrayRef<char> data = 767 CHECK(obj.template getSectionContentsAsArray<char>(sec), this); 768 parseGNUWarning(name, data, sec.sh_size); 769 } 770 break; 771 case SHT_LLVM_SYMPART: 772 ctx.hasSympart.store(true, std::memory_order_relaxed); 773 [[fallthrough]]; 774 default: 775 this->sections[i] = 776 createInputSection(i, sec, check(obj.getSectionName(sec, shstrtab))); 777 } 778 } 779 780 // We have a second loop. It is used to: 781 // 1) handle SHF_LINK_ORDER sections. 782 // 2) create SHT_REL[A] sections. In some cases the section header index of a 783 // relocation section may be smaller than that of the relocated section. In 784 // such cases, the relocation section would attempt to reference a target 785 // section that has not yet been created. For simplicity, delay creation of 786 // relocation sections until now. 787 for (size_t i = 0; i != size; ++i) { 788 if (this->sections[i] == &InputSection::discarded) 789 continue; 790 const Elf_Shdr &sec = objSections[i]; 791 792 if (sec.sh_type == SHT_REL || sec.sh_type == SHT_RELA) { 793 // Find a relocation target section and associate this section with that. 794 // Target may have been discarded if it is in a different section group 795 // and the group is discarded, even though it's a violation of the spec. 796 // We handle that situation gracefully by discarding dangling relocation 797 // sections. 798 const uint32_t info = sec.sh_info; 799 InputSectionBase *s = getRelocTarget(i, sec, info); 800 if (!s) 801 continue; 802 803 // ELF spec allows mergeable sections with relocations, but they are rare, 804 // and it is in practice hard to merge such sections by contents, because 805 // applying relocations at end of linking changes section contents. So, we 806 // simply handle such sections as non-mergeable ones. Degrading like this 807 // is acceptable because section merging is optional. 808 if (auto *ms = dyn_cast<MergeInputSection>(s)) { 809 s = makeThreadLocal<InputSection>( 810 ms->file, ms->flags, ms->type, ms->addralign, 811 ms->contentMaybeDecompress(), ms->name); 812 sections[info] = s; 813 } 814 815 if (s->relSecIdx != 0) 816 error( 817 toString(s) + 818 ": multiple relocation sections to one section are not supported"); 819 s->relSecIdx = i; 820 821 // Relocation sections are usually removed from the output, so return 822 // `nullptr` for the normal case. However, if -r or --emit-relocs is 823 // specified, we need to copy them to the output. (Some post link analysis 824 // tools specify --emit-relocs to obtain the information.) 825 if (config->copyRelocs) { 826 auto *isec = makeThreadLocal<InputSection>( 827 *this, sec, check(obj.getSectionName(sec, shstrtab))); 828 // If the relocated section is discarded (due to /DISCARD/ or 829 // --gc-sections), the relocation section should be discarded as well. 830 s->dependentSections.push_back(isec); 831 sections[i] = isec; 832 } 833 continue; 834 } 835 836 // A SHF_LINK_ORDER section with sh_link=0 is handled as if it did not have 837 // the flag. 838 if (!sec.sh_link || !(sec.sh_flags & SHF_LINK_ORDER)) 839 continue; 840 841 InputSectionBase *linkSec = nullptr; 842 if (sec.sh_link < size) 843 linkSec = this->sections[sec.sh_link]; 844 if (!linkSec) 845 fatal(toString(this) + ": invalid sh_link index: " + Twine(sec.sh_link)); 846 847 // A SHF_LINK_ORDER section is discarded if its linked-to section is 848 // discarded. 849 InputSection *isec = cast<InputSection>(this->sections[i]); 850 linkSec->dependentSections.push_back(isec); 851 if (!isa<InputSection>(linkSec)) 852 error("a section " + isec->name + 853 " with SHF_LINK_ORDER should not refer a non-regular section: " + 854 toString(linkSec)); 855 } 856 857 for (ArrayRef<Elf_Word> entries : selectedGroups) 858 handleSectionGroup<ELFT>(this->sections, entries); 859 } 860 861 // If a source file is compiled with x86 hardware-assisted call flow control 862 // enabled, the generated object file contains feature flags indicating that 863 // fact. This function reads the feature flags and returns it. 864 // 865 // Essentially we want to read a single 32-bit value in this function, but this 866 // function is rather complicated because the value is buried deep inside a 867 // .note.gnu.property section. 868 // 869 // The section consists of one or more NOTE records. Each NOTE record consists 870 // of zero or more type-length-value fields. We want to find a field of a 871 // certain type. It seems a bit too much to just store a 32-bit value, perhaps 872 // the ABI is unnecessarily complicated. 873 template <class ELFT> static uint32_t readAndFeatures(const InputSection &sec) { 874 using Elf_Nhdr = typename ELFT::Nhdr; 875 using Elf_Note = typename ELFT::Note; 876 877 uint32_t featuresSet = 0; 878 ArrayRef<uint8_t> data = sec.content(); 879 auto reportFatal = [&](const uint8_t *place, const char *msg) { 880 fatal(toString(sec.file) + ":(" + sec.name + "+0x" + 881 Twine::utohexstr(place - sec.content().data()) + "): " + msg); 882 }; 883 while (!data.empty()) { 884 // Read one NOTE record. 885 auto *nhdr = reinterpret_cast<const Elf_Nhdr *>(data.data()); 886 if (data.size() < sizeof(Elf_Nhdr) || data.size() < nhdr->getSize()) 887 reportFatal(data.data(), "data is too short"); 888 889 Elf_Note note(*nhdr); 890 if (nhdr->n_type != NT_GNU_PROPERTY_TYPE_0 || note.getName() != "GNU") { 891 data = data.slice(nhdr->getSize()); 892 continue; 893 } 894 895 uint32_t featureAndType = config->emachine == EM_AARCH64 896 ? GNU_PROPERTY_AARCH64_FEATURE_1_AND 897 : GNU_PROPERTY_X86_FEATURE_1_AND; 898 899 // Read a body of a NOTE record, which consists of type-length-value fields. 900 ArrayRef<uint8_t> desc = note.getDesc(); 901 while (!desc.empty()) { 902 const uint8_t *place = desc.data(); 903 if (desc.size() < 8) 904 reportFatal(place, "program property is too short"); 905 uint32_t type = read32<ELFT::TargetEndianness>(desc.data()); 906 uint32_t size = read32<ELFT::TargetEndianness>(desc.data() + 4); 907 desc = desc.slice(8); 908 if (desc.size() < size) 909 reportFatal(place, "program property is too short"); 910 911 if (type == featureAndType) { 912 // We found a FEATURE_1_AND field. There may be more than one of these 913 // in a .note.gnu.property section, for a relocatable object we 914 // accumulate the bits set. 915 if (size < 4) 916 reportFatal(place, "FEATURE_1_AND entry is too short"); 917 featuresSet |= read32<ELFT::TargetEndianness>(desc.data()); 918 } 919 920 // Padding is present in the note descriptor, if necessary. 921 desc = desc.slice(alignTo<(ELFT::Is64Bits ? 8 : 4)>(size)); 922 } 923 924 // Go to next NOTE record to look for more FEATURE_1_AND descriptions. 925 data = data.slice(nhdr->getSize()); 926 } 927 928 return featuresSet; 929 } 930 931 template <class ELFT> 932 InputSectionBase *ObjFile<ELFT>::getRelocTarget(uint32_t idx, 933 const Elf_Shdr &sec, 934 uint32_t info) { 935 if (info < this->sections.size()) { 936 InputSectionBase *target = this->sections[info]; 937 938 // Strictly speaking, a relocation section must be included in the 939 // group of the section it relocates. However, LLVM 3.3 and earlier 940 // would fail to do so, so we gracefully handle that case. 941 if (target == &InputSection::discarded) 942 return nullptr; 943 944 if (target != nullptr) 945 return target; 946 } 947 948 error(toString(this) + Twine(": relocation section (index ") + Twine(idx) + 949 ") has invalid sh_info (" + Twine(info) + ")"); 950 return nullptr; 951 } 952 953 // The function may be called concurrently for different input files. For 954 // allocation, prefer makeThreadLocal which does not require holding a lock. 955 template <class ELFT> 956 InputSectionBase *ObjFile<ELFT>::createInputSection(uint32_t idx, 957 const Elf_Shdr &sec, 958 StringRef name) { 959 if (name.startswith(".n")) { 960 // The GNU linker uses .note.GNU-stack section as a marker indicating 961 // that the code in the object file does not expect that the stack is 962 // executable (in terms of NX bit). If all input files have the marker, 963 // the GNU linker adds a PT_GNU_STACK segment to tells the loader to 964 // make the stack non-executable. Most object files have this section as 965 // of 2017. 966 // 967 // But making the stack non-executable is a norm today for security 968 // reasons. Failure to do so may result in a serious security issue. 969 // Therefore, we make LLD always add PT_GNU_STACK unless it is 970 // explicitly told to do otherwise (by -z execstack). Because the stack 971 // executable-ness is controlled solely by command line options, 972 // .note.GNU-stack sections are simply ignored. 973 if (name == ".note.GNU-stack") 974 return &InputSection::discarded; 975 976 // Object files that use processor features such as Intel Control-Flow 977 // Enforcement (CET) or AArch64 Branch Target Identification BTI, use a 978 // .note.gnu.property section containing a bitfield of feature bits like the 979 // GNU_PROPERTY_X86_FEATURE_1_IBT flag. Read a bitmap containing the flag. 980 // 981 // Since we merge bitmaps from multiple object files to create a new 982 // .note.gnu.property containing a single AND'ed bitmap, we discard an input 983 // file's .note.gnu.property section. 984 if (name == ".note.gnu.property") { 985 this->andFeatures = readAndFeatures<ELFT>(InputSection(*this, sec, name)); 986 return &InputSection::discarded; 987 } 988 989 // Split stacks is a feature to support a discontiguous stack, 990 // commonly used in the programming language Go. For the details, 991 // see https://gcc.gnu.org/wiki/SplitStacks. An object file compiled 992 // for split stack will include a .note.GNU-split-stack section. 993 if (name == ".note.GNU-split-stack") { 994 if (config->relocatable) { 995 error( 996 "cannot mix split-stack and non-split-stack in a relocatable link"); 997 return &InputSection::discarded; 998 } 999 this->splitStack = true; 1000 return &InputSection::discarded; 1001 } 1002 1003 // An object file compiled for split stack, but where some of the 1004 // functions were compiled with the no_split_stack_attribute will 1005 // include a .note.GNU-no-split-stack section. 1006 if (name == ".note.GNU-no-split-stack") { 1007 this->someNoSplitStack = true; 1008 return &InputSection::discarded; 1009 } 1010 1011 // Strip existing .note.gnu.build-id sections so that the output won't have 1012 // more than one build-id. This is not usually a problem because input 1013 // object files normally don't have .build-id sections, but you can create 1014 // such files by "ld.{bfd,gold,lld} -r --build-id", and we want to guard 1015 // against it. 1016 if (name == ".note.gnu.build-id") 1017 return &InputSection::discarded; 1018 } 1019 1020 // The linker merges EH (exception handling) frames and creates a 1021 // .eh_frame_hdr section for runtime. So we handle them with a special 1022 // class. For relocatable outputs, they are just passed through. 1023 if (name == ".eh_frame" && !config->relocatable) 1024 return makeThreadLocal<EhInputSection>(*this, sec, name); 1025 1026 if ((sec.sh_flags & SHF_MERGE) && shouldMerge(sec, name)) 1027 return makeThreadLocal<MergeInputSection>(*this, sec, name); 1028 return makeThreadLocal<InputSection>(*this, sec, name); 1029 } 1030 1031 // Initialize this->Symbols. this->Symbols is a parallel array as 1032 // its corresponding ELF symbol table. 1033 template <class ELFT> 1034 void ObjFile<ELFT>::initializeSymbols(const object::ELFFile<ELFT> &obj) { 1035 ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>(); 1036 if (numSymbols == 0) { 1037 numSymbols = eSyms.size(); 1038 symbols = std::make_unique<Symbol *[]>(numSymbols); 1039 } 1040 1041 // Some entries have been filled by LazyObjFile. 1042 for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) 1043 if (!symbols[i]) 1044 symbols[i] = symtab.insert(CHECK(eSyms[i].getName(stringTable), this)); 1045 1046 // Perform symbol resolution on non-local symbols. 1047 SmallVector<unsigned, 32> undefineds; 1048 for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) { 1049 const Elf_Sym &eSym = eSyms[i]; 1050 uint32_t secIdx = eSym.st_shndx; 1051 if (secIdx == SHN_UNDEF) { 1052 undefineds.push_back(i); 1053 continue; 1054 } 1055 1056 uint8_t binding = eSym.getBinding(); 1057 uint8_t stOther = eSym.st_other; 1058 uint8_t type = eSym.getType(); 1059 uint64_t value = eSym.st_value; 1060 uint64_t size = eSym.st_size; 1061 1062 Symbol *sym = symbols[i]; 1063 sym->isUsedInRegularObj = true; 1064 if (LLVM_UNLIKELY(eSym.st_shndx == SHN_COMMON)) { 1065 if (value == 0 || value >= UINT32_MAX) 1066 fatal(toString(this) + ": common symbol '" + sym->getName() + 1067 "' has invalid alignment: " + Twine(value)); 1068 hasCommonSyms = true; 1069 sym->resolve( 1070 CommonSymbol{this, StringRef(), binding, stOther, type, value, size}); 1071 continue; 1072 } 1073 1074 // Handle global defined symbols. Defined::section will be set in postParse. 1075 sym->resolve(Defined{this, StringRef(), binding, stOther, type, value, size, 1076 nullptr}); 1077 } 1078 1079 // Undefined symbols (excluding those defined relative to non-prevailing 1080 // sections) can trigger recursive extract. Process defined symbols first so 1081 // that the relative order between a defined symbol and an undefined symbol 1082 // does not change the symbol resolution behavior. In addition, a set of 1083 // interconnected symbols will all be resolved to the same file, instead of 1084 // being resolved to different files. 1085 for (unsigned i : undefineds) { 1086 const Elf_Sym &eSym = eSyms[i]; 1087 Symbol *sym = symbols[i]; 1088 sym->resolve(Undefined{this, StringRef(), eSym.getBinding(), eSym.st_other, 1089 eSym.getType()}); 1090 sym->isUsedInRegularObj = true; 1091 sym->referenced = true; 1092 } 1093 } 1094 1095 template <class ELFT> 1096 void ObjFile<ELFT>::initSectionsAndLocalSyms(bool ignoreComdats) { 1097 if (!justSymbols) 1098 initializeSections(ignoreComdats, getObj()); 1099 1100 if (!firstGlobal) 1101 return; 1102 SymbolUnion *locals = makeThreadLocalN<SymbolUnion>(firstGlobal); 1103 memset(locals, 0, sizeof(SymbolUnion) * firstGlobal); 1104 1105 ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>(); 1106 for (size_t i = 0, end = firstGlobal; i != end; ++i) { 1107 const Elf_Sym &eSym = eSyms[i]; 1108 uint32_t secIdx = eSym.st_shndx; 1109 if (LLVM_UNLIKELY(secIdx == SHN_XINDEX)) 1110 secIdx = check(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable)); 1111 else if (secIdx >= SHN_LORESERVE) 1112 secIdx = 0; 1113 if (LLVM_UNLIKELY(secIdx >= sections.size())) 1114 fatal(toString(this) + ": invalid section index: " + Twine(secIdx)); 1115 if (LLVM_UNLIKELY(eSym.getBinding() != STB_LOCAL)) 1116 error(toString(this) + ": non-local symbol (" + Twine(i) + 1117 ") found at index < .symtab's sh_info (" + Twine(end) + ")"); 1118 1119 InputSectionBase *sec = sections[secIdx]; 1120 uint8_t type = eSym.getType(); 1121 if (type == STT_FILE) 1122 sourceFile = CHECK(eSym.getName(stringTable), this); 1123 if (LLVM_UNLIKELY(stringTable.size() <= eSym.st_name)) 1124 fatal(toString(this) + ": invalid symbol name offset"); 1125 StringRef name(stringTable.data() + eSym.st_name); 1126 1127 symbols[i] = reinterpret_cast<Symbol *>(locals + i); 1128 if (eSym.st_shndx == SHN_UNDEF || sec == &InputSection::discarded) 1129 new (symbols[i]) Undefined(this, name, STB_LOCAL, eSym.st_other, type, 1130 /*discardedSecIdx=*/secIdx); 1131 else 1132 new (symbols[i]) Defined(this, name, STB_LOCAL, eSym.st_other, type, 1133 eSym.st_value, eSym.st_size, sec); 1134 symbols[i]->partition = 1; 1135 symbols[i]->isUsedInRegularObj = true; 1136 } 1137 } 1138 1139 // Called after all ObjFile::parse is called for all ObjFiles. This checks 1140 // duplicate symbols and may do symbol property merge in the future. 1141 template <class ELFT> void ObjFile<ELFT>::postParse() { 1142 static std::mutex mu; 1143 ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>(); 1144 for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) { 1145 const Elf_Sym &eSym = eSyms[i]; 1146 Symbol &sym = *symbols[i]; 1147 uint32_t secIdx = eSym.st_shndx; 1148 uint8_t binding = eSym.getBinding(); 1149 if (LLVM_UNLIKELY(binding != STB_GLOBAL && binding != STB_WEAK && 1150 binding != STB_GNU_UNIQUE)) 1151 errorOrWarn(toString(this) + ": symbol (" + Twine(i) + 1152 ") has invalid binding: " + Twine((int)binding)); 1153 1154 // st_value of STT_TLS represents the assigned offset, not the actual 1155 // address which is used by STT_FUNC and STT_OBJECT. STT_TLS symbols can 1156 // only be referenced by special TLS relocations. It is usually an error if 1157 // a STT_TLS symbol is replaced by a non-STT_TLS symbol, vice versa. 1158 if (LLVM_UNLIKELY(sym.isTls()) && eSym.getType() != STT_TLS && 1159 eSym.getType() != STT_NOTYPE) 1160 errorOrWarn("TLS attribute mismatch: " + toString(sym) + "\n>>> in " + 1161 toString(sym.file) + "\n>>> in " + toString(this)); 1162 1163 // Handle non-COMMON defined symbol below. !sym.file allows a symbol 1164 // assignment to redefine a symbol without an error. 1165 if (!sym.file || !sym.isDefined() || secIdx == SHN_UNDEF || 1166 secIdx == SHN_COMMON) 1167 continue; 1168 1169 if (LLVM_UNLIKELY(secIdx == SHN_XINDEX)) 1170 secIdx = check(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable)); 1171 else if (secIdx >= SHN_LORESERVE) 1172 secIdx = 0; 1173 if (LLVM_UNLIKELY(secIdx >= sections.size())) 1174 fatal(toString(this) + ": invalid section index: " + Twine(secIdx)); 1175 InputSectionBase *sec = sections[secIdx]; 1176 if (sec == &InputSection::discarded) { 1177 if (sym.traced) { 1178 printTraceSymbol(Undefined{this, sym.getName(), sym.binding, 1179 sym.stOther, sym.type, secIdx}, 1180 sym.getName()); 1181 } 1182 if (sym.file == this) { 1183 std::lock_guard<std::mutex> lock(mu); 1184 ctx.nonPrevailingSyms.emplace_back(&sym, secIdx); 1185 } 1186 continue; 1187 } 1188 1189 if (sym.file == this) { 1190 cast<Defined>(sym).section = sec; 1191 continue; 1192 } 1193 1194 if (sym.binding == STB_WEAK || binding == STB_WEAK) 1195 continue; 1196 std::lock_guard<std::mutex> lock(mu); 1197 ctx.duplicates.push_back({&sym, this, sec, eSym.st_value}); 1198 } 1199 } 1200 1201 // The handling of tentative definitions (COMMON symbols) in archives is murky. 1202 // A tentative definition will be promoted to a global definition if there are 1203 // no non-tentative definitions to dominate it. When we hold a tentative 1204 // definition to a symbol and are inspecting archive members for inclusion 1205 // there are 2 ways we can proceed: 1206 // 1207 // 1) Consider the tentative definition a 'real' definition (ie promotion from 1208 // tentative to real definition has already happened) and not inspect 1209 // archive members for Global/Weak definitions to replace the tentative 1210 // definition. An archive member would only be included if it satisfies some 1211 // other undefined symbol. This is the behavior Gold uses. 1212 // 1213 // 2) Consider the tentative definition as still undefined (ie the promotion to 1214 // a real definition happens only after all symbol resolution is done). 1215 // The linker searches archive members for STB_GLOBAL definitions to 1216 // replace the tentative definition with. This is the behavior used by 1217 // GNU ld. 1218 // 1219 // The second behavior is inherited from SysVR4, which based it on the FORTRAN 1220 // COMMON BLOCK model. This behavior is needed for proper initialization in old 1221 // (pre F90) FORTRAN code that is packaged into an archive. 1222 // 1223 // The following functions search archive members for definitions to replace 1224 // tentative definitions (implementing behavior 2). 1225 static bool isBitcodeNonCommonDef(MemoryBufferRef mb, StringRef symName, 1226 StringRef archiveName) { 1227 IRSymtabFile symtabFile = check(readIRSymtab(mb)); 1228 for (const irsymtab::Reader::SymbolRef &sym : 1229 symtabFile.TheReader.symbols()) { 1230 if (sym.isGlobal() && sym.getName() == symName) 1231 return !sym.isUndefined() && !sym.isWeak() && !sym.isCommon(); 1232 } 1233 return false; 1234 } 1235 1236 template <class ELFT> 1237 static bool isNonCommonDef(ELFKind ekind, MemoryBufferRef mb, StringRef symName, 1238 StringRef archiveName) { 1239 ObjFile<ELFT> *obj = make<ObjFile<ELFT>>(ekind, mb, archiveName); 1240 obj->init(); 1241 StringRef stringtable = obj->getStringTable(); 1242 1243 for (auto sym : obj->template getGlobalELFSyms<ELFT>()) { 1244 Expected<StringRef> name = sym.getName(stringtable); 1245 if (name && name.get() == symName) 1246 return sym.isDefined() && sym.getBinding() == STB_GLOBAL && 1247 !sym.isCommon(); 1248 } 1249 return false; 1250 } 1251 1252 static bool isNonCommonDef(MemoryBufferRef mb, StringRef symName, 1253 StringRef archiveName) { 1254 switch (getELFKind(mb, archiveName)) { 1255 case ELF32LEKind: 1256 return isNonCommonDef<ELF32LE>(ELF32LEKind, mb, symName, archiveName); 1257 case ELF32BEKind: 1258 return isNonCommonDef<ELF32BE>(ELF32BEKind, mb, symName, archiveName); 1259 case ELF64LEKind: 1260 return isNonCommonDef<ELF64LE>(ELF64LEKind, mb, symName, archiveName); 1261 case ELF64BEKind: 1262 return isNonCommonDef<ELF64BE>(ELF64BEKind, mb, symName, archiveName); 1263 default: 1264 llvm_unreachable("getELFKind"); 1265 } 1266 } 1267 1268 unsigned SharedFile::vernauxNum; 1269 1270 SharedFile::SharedFile(MemoryBufferRef m, StringRef defaultSoName) 1271 : ELFFileBase(SharedKind, getELFKind(m, ""), m), soName(defaultSoName), 1272 isNeeded(!config->asNeeded) {} 1273 1274 // Parse the version definitions in the object file if present, and return a 1275 // vector whose nth element contains a pointer to the Elf_Verdef for version 1276 // identifier n. Version identifiers that are not definitions map to nullptr. 1277 template <typename ELFT> 1278 static SmallVector<const void *, 0> 1279 parseVerdefs(const uint8_t *base, const typename ELFT::Shdr *sec) { 1280 if (!sec) 1281 return {}; 1282 1283 // Build the Verdefs array by following the chain of Elf_Verdef objects 1284 // from the start of the .gnu.version_d section. 1285 SmallVector<const void *, 0> verdefs; 1286 const uint8_t *verdef = base + sec->sh_offset; 1287 for (unsigned i = 0, e = sec->sh_info; i != e; ++i) { 1288 auto *curVerdef = reinterpret_cast<const typename ELFT::Verdef *>(verdef); 1289 verdef += curVerdef->vd_next; 1290 unsigned verdefIndex = curVerdef->vd_ndx; 1291 if (verdefIndex >= verdefs.size()) 1292 verdefs.resize(verdefIndex + 1); 1293 verdefs[verdefIndex] = curVerdef; 1294 } 1295 return verdefs; 1296 } 1297 1298 // Parse SHT_GNU_verneed to properly set the name of a versioned undefined 1299 // symbol. We detect fatal issues which would cause vulnerabilities, but do not 1300 // implement sophisticated error checking like in llvm-readobj because the value 1301 // of such diagnostics is low. 1302 template <typename ELFT> 1303 std::vector<uint32_t> SharedFile::parseVerneed(const ELFFile<ELFT> &obj, 1304 const typename ELFT::Shdr *sec) { 1305 if (!sec) 1306 return {}; 1307 std::vector<uint32_t> verneeds; 1308 ArrayRef<uint8_t> data = CHECK(obj.getSectionContents(*sec), this); 1309 const uint8_t *verneedBuf = data.begin(); 1310 for (unsigned i = 0; i != sec->sh_info; ++i) { 1311 if (verneedBuf + sizeof(typename ELFT::Verneed) > data.end()) 1312 fatal(toString(this) + " has an invalid Verneed"); 1313 auto *vn = reinterpret_cast<const typename ELFT::Verneed *>(verneedBuf); 1314 const uint8_t *vernauxBuf = verneedBuf + vn->vn_aux; 1315 for (unsigned j = 0; j != vn->vn_cnt; ++j) { 1316 if (vernauxBuf + sizeof(typename ELFT::Vernaux) > data.end()) 1317 fatal(toString(this) + " has an invalid Vernaux"); 1318 auto *aux = reinterpret_cast<const typename ELFT::Vernaux *>(vernauxBuf); 1319 if (aux->vna_name >= this->stringTable.size()) 1320 fatal(toString(this) + " has a Vernaux with an invalid vna_name"); 1321 uint16_t version = aux->vna_other & VERSYM_VERSION; 1322 if (version >= verneeds.size()) 1323 verneeds.resize(version + 1); 1324 verneeds[version] = aux->vna_name; 1325 vernauxBuf += aux->vna_next; 1326 } 1327 verneedBuf += vn->vn_next; 1328 } 1329 return verneeds; 1330 } 1331 1332 // We do not usually care about alignments of data in shared object 1333 // files because the loader takes care of it. However, if we promote a 1334 // DSO symbol to point to .bss due to copy relocation, we need to keep 1335 // the original alignment requirements. We infer it in this function. 1336 template <typename ELFT> 1337 static uint64_t getAlignment(ArrayRef<typename ELFT::Shdr> sections, 1338 const typename ELFT::Sym &sym) { 1339 uint64_t ret = UINT64_MAX; 1340 if (sym.st_value) 1341 ret = 1ULL << countTrailingZeros((uint64_t)sym.st_value); 1342 if (0 < sym.st_shndx && sym.st_shndx < sections.size()) 1343 ret = std::min<uint64_t>(ret, sections[sym.st_shndx].sh_addralign); 1344 return (ret > UINT32_MAX) ? 0 : ret; 1345 } 1346 1347 // Fully parse the shared object file. 1348 // 1349 // This function parses symbol versions. If a DSO has version information, 1350 // the file has a ".gnu.version_d" section which contains symbol version 1351 // definitions. Each symbol is associated to one version through a table in 1352 // ".gnu.version" section. That table is a parallel array for the symbol 1353 // table, and each table entry contains an index in ".gnu.version_d". 1354 // 1355 // The special index 0 is reserved for VERF_NDX_LOCAL and 1 is for 1356 // VER_NDX_GLOBAL. There's no table entry for these special versions in 1357 // ".gnu.version_d". 1358 // 1359 // The file format for symbol versioning is perhaps a bit more complicated 1360 // than necessary, but you can easily understand the code if you wrap your 1361 // head around the data structure described above. 1362 template <class ELFT> void SharedFile::parse() { 1363 using Elf_Dyn = typename ELFT::Dyn; 1364 using Elf_Shdr = typename ELFT::Shdr; 1365 using Elf_Sym = typename ELFT::Sym; 1366 using Elf_Verdef = typename ELFT::Verdef; 1367 using Elf_Versym = typename ELFT::Versym; 1368 1369 ArrayRef<Elf_Dyn> dynamicTags; 1370 const ELFFile<ELFT> obj = this->getObj<ELFT>(); 1371 ArrayRef<Elf_Shdr> sections = getELFShdrs<ELFT>(); 1372 1373 StringRef sectionStringTable = 1374 CHECK(obj.getSectionStringTable(sections), this); 1375 1376 const Elf_Shdr *versymSec = nullptr; 1377 const Elf_Shdr *verdefSec = nullptr; 1378 const Elf_Shdr *verneedSec = nullptr; 1379 1380 // Search for .dynsym, .dynamic, .symtab, .gnu.version and .gnu.version_d. 1381 for (const Elf_Shdr &sec : sections) { 1382 switch (sec.sh_type) { 1383 default: 1384 continue; 1385 case SHT_DYNAMIC: 1386 dynamicTags = 1387 CHECK(obj.template getSectionContentsAsArray<Elf_Dyn>(sec), this); 1388 break; 1389 case SHT_GNU_versym: 1390 versymSec = &sec; 1391 break; 1392 case SHT_GNU_verdef: 1393 verdefSec = &sec; 1394 break; 1395 case SHT_GNU_verneed: 1396 verneedSec = &sec; 1397 break; 1398 case SHT_PROGBITS: { 1399 StringRef name = CHECK(obj.getSectionName(sec, sectionStringTable), this); 1400 ArrayRef<char> data = 1401 CHECK(obj.template getSectionContentsAsArray<char>(sec), this); 1402 parseGNUWarning(name, data, sec.sh_size); 1403 break; 1404 } 1405 } 1406 } 1407 1408 if (versymSec && numELFSyms == 0) { 1409 error("SHT_GNU_versym should be associated with symbol table"); 1410 return; 1411 } 1412 1413 // Search for a DT_SONAME tag to initialize this->soName. 1414 for (const Elf_Dyn &dyn : dynamicTags) { 1415 if (dyn.d_tag == DT_NEEDED) { 1416 uint64_t val = dyn.getVal(); 1417 if (val >= this->stringTable.size()) 1418 fatal(toString(this) + ": invalid DT_NEEDED entry"); 1419 dtNeeded.push_back(this->stringTable.data() + val); 1420 } else if (dyn.d_tag == DT_SONAME) { 1421 uint64_t val = dyn.getVal(); 1422 if (val >= this->stringTable.size()) 1423 fatal(toString(this) + ": invalid DT_SONAME entry"); 1424 soName = this->stringTable.data() + val; 1425 } 1426 } 1427 1428 // DSOs are uniquified not by filename but by soname. 1429 DenseMap<CachedHashStringRef, SharedFile *>::iterator it; 1430 bool wasInserted; 1431 std::tie(it, wasInserted) = 1432 symtab.soNames.try_emplace(CachedHashStringRef(soName), this); 1433 1434 // If a DSO appears more than once on the command line with and without 1435 // --as-needed, --no-as-needed takes precedence over --as-needed because a 1436 // user can add an extra DSO with --no-as-needed to force it to be added to 1437 // the dependency list. 1438 it->second->isNeeded |= isNeeded; 1439 if (!wasInserted) 1440 return; 1441 1442 ctx.sharedFiles.push_back(this); 1443 1444 verdefs = parseVerdefs<ELFT>(obj.base(), verdefSec); 1445 std::vector<uint32_t> verneeds = parseVerneed<ELFT>(obj, verneedSec); 1446 1447 // Parse ".gnu.version" section which is a parallel array for the symbol 1448 // table. If a given file doesn't have a ".gnu.version" section, we use 1449 // VER_NDX_GLOBAL. 1450 size_t size = numELFSyms - firstGlobal; 1451 std::vector<uint16_t> versyms(size, VER_NDX_GLOBAL); 1452 if (versymSec) { 1453 ArrayRef<Elf_Versym> versym = 1454 CHECK(obj.template getSectionContentsAsArray<Elf_Versym>(*versymSec), 1455 this) 1456 .slice(firstGlobal); 1457 for (size_t i = 0; i < size; ++i) 1458 versyms[i] = versym[i].vs_index; 1459 } 1460 1461 // System libraries can have a lot of symbols with versions. Using a 1462 // fixed buffer for computing the versions name (foo@ver) can save a 1463 // lot of allocations. 1464 SmallString<0> versionedNameBuffer; 1465 1466 // Add symbols to the symbol table. 1467 ArrayRef<Elf_Sym> syms = this->getGlobalELFSyms<ELFT>(); 1468 for (size_t i = 0, e = syms.size(); i != e; ++i) { 1469 const Elf_Sym &sym = syms[i]; 1470 1471 // ELF spec requires that all local symbols precede weak or global 1472 // symbols in each symbol table, and the index of first non-local symbol 1473 // is stored to sh_info. If a local symbol appears after some non-local 1474 // symbol, that's a violation of the spec. 1475 StringRef name = CHECK(sym.getName(stringTable), this); 1476 if (sym.getBinding() == STB_LOCAL) { 1477 errorOrWarn(toString(this) + ": invalid local symbol '" + name + 1478 "' in global part of symbol table"); 1479 continue; 1480 } 1481 1482 const uint16_t ver = versyms[i], idx = ver & ~VERSYM_HIDDEN; 1483 if (sym.isUndefined()) { 1484 // For unversioned undefined symbols, VER_NDX_GLOBAL makes more sense but 1485 // as of binutils 2.34, GNU ld produces VER_NDX_LOCAL. 1486 if (ver != VER_NDX_LOCAL && ver != VER_NDX_GLOBAL) { 1487 if (idx >= verneeds.size()) { 1488 error("corrupt input file: version need index " + Twine(idx) + 1489 " for symbol " + name + " is out of bounds\n>>> defined in " + 1490 toString(this)); 1491 continue; 1492 } 1493 StringRef verName = stringTable.data() + verneeds[idx]; 1494 versionedNameBuffer.clear(); 1495 name = saver().save( 1496 (name + "@" + verName).toStringRef(versionedNameBuffer)); 1497 } 1498 Symbol *s = symtab.addSymbol( 1499 Undefined{this, name, sym.getBinding(), sym.st_other, sym.getType()}); 1500 s->exportDynamic = true; 1501 if (s->isUndefined() && sym.getBinding() != STB_WEAK && 1502 config->unresolvedSymbolsInShlib != UnresolvedPolicy::Ignore) 1503 requiredSymbols.push_back(s); 1504 continue; 1505 } 1506 1507 if (ver == VER_NDX_LOCAL || 1508 (ver != VER_NDX_GLOBAL && idx >= verdefs.size())) { 1509 // In GNU ld < 2.31 (before 3be08ea4728b56d35e136af4e6fd3086ade17764), the 1510 // MIPS port puts _gp_disp symbol into DSO files and incorrectly assigns 1511 // VER_NDX_LOCAL. Workaround this bug. 1512 if (config->emachine == EM_MIPS && name == "_gp_disp") 1513 continue; 1514 error("corrupt input file: version definition index " + Twine(idx) + 1515 " for symbol " + name + " is out of bounds\n>>> defined in " + 1516 toString(this)); 1517 continue; 1518 } 1519 1520 uint32_t alignment = getAlignment<ELFT>(sections, sym); 1521 if (ver == idx) { 1522 auto *s = symtab.addSymbol( 1523 SharedSymbol{*this, name, sym.getBinding(), sym.st_other, 1524 sym.getType(), sym.st_value, sym.st_size, alignment}); 1525 if (s->file == this) 1526 s->verdefIndex = ver; 1527 } 1528 1529 // Also add the symbol with the versioned name to handle undefined symbols 1530 // with explicit versions. 1531 if (ver == VER_NDX_GLOBAL) 1532 continue; 1533 1534 StringRef verName = 1535 stringTable.data() + 1536 reinterpret_cast<const Elf_Verdef *>(verdefs[idx])->getAux()->vda_name; 1537 versionedNameBuffer.clear(); 1538 name = (name + "@" + verName).toStringRef(versionedNameBuffer); 1539 auto *s = symtab.addSymbol( 1540 SharedSymbol{*this, saver().save(name), sym.getBinding(), sym.st_other, 1541 sym.getType(), sym.st_value, sym.st_size, alignment}); 1542 if (s->file == this) 1543 s->verdefIndex = idx; 1544 } 1545 } 1546 1547 static ELFKind getBitcodeELFKind(const Triple &t) { 1548 if (t.isLittleEndian()) 1549 return t.isArch64Bit() ? ELF64LEKind : ELF32LEKind; 1550 return t.isArch64Bit() ? ELF64BEKind : ELF32BEKind; 1551 } 1552 1553 static uint16_t getBitcodeMachineKind(StringRef path, const Triple &t) { 1554 switch (t.getArch()) { 1555 case Triple::aarch64: 1556 case Triple::aarch64_be: 1557 return EM_AARCH64; 1558 case Triple::amdgcn: 1559 case Triple::r600: 1560 return EM_AMDGPU; 1561 case Triple::arm: 1562 case Triple::thumb: 1563 return EM_ARM; 1564 case Triple::avr: 1565 return EM_AVR; 1566 case Triple::hexagon: 1567 return EM_HEXAGON; 1568 case Triple::mips: 1569 case Triple::mipsel: 1570 case Triple::mips64: 1571 case Triple::mips64el: 1572 return EM_MIPS; 1573 case Triple::msp430: 1574 return EM_MSP430; 1575 case Triple::ppc: 1576 case Triple::ppcle: 1577 return EM_PPC; 1578 case Triple::ppc64: 1579 case Triple::ppc64le: 1580 return EM_PPC64; 1581 case Triple::riscv32: 1582 case Triple::riscv64: 1583 return EM_RISCV; 1584 case Triple::x86: 1585 return t.isOSIAMCU() ? EM_IAMCU : EM_386; 1586 case Triple::x86_64: 1587 return EM_X86_64; 1588 default: 1589 error(path + ": could not infer e_machine from bitcode target triple " + 1590 t.str()); 1591 return EM_NONE; 1592 } 1593 } 1594 1595 static uint8_t getOsAbi(const Triple &t) { 1596 switch (t.getOS()) { 1597 case Triple::AMDHSA: 1598 return ELF::ELFOSABI_AMDGPU_HSA; 1599 case Triple::AMDPAL: 1600 return ELF::ELFOSABI_AMDGPU_PAL; 1601 case Triple::Mesa3D: 1602 return ELF::ELFOSABI_AMDGPU_MESA3D; 1603 default: 1604 return ELF::ELFOSABI_NONE; 1605 } 1606 } 1607 1608 BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName, 1609 uint64_t offsetInArchive, bool lazy) 1610 : InputFile(BitcodeKind, mb) { 1611 this->archiveName = archiveName; 1612 this->lazy = lazy; 1613 1614 std::string path = mb.getBufferIdentifier().str(); 1615 if (config->thinLTOIndexOnly) 1616 path = replaceThinLTOSuffix(mb.getBufferIdentifier()); 1617 1618 // ThinLTO assumes that all MemoryBufferRefs given to it have a unique 1619 // name. If two archives define two members with the same name, this 1620 // causes a collision which result in only one of the objects being taken 1621 // into consideration at LTO time (which very likely causes undefined 1622 // symbols later in the link stage). So we append file offset to make 1623 // filename unique. 1624 StringRef name = archiveName.empty() 1625 ? saver().save(path) 1626 : saver().save(archiveName + "(" + path::filename(path) + 1627 " at " + utostr(offsetInArchive) + ")"); 1628 MemoryBufferRef mbref(mb.getBuffer(), name); 1629 1630 obj = CHECK(lto::InputFile::create(mbref), this); 1631 1632 Triple t(obj->getTargetTriple()); 1633 ekind = getBitcodeELFKind(t); 1634 emachine = getBitcodeMachineKind(mb.getBufferIdentifier(), t); 1635 osabi = getOsAbi(t); 1636 } 1637 1638 static uint8_t mapVisibility(GlobalValue::VisibilityTypes gvVisibility) { 1639 switch (gvVisibility) { 1640 case GlobalValue::DefaultVisibility: 1641 return STV_DEFAULT; 1642 case GlobalValue::HiddenVisibility: 1643 return STV_HIDDEN; 1644 case GlobalValue::ProtectedVisibility: 1645 return STV_PROTECTED; 1646 } 1647 llvm_unreachable("unknown visibility"); 1648 } 1649 1650 static void 1651 createBitcodeSymbol(Symbol *&sym, const std::vector<bool> &keptComdats, 1652 const lto::InputFile::Symbol &objSym, BitcodeFile &f) { 1653 uint8_t binding = objSym.isWeak() ? STB_WEAK : STB_GLOBAL; 1654 uint8_t type = objSym.isTLS() ? STT_TLS : STT_NOTYPE; 1655 uint8_t visibility = mapVisibility(objSym.getVisibility()); 1656 1657 if (!sym) 1658 sym = symtab.insert(saver().save(objSym.getName())); 1659 1660 int c = objSym.getComdatIndex(); 1661 if (objSym.isUndefined() || (c != -1 && !keptComdats[c])) { 1662 Undefined newSym(&f, StringRef(), binding, visibility, type); 1663 sym->resolve(newSym); 1664 sym->referenced = true; 1665 return; 1666 } 1667 1668 if (objSym.isCommon()) { 1669 sym->resolve(CommonSymbol{&f, StringRef(), binding, visibility, STT_OBJECT, 1670 objSym.getCommonAlignment(), 1671 objSym.getCommonSize()}); 1672 } else { 1673 Defined newSym(&f, StringRef(), binding, visibility, type, 0, 0, nullptr); 1674 if (objSym.canBeOmittedFromSymbolTable()) 1675 newSym.exportDynamic = false; 1676 sym->resolve(newSym); 1677 } 1678 } 1679 1680 void BitcodeFile::parse() { 1681 for (std::pair<StringRef, Comdat::SelectionKind> s : obj->getComdatTable()) { 1682 keptComdats.push_back( 1683 s.second == Comdat::NoDeduplicate || 1684 symtab.comdatGroups.try_emplace(CachedHashStringRef(s.first), this) 1685 .second); 1686 } 1687 1688 if (numSymbols == 0) { 1689 numSymbols = obj->symbols().size(); 1690 symbols = std::make_unique<Symbol *[]>(numSymbols); 1691 } 1692 // Process defined symbols first. See the comment in 1693 // ObjFile<ELFT>::initializeSymbols. 1694 for (auto [i, irSym] : llvm::enumerate(obj->symbols())) 1695 if (!irSym.isUndefined()) 1696 createBitcodeSymbol(symbols[i], keptComdats, irSym, *this); 1697 for (auto [i, irSym] : llvm::enumerate(obj->symbols())) 1698 if (irSym.isUndefined()) 1699 createBitcodeSymbol(symbols[i], keptComdats, irSym, *this); 1700 1701 for (auto l : obj->getDependentLibraries()) 1702 addDependentLibrary(l, this); 1703 } 1704 1705 void BitcodeFile::parseLazy() { 1706 numSymbols = obj->symbols().size(); 1707 symbols = std::make_unique<Symbol *[]>(numSymbols); 1708 for (auto [i, irSym] : llvm::enumerate(obj->symbols())) 1709 if (!irSym.isUndefined()) { 1710 auto *sym = symtab.insert(saver().save(irSym.getName())); 1711 sym->resolve(LazyObject{*this}); 1712 symbols[i] = sym; 1713 } 1714 } 1715 1716 void BitcodeFile::postParse() { 1717 for (auto [i, irSym] : llvm::enumerate(obj->symbols())) { 1718 const Symbol &sym = *symbols[i]; 1719 if (sym.file == this || !sym.isDefined() || irSym.isUndefined() || 1720 irSym.isCommon() || irSym.isWeak()) 1721 continue; 1722 int c = irSym.getComdatIndex(); 1723 if (c != -1 && !keptComdats[c]) 1724 continue; 1725 reportDuplicate(sym, this, nullptr, 0); 1726 } 1727 } 1728 1729 void BinaryFile::parse() { 1730 ArrayRef<uint8_t> data = arrayRefFromStringRef(mb.getBuffer()); 1731 auto *section = make<InputSection>(this, SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, 1732 8, data, ".data"); 1733 sections.push_back(section); 1734 1735 // For each input file foo that is embedded to a result as a binary 1736 // blob, we define _binary_foo_{start,end,size} symbols, so that 1737 // user programs can access blobs by name. Non-alphanumeric 1738 // characters in a filename are replaced with underscore. 1739 std::string s = "_binary_" + mb.getBufferIdentifier().str(); 1740 for (size_t i = 0; i < s.size(); ++i) 1741 if (!isAlnum(s[i])) 1742 s[i] = '_'; 1743 1744 llvm::StringSaver &saver = lld::saver(); 1745 1746 symtab.addAndCheckDuplicate(Defined{nullptr, saver.save(s + "_start"), 1747 STB_GLOBAL, STV_DEFAULT, STT_OBJECT, 0, 0, 1748 section}); 1749 symtab.addAndCheckDuplicate(Defined{nullptr, saver.save(s + "_end"), 1750 STB_GLOBAL, STV_DEFAULT, STT_OBJECT, 1751 data.size(), 0, section}); 1752 symtab.addAndCheckDuplicate(Defined{nullptr, saver.save(s + "_size"), 1753 STB_GLOBAL, STV_DEFAULT, STT_OBJECT, 1754 data.size(), 0, nullptr}); 1755 } 1756 1757 ELFFileBase *elf::createObjFile(MemoryBufferRef mb, StringRef archiveName, 1758 bool lazy) { 1759 ELFFileBase *f; 1760 switch (getELFKind(mb, archiveName)) { 1761 case ELF32LEKind: 1762 f = make<ObjFile<ELF32LE>>(ELF32LEKind, mb, archiveName); 1763 break; 1764 case ELF32BEKind: 1765 f = make<ObjFile<ELF32BE>>(ELF32BEKind, mb, archiveName); 1766 break; 1767 case ELF64LEKind: 1768 f = make<ObjFile<ELF64LE>>(ELF64LEKind, mb, archiveName); 1769 break; 1770 case ELF64BEKind: 1771 f = make<ObjFile<ELF64BE>>(ELF64BEKind, mb, archiveName); 1772 break; 1773 default: 1774 llvm_unreachable("getELFKind"); 1775 } 1776 f->init(); 1777 f->lazy = lazy; 1778 return f; 1779 } 1780 1781 template <class ELFT> void ObjFile<ELFT>::parseLazy() { 1782 const ArrayRef<typename ELFT::Sym> eSyms = this->getELFSyms<ELFT>(); 1783 numSymbols = eSyms.size(); 1784 symbols = std::make_unique<Symbol *[]>(numSymbols); 1785 1786 // resolve() may trigger this->extract() if an existing symbol is an undefined 1787 // symbol. If that happens, this function has served its purpose, and we can 1788 // exit from the loop early. 1789 for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) { 1790 if (eSyms[i].st_shndx == SHN_UNDEF) 1791 continue; 1792 symbols[i] = symtab.insert(CHECK(eSyms[i].getName(stringTable), this)); 1793 symbols[i]->resolve(LazyObject{*this}); 1794 if (!lazy) 1795 break; 1796 } 1797 } 1798 1799 bool InputFile::shouldExtractForCommon(StringRef name) { 1800 if (isa<BitcodeFile>(this)) 1801 return isBitcodeNonCommonDef(mb, name, archiveName); 1802 1803 return isNonCommonDef(mb, name, archiveName); 1804 } 1805 1806 std::string elf::replaceThinLTOSuffix(StringRef path) { 1807 auto [suffix, repl] = config->thinLTOObjectSuffixReplace; 1808 if (path.consume_back(suffix)) 1809 return (path + repl).str(); 1810 return std::string(path); 1811 } 1812 1813 template class elf::ObjFile<ELF32LE>; 1814 template class elf::ObjFile<ELF32BE>; 1815 template class elf::ObjFile<ELF64LE>; 1816 template class elf::ObjFile<ELF64BE>; 1817 1818 template void SharedFile::parse<ELF32LE>(); 1819 template void SharedFile::parse<ELF32BE>(); 1820 template void SharedFile::parse<ELF64LE>(); 1821 template void SharedFile::parse<ELF64BE>(); 1822