xref: /openbsd-src/gnu/llvm/lld/ELF/InputFiles.cpp (revision 1ad61ae0a79a724d2d3ec69e69c8e1d1ff6b53a0)
1 //===- InputFiles.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "InputFiles.h"
10 #include "Config.h"
11 #include "DWARF.h"
12 #include "Driver.h"
13 #include "InputSection.h"
14 #include "LinkerScript.h"
15 #include "SymbolTable.h"
16 #include "Symbols.h"
17 #include "SyntheticSections.h"
18 #include "Target.h"
19 #include "lld/Common/CommonLinkerContext.h"
20 #include "lld/Common/DWARF.h"
21 #include "llvm/ADT/CachedHashString.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/LTO/LTO.h"
24 #include "llvm/Object/IRObjectFile.h"
25 #include "llvm/Support/ARMAttributeParser.h"
26 #include "llvm/Support/ARMBuildAttributes.h"
27 #include "llvm/Support/Endian.h"
28 #include "llvm/Support/FileSystem.h"
29 #include "llvm/Support/Path.h"
30 #include "llvm/Support/RISCVAttributeParser.h"
31 #include "llvm/Support/TarWriter.h"
32 #include "llvm/Support/raw_ostream.h"
33 
34 using namespace llvm;
35 using namespace llvm::ELF;
36 using namespace llvm::object;
37 using namespace llvm::sys;
38 using namespace llvm::sys::fs;
39 using namespace llvm::support::endian;
40 using namespace lld;
41 using namespace lld::elf;
42 
43 bool InputFile::isInGroup;
44 uint32_t InputFile::nextGroupId;
45 
46 std::unique_ptr<TarWriter> elf::tar;
47 
48 DenseMap<StringRef, StringRef> elf::gnuWarnings;
49 
50 // Returns "<internal>", "foo.a(bar.o)" or "baz.o".
51 std::string lld::toString(const InputFile *f) {
52   static std::mutex mu;
53   if (!f)
54     return "<internal>";
55 
56   {
57     std::lock_guard<std::mutex> lock(mu);
58     if (f->toStringCache.empty()) {
59       if (f->archiveName.empty())
60         f->toStringCache = f->getName();
61       else
62         (f->archiveName + "(" + f->getName() + ")").toVector(f->toStringCache);
63     }
64   }
65   return std::string(f->toStringCache);
66 }
67 
68 // .gnu.warning.SYMBOL are treated as warning symbols for the given symbol
69 void lld::parseGNUWarning(StringRef name, ArrayRef<char> data, size_t size) {
70   if (!name.empty() && name.startswith(".gnu.warning.")) {
71     StringRef wsym = name.substr(13);
72     StringRef s(data.begin());
73     StringRef wng(s.substr(0, size));
74     symtab.insert(wsym)->gwarn = true;
75     gnuWarnings.insert({wsym, wng});
76   }
77 }
78 
79 static ELFKind getELFKind(MemoryBufferRef mb, StringRef archiveName) {
80   unsigned char size;
81   unsigned char endian;
82   std::tie(size, endian) = getElfArchType(mb.getBuffer());
83 
84   auto report = [&](StringRef msg) {
85     StringRef filename = mb.getBufferIdentifier();
86     if (archiveName.empty())
87       fatal(filename + ": " + msg);
88     else
89       fatal(archiveName + "(" + filename + "): " + msg);
90   };
91 
92   if (!mb.getBuffer().startswith(ElfMagic))
93     report("not an ELF file");
94   if (endian != ELFDATA2LSB && endian != ELFDATA2MSB)
95     report("corrupted ELF file: invalid data encoding");
96   if (size != ELFCLASS32 && size != ELFCLASS64)
97     report("corrupted ELF file: invalid file class");
98 
99   size_t bufSize = mb.getBuffer().size();
100   if ((size == ELFCLASS32 && bufSize < sizeof(Elf32_Ehdr)) ||
101       (size == ELFCLASS64 && bufSize < sizeof(Elf64_Ehdr)))
102     report("corrupted ELF file: file is too short");
103 
104   if (size == ELFCLASS32)
105     return (endian == ELFDATA2LSB) ? ELF32LEKind : ELF32BEKind;
106   return (endian == ELFDATA2LSB) ? ELF64LEKind : ELF64BEKind;
107 }
108 
109 // For ARM only, to set the EF_ARM_ABI_FLOAT_SOFT or EF_ARM_ABI_FLOAT_HARD
110 // flag in the ELF Header we need to look at Tag_ABI_VFP_args to find out how
111 // the input objects have been compiled.
112 static void updateARMVFPArgs(const ARMAttributeParser &attributes,
113                              const InputFile *f) {
114   std::optional<unsigned> attr =
115       attributes.getAttributeValue(ARMBuildAttrs::ABI_VFP_args);
116   if (!attr)
117     // If an ABI tag isn't present then it is implicitly given the value of 0
118     // which maps to ARMBuildAttrs::BaseAAPCS. However many assembler files,
119     // including some in glibc that don't use FP args (and should have value 3)
120     // don't have the attribute so we do not consider an implicit value of 0
121     // as a clash.
122     return;
123 
124   unsigned vfpArgs = *attr;
125   ARMVFPArgKind arg;
126   switch (vfpArgs) {
127   case ARMBuildAttrs::BaseAAPCS:
128     arg = ARMVFPArgKind::Base;
129     break;
130   case ARMBuildAttrs::HardFPAAPCS:
131     arg = ARMVFPArgKind::VFP;
132     break;
133   case ARMBuildAttrs::ToolChainFPPCS:
134     // Tool chain specific convention that conforms to neither AAPCS variant.
135     arg = ARMVFPArgKind::ToolChain;
136     break;
137   case ARMBuildAttrs::CompatibleFPAAPCS:
138     // Object compatible with all conventions.
139     return;
140   default:
141     error(toString(f) + ": unknown Tag_ABI_VFP_args value: " + Twine(vfpArgs));
142     return;
143   }
144   // Follow ld.bfd and error if there is a mix of calling conventions.
145   if (config->armVFPArgs != arg && config->armVFPArgs != ARMVFPArgKind::Default)
146     error(toString(f) + ": incompatible Tag_ABI_VFP_args");
147   else
148     config->armVFPArgs = arg;
149 }
150 
151 // The ARM support in lld makes some use of instructions that are not available
152 // on all ARM architectures. Namely:
153 // - Use of BLX instruction for interworking between ARM and Thumb state.
154 // - Use of the extended Thumb branch encoding in relocation.
155 // - Use of the MOVT/MOVW instructions in Thumb Thunks.
156 // The ARM Attributes section contains information about the architecture chosen
157 // at compile time. We follow the convention that if at least one input object
158 // is compiled with an architecture that supports these features then lld is
159 // permitted to use them.
160 static void updateSupportedARMFeatures(const ARMAttributeParser &attributes) {
161   std::optional<unsigned> attr =
162       attributes.getAttributeValue(ARMBuildAttrs::CPU_arch);
163   if (!attr)
164     return;
165   auto arch = *attr;
166   switch (arch) {
167   case ARMBuildAttrs::Pre_v4:
168   case ARMBuildAttrs::v4:
169   case ARMBuildAttrs::v4T:
170     // Architectures prior to v5 do not support BLX instruction
171     break;
172   case ARMBuildAttrs::v5T:
173   case ARMBuildAttrs::v5TE:
174   case ARMBuildAttrs::v5TEJ:
175   case ARMBuildAttrs::v6:
176   case ARMBuildAttrs::v6KZ:
177   case ARMBuildAttrs::v6K:
178     config->armHasBlx = true;
179     // Architectures used in pre-Cortex processors do not support
180     // The J1 = 1 J2 = 1 Thumb branch range extension, with the exception
181     // of Architecture v6T2 (arm1156t2-s and arm1156t2f-s) that do.
182     break;
183   default:
184     // All other Architectures have BLX and extended branch encoding
185     config->armHasBlx = true;
186     config->armJ1J2BranchEncoding = true;
187     if (arch != ARMBuildAttrs::v6_M && arch != ARMBuildAttrs::v6S_M)
188       // All Architectures used in Cortex processors with the exception
189       // of v6-M and v6S-M have the MOVT and MOVW instructions.
190       config->armHasMovtMovw = true;
191     break;
192   }
193 }
194 
195 InputFile::InputFile(Kind k, MemoryBufferRef m)
196     : mb(m), groupId(nextGroupId), fileKind(k) {
197   // All files within the same --{start,end}-group get the same group ID.
198   // Otherwise, a new file will get a new group ID.
199   if (!isInGroup)
200     ++nextGroupId;
201 }
202 
203 std::optional<MemoryBufferRef> elf::readFile(StringRef path) {
204   llvm::TimeTraceScope timeScope("Load input files", path);
205 
206   // The --chroot option changes our virtual root directory.
207   // This is useful when you are dealing with files created by --reproduce.
208   if (!config->chroot.empty() && path.startswith("/"))
209     path = saver().save(config->chroot + path);
210 
211   log(path);
212   config->dependencyFiles.insert(llvm::CachedHashString(path));
213 
214   auto mbOrErr = MemoryBuffer::getFile(path, /*IsText=*/false,
215                                        /*RequiresNullTerminator=*/false);
216   if (auto ec = mbOrErr.getError()) {
217     error("cannot open " + path + ": " + ec.message());
218     return std::nullopt;
219   }
220 
221   MemoryBufferRef mbref = (*mbOrErr)->getMemBufferRef();
222   ctx.memoryBuffers.push_back(std::move(*mbOrErr)); // take MB ownership
223 
224   if (tar)
225     tar->append(relativeToRoot(path), mbref.getBuffer());
226   return mbref;
227 }
228 
229 // All input object files must be for the same architecture
230 // (e.g. it does not make sense to link x86 object files with
231 // MIPS object files.) This function checks for that error.
232 static bool isCompatible(InputFile *file) {
233   if (!file->isElf() && !isa<BitcodeFile>(file))
234     return true;
235 
236   if (file->ekind == config->ekind && file->emachine == config->emachine) {
237     if (config->emachine != EM_MIPS)
238       return true;
239     if (isMipsN32Abi(file) == config->mipsN32Abi)
240       return true;
241   }
242 
243   StringRef target =
244       !config->bfdname.empty() ? config->bfdname : config->emulation;
245   if (!target.empty()) {
246     error(toString(file) + " is incompatible with " + target);
247     return false;
248   }
249 
250   InputFile *existing = nullptr;
251   if (!ctx.objectFiles.empty())
252     existing = ctx.objectFiles[0];
253   else if (!ctx.sharedFiles.empty())
254     existing = ctx.sharedFiles[0];
255   else if (!ctx.bitcodeFiles.empty())
256     existing = ctx.bitcodeFiles[0];
257   std::string with;
258   if (existing)
259     with = " with " + toString(existing);
260   error(toString(file) + " is incompatible" + with);
261   return false;
262 }
263 
264 template <class ELFT> static void doParseFile(InputFile *file) {
265   if (!isCompatible(file))
266     return;
267 
268   // Binary file
269   if (auto *f = dyn_cast<BinaryFile>(file)) {
270     ctx.binaryFiles.push_back(f);
271     f->parse();
272     return;
273   }
274 
275   // Lazy object file
276   if (file->lazy) {
277     if (auto *f = dyn_cast<BitcodeFile>(file)) {
278       ctx.lazyBitcodeFiles.push_back(f);
279       f->parseLazy();
280     } else {
281       cast<ObjFile<ELFT>>(file)->parseLazy();
282     }
283     return;
284   }
285 
286   if (config->trace)
287     message(toString(file));
288 
289   // .so file
290   if (auto *f = dyn_cast<SharedFile>(file)) {
291     f->parse<ELFT>();
292     return;
293   }
294 
295   // LLVM bitcode file
296   if (auto *f = dyn_cast<BitcodeFile>(file)) {
297     ctx.bitcodeFiles.push_back(f);
298     f->parse();
299     return;
300   }
301 
302   // Regular object file
303   ctx.objectFiles.push_back(cast<ELFFileBase>(file));
304   cast<ObjFile<ELFT>>(file)->parse();
305 }
306 
307 // Add symbols in File to the symbol table.
308 void elf::parseFile(InputFile *file) { invokeELFT(doParseFile, file); }
309 
310 // Concatenates arguments to construct a string representing an error location.
311 static std::string createFileLineMsg(StringRef path, unsigned line) {
312   std::string filename = std::string(path::filename(path));
313   std::string lineno = ":" + std::to_string(line);
314   if (filename == path)
315     return filename + lineno;
316   return filename + lineno + " (" + path.str() + lineno + ")";
317 }
318 
319 template <class ELFT>
320 static std::string getSrcMsgAux(ObjFile<ELFT> &file, const Symbol &sym,
321                                 InputSectionBase &sec, uint64_t offset) {
322   // In DWARF, functions and variables are stored to different places.
323   // First, look up a function for a given offset.
324   if (std::optional<DILineInfo> info = file.getDILineInfo(&sec, offset))
325     return createFileLineMsg(info->FileName, info->Line);
326 
327   // If it failed, look up again as a variable.
328   if (std::optional<std::pair<std::string, unsigned>> fileLine =
329           file.getVariableLoc(sym.getName()))
330     return createFileLineMsg(fileLine->first, fileLine->second);
331 
332   // File.sourceFile contains STT_FILE symbol, and that is a last resort.
333   return std::string(file.sourceFile);
334 }
335 
336 std::string InputFile::getSrcMsg(const Symbol &sym, InputSectionBase &sec,
337                                  uint64_t offset) {
338   if (kind() != ObjKind)
339     return "";
340   switch (ekind) {
341   default:
342     llvm_unreachable("Invalid kind");
343   case ELF32LEKind:
344     return getSrcMsgAux(cast<ObjFile<ELF32LE>>(*this), sym, sec, offset);
345   case ELF32BEKind:
346     return getSrcMsgAux(cast<ObjFile<ELF32BE>>(*this), sym, sec, offset);
347   case ELF64LEKind:
348     return getSrcMsgAux(cast<ObjFile<ELF64LE>>(*this), sym, sec, offset);
349   case ELF64BEKind:
350     return getSrcMsgAux(cast<ObjFile<ELF64BE>>(*this), sym, sec, offset);
351   }
352 }
353 
354 StringRef InputFile::getNameForScript() const {
355   if (archiveName.empty())
356     return getName();
357 
358   if (nameForScriptCache.empty())
359     nameForScriptCache = (archiveName + Twine(':') + getName()).str();
360 
361   return nameForScriptCache;
362 }
363 
364 // An ELF object file may contain a `.deplibs` section. If it exists, the
365 // section contains a list of library specifiers such as `m` for libm. This
366 // function resolves a given name by finding the first matching library checking
367 // the various ways that a library can be specified to LLD. This ELF extension
368 // is a form of autolinking and is called `dependent libraries`. It is currently
369 // unique to LLVM and lld.
370 static void addDependentLibrary(StringRef specifier, const InputFile *f) {
371   if (!config->dependentLibraries)
372     return;
373   if (std::optional<std::string> s = searchLibraryBaseName(specifier))
374     ctx.driver.addFile(saver().save(*s), /*withLOption=*/true);
375   else if (std::optional<std::string> s = findFromSearchPaths(specifier))
376     ctx.driver.addFile(saver().save(*s), /*withLOption=*/true);
377   else if (fs::exists(specifier))
378     ctx.driver.addFile(specifier, /*withLOption=*/false);
379   else
380     error(toString(f) +
381           ": unable to find library from dependent library specifier: " +
382           specifier);
383 }
384 
385 // Record the membership of a section group so that in the garbage collection
386 // pass, section group members are kept or discarded as a unit.
387 template <class ELFT>
388 static void handleSectionGroup(ArrayRef<InputSectionBase *> sections,
389                                ArrayRef<typename ELFT::Word> entries) {
390   bool hasAlloc = false;
391   for (uint32_t index : entries.slice(1)) {
392     if (index >= sections.size())
393       return;
394     if (InputSectionBase *s = sections[index])
395       if (s != &InputSection::discarded && s->flags & SHF_ALLOC)
396         hasAlloc = true;
397   }
398 
399   // If any member has the SHF_ALLOC flag, the whole group is subject to garbage
400   // collection. See the comment in markLive(). This rule retains .debug_types
401   // and .rela.debug_types.
402   if (!hasAlloc)
403     return;
404 
405   // Connect the members in a circular doubly-linked list via
406   // nextInSectionGroup.
407   InputSectionBase *head;
408   InputSectionBase *prev = nullptr;
409   for (uint32_t index : entries.slice(1)) {
410     InputSectionBase *s = sections[index];
411     if (!s || s == &InputSection::discarded)
412       continue;
413     if (prev)
414       prev->nextInSectionGroup = s;
415     else
416       head = s;
417     prev = s;
418   }
419   if (prev)
420     prev->nextInSectionGroup = head;
421 }
422 
423 template <class ELFT> DWARFCache *ObjFile<ELFT>::getDwarf() {
424   llvm::call_once(initDwarf, [this]() {
425     dwarf = std::make_unique<DWARFCache>(std::make_unique<DWARFContext>(
426         std::make_unique<LLDDwarfObj<ELFT>>(this), "",
427         [&](Error err) { warn(getName() + ": " + toString(std::move(err))); },
428         [&](Error warning) {
429           warn(getName() + ": " + toString(std::move(warning)));
430         }));
431   });
432 
433   return dwarf.get();
434 }
435 
436 // Returns the pair of file name and line number describing location of data
437 // object (variable, array, etc) definition.
438 template <class ELFT>
439 std::optional<std::pair<std::string, unsigned>>
440 ObjFile<ELFT>::getVariableLoc(StringRef name) {
441   return getDwarf()->getVariableLoc(name);
442 }
443 
444 // Returns source line information for a given offset
445 // using DWARF debug info.
446 template <class ELFT>
447 std::optional<DILineInfo> ObjFile<ELFT>::getDILineInfo(InputSectionBase *s,
448                                                        uint64_t offset) {
449   // Detect SectionIndex for specified section.
450   uint64_t sectionIndex = object::SectionedAddress::UndefSection;
451   ArrayRef<InputSectionBase *> sections = s->file->getSections();
452   for (uint64_t curIndex = 0; curIndex < sections.size(); ++curIndex) {
453     if (s == sections[curIndex]) {
454       sectionIndex = curIndex;
455       break;
456     }
457   }
458 
459   return getDwarf()->getDILineInfo(offset, sectionIndex);
460 }
461 
462 ELFFileBase::ELFFileBase(Kind k, ELFKind ekind, MemoryBufferRef mb)
463     : InputFile(k, mb) {
464   this->ekind = ekind;
465 }
466 
467 template <typename Elf_Shdr>
468 static const Elf_Shdr *findSection(ArrayRef<Elf_Shdr> sections, uint32_t type) {
469   for (const Elf_Shdr &sec : sections)
470     if (sec.sh_type == type)
471       return &sec;
472   return nullptr;
473 }
474 
475 void ELFFileBase::init() {
476   switch (ekind) {
477   case ELF32LEKind:
478     init<ELF32LE>(fileKind);
479     break;
480   case ELF32BEKind:
481     init<ELF32BE>(fileKind);
482     break;
483   case ELF64LEKind:
484     init<ELF64LE>(fileKind);
485     break;
486   case ELF64BEKind:
487     init<ELF64BE>(fileKind);
488     break;
489   default:
490     llvm_unreachable("getELFKind");
491   }
492 }
493 
494 template <class ELFT> void ELFFileBase::init(InputFile::Kind k) {
495   using Elf_Shdr = typename ELFT::Shdr;
496   using Elf_Sym = typename ELFT::Sym;
497 
498   // Initialize trivial attributes.
499   const ELFFile<ELFT> &obj = getObj<ELFT>();
500   emachine = obj.getHeader().e_machine;
501   osabi = obj.getHeader().e_ident[llvm::ELF::EI_OSABI];
502   abiVersion = obj.getHeader().e_ident[llvm::ELF::EI_ABIVERSION];
503 
504   ArrayRef<Elf_Shdr> sections = CHECK(obj.sections(), this);
505   elfShdrs = sections.data();
506   numELFShdrs = sections.size();
507 
508   // Find a symbol table.
509   const Elf_Shdr *symtabSec =
510       findSection(sections, k == SharedKind ? SHT_DYNSYM : SHT_SYMTAB);
511 
512   if (!symtabSec)
513     return;
514 
515   // Initialize members corresponding to a symbol table.
516   firstGlobal = symtabSec->sh_info;
517 
518   ArrayRef<Elf_Sym> eSyms = CHECK(obj.symbols(symtabSec), this);
519   if (firstGlobal == 0 || firstGlobal > eSyms.size())
520     fatal(toString(this) + ": invalid sh_info in symbol table");
521 
522   elfSyms = reinterpret_cast<const void *>(eSyms.data());
523   numELFSyms = uint32_t(eSyms.size());
524   stringTable = CHECK(obj.getStringTableForSymtab(*symtabSec, sections), this);
525 }
526 
527 template <class ELFT>
528 uint32_t ObjFile<ELFT>::getSectionIndex(const Elf_Sym &sym) const {
529   return CHECK(
530       this->getObj().getSectionIndex(sym, getELFSyms<ELFT>(), shndxTable),
531       this);
532 }
533 
534 template <class ELFT> void ObjFile<ELFT>::parse(bool ignoreComdats) {
535   object::ELFFile<ELFT> obj = this->getObj();
536   // Read a section table. justSymbols is usually false.
537   if (this->justSymbols) {
538     initializeJustSymbols();
539     initializeSymbols(obj);
540     return;
541   }
542 
543   // Handle dependent libraries and selection of section groups as these are not
544   // done in parallel.
545   ArrayRef<Elf_Shdr> objSections = getELFShdrs<ELFT>();
546   StringRef shstrtab = CHECK(obj.getSectionStringTable(objSections), this);
547   uint64_t size = objSections.size();
548   sections.resize(size);
549   for (size_t i = 0; i != size; ++i) {
550     const Elf_Shdr &sec = objSections[i];
551     if (sec.sh_type == SHT_LLVM_DEPENDENT_LIBRARIES && !config->relocatable) {
552       StringRef name = check(obj.getSectionName(sec, shstrtab));
553       ArrayRef<char> data = CHECK(
554           this->getObj().template getSectionContentsAsArray<char>(sec), this);
555       if (!data.empty() && data.back() != '\0') {
556         error(
557             toString(this) +
558             ": corrupted dependent libraries section (unterminated string): " +
559             name);
560       } else {
561         for (const char *d = data.begin(), *e = data.end(); d < e;) {
562           StringRef s(d);
563           addDependentLibrary(s, this);
564           d += s.size() + 1;
565         }
566       }
567       this->sections[i] = &InputSection::discarded;
568       continue;
569     }
570 
571     if (sec.sh_type == SHT_ARM_ATTRIBUTES && config->emachine == EM_ARM) {
572       ARMAttributeParser attributes;
573       ArrayRef<uint8_t> contents =
574           check(this->getObj().getSectionContents(sec));
575       StringRef name = check(obj.getSectionName(sec, shstrtab));
576       this->sections[i] = &InputSection::discarded;
577       if (Error e =
578               attributes.parse(contents, ekind == ELF32LEKind ? support::little
579                                                               : support::big)) {
580         InputSection isec(*this, sec, name);
581         warn(toString(&isec) + ": " + llvm::toString(std::move(e)));
582       } else {
583         updateSupportedARMFeatures(attributes);
584         updateARMVFPArgs(attributes, this);
585 
586         // FIXME: Retain the first attribute section we see. The eglibc ARM
587         // dynamic loaders require the presence of an attribute section for
588         // dlopen to work. In a full implementation we would merge all attribute
589         // sections.
590         if (in.attributes == nullptr) {
591           in.attributes = std::make_unique<InputSection>(*this, sec, name);
592           this->sections[i] = in.attributes.get();
593         }
594       }
595     }
596 
597     if (sec.sh_type != SHT_GROUP)
598       continue;
599     StringRef signature = getShtGroupSignature(objSections, sec);
600     ArrayRef<Elf_Word> entries =
601         CHECK(obj.template getSectionContentsAsArray<Elf_Word>(sec), this);
602     if (entries.empty())
603       fatal(toString(this) + ": empty SHT_GROUP");
604 
605     Elf_Word flag = entries[0];
606     if (flag && flag != GRP_COMDAT)
607       fatal(toString(this) + ": unsupported SHT_GROUP format");
608 
609     bool keepGroup =
610         (flag & GRP_COMDAT) == 0 || ignoreComdats ||
611         symtab.comdatGroups.try_emplace(CachedHashStringRef(signature), this)
612             .second;
613     if (keepGroup) {
614       if (config->relocatable)
615         this->sections[i] = createInputSection(
616             i, sec, check(obj.getSectionName(sec, shstrtab)));
617       continue;
618     }
619 
620     // Otherwise, discard group members.
621     for (uint32_t secIndex : entries.slice(1)) {
622       if (secIndex >= size)
623         fatal(toString(this) +
624               ": invalid section index in group: " + Twine(secIndex));
625       this->sections[secIndex] = &InputSection::discarded;
626     }
627   }
628 
629   // Read a symbol table.
630   initializeSymbols(obj);
631 }
632 
633 // Sections with SHT_GROUP and comdat bits define comdat section groups.
634 // They are identified and deduplicated by group name. This function
635 // returns a group name.
636 template <class ELFT>
637 StringRef ObjFile<ELFT>::getShtGroupSignature(ArrayRef<Elf_Shdr> sections,
638                                               const Elf_Shdr &sec) {
639   typename ELFT::SymRange symbols = this->getELFSyms<ELFT>();
640   if (sec.sh_info >= symbols.size())
641     fatal(toString(this) + ": invalid symbol index");
642   const typename ELFT::Sym &sym = symbols[sec.sh_info];
643   return CHECK(sym.getName(this->stringTable), this);
644 }
645 
646 template <class ELFT>
647 bool ObjFile<ELFT>::shouldMerge(const Elf_Shdr &sec, StringRef name) {
648   // On a regular link we don't merge sections if -O0 (default is -O1). This
649   // sometimes makes the linker significantly faster, although the output will
650   // be bigger.
651   //
652   // Doing the same for -r would create a problem as it would combine sections
653   // with different sh_entsize. One option would be to just copy every SHF_MERGE
654   // section as is to the output. While this would produce a valid ELF file with
655   // usable SHF_MERGE sections, tools like (llvm-)?dwarfdump get confused when
656   // they see two .debug_str. We could have separate logic for combining
657   // SHF_MERGE sections based both on their name and sh_entsize, but that seems
658   // to be more trouble than it is worth. Instead, we just use the regular (-O1)
659   // logic for -r.
660   if (config->optimize == 0 && !config->relocatable)
661     return false;
662 
663   // A mergeable section with size 0 is useless because they don't have
664   // any data to merge. A mergeable string section with size 0 can be
665   // argued as invalid because it doesn't end with a null character.
666   // We'll avoid a mess by handling them as if they were non-mergeable.
667   if (sec.sh_size == 0)
668     return false;
669 
670   // Check for sh_entsize. The ELF spec is not clear about the zero
671   // sh_entsize. It says that "the member [sh_entsize] contains 0 if
672   // the section does not hold a table of fixed-size entries". We know
673   // that Rust 1.13 produces a string mergeable section with a zero
674   // sh_entsize. Here we just accept it rather than being picky about it.
675   uint64_t entSize = sec.sh_entsize;
676   if (entSize == 0)
677     return false;
678   if (sec.sh_size % entSize)
679     fatal(toString(this) + ":(" + name + "): SHF_MERGE section size (" +
680           Twine(sec.sh_size) + ") must be a multiple of sh_entsize (" +
681           Twine(entSize) + ")");
682 
683   if (sec.sh_flags & SHF_WRITE)
684     fatal(toString(this) + ":(" + name +
685           "): writable SHF_MERGE section is not supported");
686 
687   return true;
688 }
689 
690 // This is for --just-symbols.
691 //
692 // --just-symbols is a very minor feature that allows you to link your
693 // output against other existing program, so that if you load both your
694 // program and the other program into memory, your output can refer the
695 // other program's symbols.
696 //
697 // When the option is given, we link "just symbols". The section table is
698 // initialized with null pointers.
699 template <class ELFT> void ObjFile<ELFT>::initializeJustSymbols() {
700   sections.resize(numELFShdrs);
701 }
702 
703 template <class ELFT>
704 void ObjFile<ELFT>::initializeSections(bool ignoreComdats,
705                                        const llvm::object::ELFFile<ELFT> &obj) {
706   ArrayRef<Elf_Shdr> objSections = getELFShdrs<ELFT>();
707   StringRef shstrtab = CHECK(obj.getSectionStringTable(objSections), this);
708   uint64_t size = objSections.size();
709   SmallVector<ArrayRef<Elf_Word>, 0> selectedGroups;
710   for (size_t i = 0; i != size; ++i) {
711     if (this->sections[i] == &InputSection::discarded)
712       continue;
713     const Elf_Shdr &sec = objSections[i];
714 
715     // SHF_EXCLUDE'ed sections are discarded by the linker. However,
716     // if -r is given, we'll let the final link discard such sections.
717     // This is compatible with GNU.
718     if ((sec.sh_flags & SHF_EXCLUDE) && !config->relocatable) {
719       if (sec.sh_type == SHT_LLVM_CALL_GRAPH_PROFILE)
720         cgProfileSectionIndex = i;
721       if (sec.sh_type == SHT_LLVM_ADDRSIG) {
722         // We ignore the address-significance table if we know that the object
723         // file was created by objcopy or ld -r. This is because these tools
724         // will reorder the symbols in the symbol table, invalidating the data
725         // in the address-significance table, which refers to symbols by index.
726         if (sec.sh_link != 0)
727           this->addrsigSec = &sec;
728         else if (config->icf == ICFLevel::Safe)
729           warn(toString(this) +
730                ": --icf=safe conservatively ignores "
731                "SHT_LLVM_ADDRSIG [index " +
732                Twine(i) +
733                "] with sh_link=0 "
734                "(likely created using objcopy or ld -r)");
735       }
736       this->sections[i] = &InputSection::discarded;
737       continue;
738     }
739 
740     switch (sec.sh_type) {
741     case SHT_GROUP: {
742       if (!config->relocatable)
743         sections[i] = &InputSection::discarded;
744       StringRef signature =
745           cantFail(this->getELFSyms<ELFT>()[sec.sh_info].getName(stringTable));
746       ArrayRef<Elf_Word> entries =
747           cantFail(obj.template getSectionContentsAsArray<Elf_Word>(sec));
748       if ((entries[0] & GRP_COMDAT) == 0 || ignoreComdats ||
749           symtab.comdatGroups.find(CachedHashStringRef(signature))->second ==
750               this)
751         selectedGroups.push_back(entries);
752       break;
753     }
754     case SHT_SYMTAB_SHNDX:
755       shndxTable = CHECK(obj.getSHNDXTable(sec, objSections), this);
756       break;
757     case SHT_SYMTAB:
758     case SHT_STRTAB:
759     case SHT_REL:
760     case SHT_RELA:
761     case SHT_NULL:
762       break;
763     case SHT_PROGBITS: {
764       this->sections[i] = createInputSection(i, sec, check(obj.getSectionName(sec, shstrtab)));
765       StringRef name = check(obj.getSectionName(sec, shstrtab));
766       ArrayRef<char> data =
767           CHECK(obj.template getSectionContentsAsArray<char>(sec), this);
768       parseGNUWarning(name, data, sec.sh_size);
769       }
770       break;
771     case SHT_LLVM_SYMPART:
772       ctx.hasSympart.store(true, std::memory_order_relaxed);
773       [[fallthrough]];
774     default:
775       this->sections[i] =
776           createInputSection(i, sec, check(obj.getSectionName(sec, shstrtab)));
777     }
778   }
779 
780   // We have a second loop. It is used to:
781   // 1) handle SHF_LINK_ORDER sections.
782   // 2) create SHT_REL[A] sections. In some cases the section header index of a
783   //    relocation section may be smaller than that of the relocated section. In
784   //    such cases, the relocation section would attempt to reference a target
785   //    section that has not yet been created. For simplicity, delay creation of
786   //    relocation sections until now.
787   for (size_t i = 0; i != size; ++i) {
788     if (this->sections[i] == &InputSection::discarded)
789       continue;
790     const Elf_Shdr &sec = objSections[i];
791 
792     if (sec.sh_type == SHT_REL || sec.sh_type == SHT_RELA) {
793       // Find a relocation target section and associate this section with that.
794       // Target may have been discarded if it is in a different section group
795       // and the group is discarded, even though it's a violation of the spec.
796       // We handle that situation gracefully by discarding dangling relocation
797       // sections.
798       const uint32_t info = sec.sh_info;
799       InputSectionBase *s = getRelocTarget(i, sec, info);
800       if (!s)
801         continue;
802 
803       // ELF spec allows mergeable sections with relocations, but they are rare,
804       // and it is in practice hard to merge such sections by contents, because
805       // applying relocations at end of linking changes section contents. So, we
806       // simply handle such sections as non-mergeable ones. Degrading like this
807       // is acceptable because section merging is optional.
808       if (auto *ms = dyn_cast<MergeInputSection>(s)) {
809         s = makeThreadLocal<InputSection>(
810             ms->file, ms->flags, ms->type, ms->addralign,
811             ms->contentMaybeDecompress(), ms->name);
812         sections[info] = s;
813       }
814 
815       if (s->relSecIdx != 0)
816         error(
817             toString(s) +
818             ": multiple relocation sections to one section are not supported");
819       s->relSecIdx = i;
820 
821       // Relocation sections are usually removed from the output, so return
822       // `nullptr` for the normal case. However, if -r or --emit-relocs is
823       // specified, we need to copy them to the output. (Some post link analysis
824       // tools specify --emit-relocs to obtain the information.)
825       if (config->copyRelocs) {
826         auto *isec = makeThreadLocal<InputSection>(
827             *this, sec, check(obj.getSectionName(sec, shstrtab)));
828         // If the relocated section is discarded (due to /DISCARD/ or
829         // --gc-sections), the relocation section should be discarded as well.
830         s->dependentSections.push_back(isec);
831         sections[i] = isec;
832       }
833       continue;
834     }
835 
836     // A SHF_LINK_ORDER section with sh_link=0 is handled as if it did not have
837     // the flag.
838     if (!sec.sh_link || !(sec.sh_flags & SHF_LINK_ORDER))
839       continue;
840 
841     InputSectionBase *linkSec = nullptr;
842     if (sec.sh_link < size)
843       linkSec = this->sections[sec.sh_link];
844     if (!linkSec)
845       fatal(toString(this) + ": invalid sh_link index: " + Twine(sec.sh_link));
846 
847     // A SHF_LINK_ORDER section is discarded if its linked-to section is
848     // discarded.
849     InputSection *isec = cast<InputSection>(this->sections[i]);
850     linkSec->dependentSections.push_back(isec);
851     if (!isa<InputSection>(linkSec))
852       error("a section " + isec->name +
853             " with SHF_LINK_ORDER should not refer a non-regular section: " +
854             toString(linkSec));
855   }
856 
857   for (ArrayRef<Elf_Word> entries : selectedGroups)
858     handleSectionGroup<ELFT>(this->sections, entries);
859 }
860 
861 // If a source file is compiled with x86 hardware-assisted call flow control
862 // enabled, the generated object file contains feature flags indicating that
863 // fact. This function reads the feature flags and returns it.
864 //
865 // Essentially we want to read a single 32-bit value in this function, but this
866 // function is rather complicated because the value is buried deep inside a
867 // .note.gnu.property section.
868 //
869 // The section consists of one or more NOTE records. Each NOTE record consists
870 // of zero or more type-length-value fields. We want to find a field of a
871 // certain type. It seems a bit too much to just store a 32-bit value, perhaps
872 // the ABI is unnecessarily complicated.
873 template <class ELFT> static uint32_t readAndFeatures(const InputSection &sec) {
874   using Elf_Nhdr = typename ELFT::Nhdr;
875   using Elf_Note = typename ELFT::Note;
876 
877   uint32_t featuresSet = 0;
878   ArrayRef<uint8_t> data = sec.content();
879   auto reportFatal = [&](const uint8_t *place, const char *msg) {
880     fatal(toString(sec.file) + ":(" + sec.name + "+0x" +
881           Twine::utohexstr(place - sec.content().data()) + "): " + msg);
882   };
883   while (!data.empty()) {
884     // Read one NOTE record.
885     auto *nhdr = reinterpret_cast<const Elf_Nhdr *>(data.data());
886     if (data.size() < sizeof(Elf_Nhdr) || data.size() < nhdr->getSize())
887       reportFatal(data.data(), "data is too short");
888 
889     Elf_Note note(*nhdr);
890     if (nhdr->n_type != NT_GNU_PROPERTY_TYPE_0 || note.getName() != "GNU") {
891       data = data.slice(nhdr->getSize());
892       continue;
893     }
894 
895     uint32_t featureAndType = config->emachine == EM_AARCH64
896                                   ? GNU_PROPERTY_AARCH64_FEATURE_1_AND
897                                   : GNU_PROPERTY_X86_FEATURE_1_AND;
898 
899     // Read a body of a NOTE record, which consists of type-length-value fields.
900     ArrayRef<uint8_t> desc = note.getDesc();
901     while (!desc.empty()) {
902       const uint8_t *place = desc.data();
903       if (desc.size() < 8)
904         reportFatal(place, "program property is too short");
905       uint32_t type = read32<ELFT::TargetEndianness>(desc.data());
906       uint32_t size = read32<ELFT::TargetEndianness>(desc.data() + 4);
907       desc = desc.slice(8);
908       if (desc.size() < size)
909         reportFatal(place, "program property is too short");
910 
911       if (type == featureAndType) {
912         // We found a FEATURE_1_AND field. There may be more than one of these
913         // in a .note.gnu.property section, for a relocatable object we
914         // accumulate the bits set.
915         if (size < 4)
916           reportFatal(place, "FEATURE_1_AND entry is too short");
917         featuresSet |= read32<ELFT::TargetEndianness>(desc.data());
918       }
919 
920       // Padding is present in the note descriptor, if necessary.
921       desc = desc.slice(alignTo<(ELFT::Is64Bits ? 8 : 4)>(size));
922     }
923 
924     // Go to next NOTE record to look for more FEATURE_1_AND descriptions.
925     data = data.slice(nhdr->getSize());
926   }
927 
928   return featuresSet;
929 }
930 
931 template <class ELFT>
932 InputSectionBase *ObjFile<ELFT>::getRelocTarget(uint32_t idx,
933                                                 const Elf_Shdr &sec,
934                                                 uint32_t info) {
935   if (info < this->sections.size()) {
936     InputSectionBase *target = this->sections[info];
937 
938     // Strictly speaking, a relocation section must be included in the
939     // group of the section it relocates. However, LLVM 3.3 and earlier
940     // would fail to do so, so we gracefully handle that case.
941     if (target == &InputSection::discarded)
942       return nullptr;
943 
944     if (target != nullptr)
945       return target;
946   }
947 
948   error(toString(this) + Twine(": relocation section (index ") + Twine(idx) +
949         ") has invalid sh_info (" + Twine(info) + ")");
950   return nullptr;
951 }
952 
953 // The function may be called concurrently for different input files. For
954 // allocation, prefer makeThreadLocal which does not require holding a lock.
955 template <class ELFT>
956 InputSectionBase *ObjFile<ELFT>::createInputSection(uint32_t idx,
957                                                     const Elf_Shdr &sec,
958                                                     StringRef name) {
959   if (name.startswith(".n")) {
960     // The GNU linker uses .note.GNU-stack section as a marker indicating
961     // that the code in the object file does not expect that the stack is
962     // executable (in terms of NX bit). If all input files have the marker,
963     // the GNU linker adds a PT_GNU_STACK segment to tells the loader to
964     // make the stack non-executable. Most object files have this section as
965     // of 2017.
966     //
967     // But making the stack non-executable is a norm today for security
968     // reasons. Failure to do so may result in a serious security issue.
969     // Therefore, we make LLD always add PT_GNU_STACK unless it is
970     // explicitly told to do otherwise (by -z execstack). Because the stack
971     // executable-ness is controlled solely by command line options,
972     // .note.GNU-stack sections are simply ignored.
973     if (name == ".note.GNU-stack")
974       return &InputSection::discarded;
975 
976     // Object files that use processor features such as Intel Control-Flow
977     // Enforcement (CET) or AArch64 Branch Target Identification BTI, use a
978     // .note.gnu.property section containing a bitfield of feature bits like the
979     // GNU_PROPERTY_X86_FEATURE_1_IBT flag. Read a bitmap containing the flag.
980     //
981     // Since we merge bitmaps from multiple object files to create a new
982     // .note.gnu.property containing a single AND'ed bitmap, we discard an input
983     // file's .note.gnu.property section.
984     if (name == ".note.gnu.property") {
985       this->andFeatures = readAndFeatures<ELFT>(InputSection(*this, sec, name));
986       return &InputSection::discarded;
987     }
988 
989     // Split stacks is a feature to support a discontiguous stack,
990     // commonly used in the programming language Go. For the details,
991     // see https://gcc.gnu.org/wiki/SplitStacks. An object file compiled
992     // for split stack will include a .note.GNU-split-stack section.
993     if (name == ".note.GNU-split-stack") {
994       if (config->relocatable) {
995         error(
996             "cannot mix split-stack and non-split-stack in a relocatable link");
997         return &InputSection::discarded;
998       }
999       this->splitStack = true;
1000       return &InputSection::discarded;
1001     }
1002 
1003     // An object file compiled for split stack, but where some of the
1004     // functions were compiled with the no_split_stack_attribute will
1005     // include a .note.GNU-no-split-stack section.
1006     if (name == ".note.GNU-no-split-stack") {
1007       this->someNoSplitStack = true;
1008       return &InputSection::discarded;
1009     }
1010 
1011     // Strip existing .note.gnu.build-id sections so that the output won't have
1012     // more than one build-id. This is not usually a problem because input
1013     // object files normally don't have .build-id sections, but you can create
1014     // such files by "ld.{bfd,gold,lld} -r --build-id", and we want to guard
1015     // against it.
1016     if (name == ".note.gnu.build-id")
1017       return &InputSection::discarded;
1018   }
1019 
1020   // The linker merges EH (exception handling) frames and creates a
1021   // .eh_frame_hdr section for runtime. So we handle them with a special
1022   // class. For relocatable outputs, they are just passed through.
1023   if (name == ".eh_frame" && !config->relocatable)
1024     return makeThreadLocal<EhInputSection>(*this, sec, name);
1025 
1026   if ((sec.sh_flags & SHF_MERGE) && shouldMerge(sec, name))
1027     return makeThreadLocal<MergeInputSection>(*this, sec, name);
1028   return makeThreadLocal<InputSection>(*this, sec, name);
1029 }
1030 
1031 // Initialize this->Symbols. this->Symbols is a parallel array as
1032 // its corresponding ELF symbol table.
1033 template <class ELFT>
1034 void ObjFile<ELFT>::initializeSymbols(const object::ELFFile<ELFT> &obj) {
1035   ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>();
1036   if (numSymbols == 0) {
1037     numSymbols = eSyms.size();
1038     symbols = std::make_unique<Symbol *[]>(numSymbols);
1039   }
1040 
1041   // Some entries have been filled by LazyObjFile.
1042   for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i)
1043     if (!symbols[i])
1044       symbols[i] = symtab.insert(CHECK(eSyms[i].getName(stringTable), this));
1045 
1046   // Perform symbol resolution on non-local symbols.
1047   SmallVector<unsigned, 32> undefineds;
1048   for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) {
1049     const Elf_Sym &eSym = eSyms[i];
1050     uint32_t secIdx = eSym.st_shndx;
1051     if (secIdx == SHN_UNDEF) {
1052       undefineds.push_back(i);
1053       continue;
1054     }
1055 
1056     uint8_t binding = eSym.getBinding();
1057     uint8_t stOther = eSym.st_other;
1058     uint8_t type = eSym.getType();
1059     uint64_t value = eSym.st_value;
1060     uint64_t size = eSym.st_size;
1061 
1062     Symbol *sym = symbols[i];
1063     sym->isUsedInRegularObj = true;
1064     if (LLVM_UNLIKELY(eSym.st_shndx == SHN_COMMON)) {
1065       if (value == 0 || value >= UINT32_MAX)
1066         fatal(toString(this) + ": common symbol '" + sym->getName() +
1067               "' has invalid alignment: " + Twine(value));
1068       hasCommonSyms = true;
1069       sym->resolve(
1070           CommonSymbol{this, StringRef(), binding, stOther, type, value, size});
1071       continue;
1072     }
1073 
1074     // Handle global defined symbols. Defined::section will be set in postParse.
1075     sym->resolve(Defined{this, StringRef(), binding, stOther, type, value, size,
1076                          nullptr});
1077   }
1078 
1079   // Undefined symbols (excluding those defined relative to non-prevailing
1080   // sections) can trigger recursive extract. Process defined symbols first so
1081   // that the relative order between a defined symbol and an undefined symbol
1082   // does not change the symbol resolution behavior. In addition, a set of
1083   // interconnected symbols will all be resolved to the same file, instead of
1084   // being resolved to different files.
1085   for (unsigned i : undefineds) {
1086     const Elf_Sym &eSym = eSyms[i];
1087     Symbol *sym = symbols[i];
1088     sym->resolve(Undefined{this, StringRef(), eSym.getBinding(), eSym.st_other,
1089                            eSym.getType()});
1090     sym->isUsedInRegularObj = true;
1091     sym->referenced = true;
1092   }
1093 }
1094 
1095 template <class ELFT>
1096 void ObjFile<ELFT>::initSectionsAndLocalSyms(bool ignoreComdats) {
1097   if (!justSymbols)
1098     initializeSections(ignoreComdats, getObj());
1099 
1100   if (!firstGlobal)
1101     return;
1102   SymbolUnion *locals = makeThreadLocalN<SymbolUnion>(firstGlobal);
1103   memset(locals, 0, sizeof(SymbolUnion) * firstGlobal);
1104 
1105   ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>();
1106   for (size_t i = 0, end = firstGlobal; i != end; ++i) {
1107     const Elf_Sym &eSym = eSyms[i];
1108     uint32_t secIdx = eSym.st_shndx;
1109     if (LLVM_UNLIKELY(secIdx == SHN_XINDEX))
1110       secIdx = check(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable));
1111     else if (secIdx >= SHN_LORESERVE)
1112       secIdx = 0;
1113     if (LLVM_UNLIKELY(secIdx >= sections.size()))
1114       fatal(toString(this) + ": invalid section index: " + Twine(secIdx));
1115     if (LLVM_UNLIKELY(eSym.getBinding() != STB_LOCAL))
1116       error(toString(this) + ": non-local symbol (" + Twine(i) +
1117             ") found at index < .symtab's sh_info (" + Twine(end) + ")");
1118 
1119     InputSectionBase *sec = sections[secIdx];
1120     uint8_t type = eSym.getType();
1121     if (type == STT_FILE)
1122       sourceFile = CHECK(eSym.getName(stringTable), this);
1123     if (LLVM_UNLIKELY(stringTable.size() <= eSym.st_name))
1124       fatal(toString(this) + ": invalid symbol name offset");
1125     StringRef name(stringTable.data() + eSym.st_name);
1126 
1127     symbols[i] = reinterpret_cast<Symbol *>(locals + i);
1128     if (eSym.st_shndx == SHN_UNDEF || sec == &InputSection::discarded)
1129       new (symbols[i]) Undefined(this, name, STB_LOCAL, eSym.st_other, type,
1130                                  /*discardedSecIdx=*/secIdx);
1131     else
1132       new (symbols[i]) Defined(this, name, STB_LOCAL, eSym.st_other, type,
1133                                eSym.st_value, eSym.st_size, sec);
1134     symbols[i]->partition = 1;
1135     symbols[i]->isUsedInRegularObj = true;
1136   }
1137 }
1138 
1139 // Called after all ObjFile::parse is called for all ObjFiles. This checks
1140 // duplicate symbols and may do symbol property merge in the future.
1141 template <class ELFT> void ObjFile<ELFT>::postParse() {
1142   static std::mutex mu;
1143   ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>();
1144   for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) {
1145     const Elf_Sym &eSym = eSyms[i];
1146     Symbol &sym = *symbols[i];
1147     uint32_t secIdx = eSym.st_shndx;
1148     uint8_t binding = eSym.getBinding();
1149     if (LLVM_UNLIKELY(binding != STB_GLOBAL && binding != STB_WEAK &&
1150                       binding != STB_GNU_UNIQUE))
1151       errorOrWarn(toString(this) + ": symbol (" + Twine(i) +
1152                   ") has invalid binding: " + Twine((int)binding));
1153 
1154     // st_value of STT_TLS represents the assigned offset, not the actual
1155     // address which is used by STT_FUNC and STT_OBJECT. STT_TLS symbols can
1156     // only be referenced by special TLS relocations. It is usually an error if
1157     // a STT_TLS symbol is replaced by a non-STT_TLS symbol, vice versa.
1158     if (LLVM_UNLIKELY(sym.isTls()) && eSym.getType() != STT_TLS &&
1159         eSym.getType() != STT_NOTYPE)
1160       errorOrWarn("TLS attribute mismatch: " + toString(sym) + "\n>>> in " +
1161                   toString(sym.file) + "\n>>> in " + toString(this));
1162 
1163     // Handle non-COMMON defined symbol below. !sym.file allows a symbol
1164     // assignment to redefine a symbol without an error.
1165     if (!sym.file || !sym.isDefined() || secIdx == SHN_UNDEF ||
1166         secIdx == SHN_COMMON)
1167       continue;
1168 
1169     if (LLVM_UNLIKELY(secIdx == SHN_XINDEX))
1170       secIdx = check(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable));
1171     else if (secIdx >= SHN_LORESERVE)
1172       secIdx = 0;
1173     if (LLVM_UNLIKELY(secIdx >= sections.size()))
1174       fatal(toString(this) + ": invalid section index: " + Twine(secIdx));
1175     InputSectionBase *sec = sections[secIdx];
1176     if (sec == &InputSection::discarded) {
1177       if (sym.traced) {
1178         printTraceSymbol(Undefined{this, sym.getName(), sym.binding,
1179                                    sym.stOther, sym.type, secIdx},
1180                          sym.getName());
1181       }
1182       if (sym.file == this) {
1183         std::lock_guard<std::mutex> lock(mu);
1184         ctx.nonPrevailingSyms.emplace_back(&sym, secIdx);
1185       }
1186       continue;
1187     }
1188 
1189     if (sym.file == this) {
1190       cast<Defined>(sym).section = sec;
1191       continue;
1192     }
1193 
1194     if (sym.binding == STB_WEAK || binding == STB_WEAK)
1195       continue;
1196     std::lock_guard<std::mutex> lock(mu);
1197     ctx.duplicates.push_back({&sym, this, sec, eSym.st_value});
1198   }
1199 }
1200 
1201 // The handling of tentative definitions (COMMON symbols) in archives is murky.
1202 // A tentative definition will be promoted to a global definition if there are
1203 // no non-tentative definitions to dominate it. When we hold a tentative
1204 // definition to a symbol and are inspecting archive members for inclusion
1205 // there are 2 ways we can proceed:
1206 //
1207 // 1) Consider the tentative definition a 'real' definition (ie promotion from
1208 //    tentative to real definition has already happened) and not inspect
1209 //    archive members for Global/Weak definitions to replace the tentative
1210 //    definition. An archive member would only be included if it satisfies some
1211 //    other undefined symbol. This is the behavior Gold uses.
1212 //
1213 // 2) Consider the tentative definition as still undefined (ie the promotion to
1214 //    a real definition happens only after all symbol resolution is done).
1215 //    The linker searches archive members for STB_GLOBAL definitions to
1216 //    replace the tentative definition with. This is the behavior used by
1217 //    GNU ld.
1218 //
1219 //  The second behavior is inherited from SysVR4, which based it on the FORTRAN
1220 //  COMMON BLOCK model. This behavior is needed for proper initialization in old
1221 //  (pre F90) FORTRAN code that is packaged into an archive.
1222 //
1223 //  The following functions search archive members for definitions to replace
1224 //  tentative definitions (implementing behavior 2).
1225 static bool isBitcodeNonCommonDef(MemoryBufferRef mb, StringRef symName,
1226                                   StringRef archiveName) {
1227   IRSymtabFile symtabFile = check(readIRSymtab(mb));
1228   for (const irsymtab::Reader::SymbolRef &sym :
1229        symtabFile.TheReader.symbols()) {
1230     if (sym.isGlobal() && sym.getName() == symName)
1231       return !sym.isUndefined() && !sym.isWeak() && !sym.isCommon();
1232   }
1233   return false;
1234 }
1235 
1236 template <class ELFT>
1237 static bool isNonCommonDef(ELFKind ekind, MemoryBufferRef mb, StringRef symName,
1238                            StringRef archiveName) {
1239   ObjFile<ELFT> *obj = make<ObjFile<ELFT>>(ekind, mb, archiveName);
1240   obj->init();
1241   StringRef stringtable = obj->getStringTable();
1242 
1243   for (auto sym : obj->template getGlobalELFSyms<ELFT>()) {
1244     Expected<StringRef> name = sym.getName(stringtable);
1245     if (name && name.get() == symName)
1246       return sym.isDefined() && sym.getBinding() == STB_GLOBAL &&
1247              !sym.isCommon();
1248   }
1249   return false;
1250 }
1251 
1252 static bool isNonCommonDef(MemoryBufferRef mb, StringRef symName,
1253                            StringRef archiveName) {
1254   switch (getELFKind(mb, archiveName)) {
1255   case ELF32LEKind:
1256     return isNonCommonDef<ELF32LE>(ELF32LEKind, mb, symName, archiveName);
1257   case ELF32BEKind:
1258     return isNonCommonDef<ELF32BE>(ELF32BEKind, mb, symName, archiveName);
1259   case ELF64LEKind:
1260     return isNonCommonDef<ELF64LE>(ELF64LEKind, mb, symName, archiveName);
1261   case ELF64BEKind:
1262     return isNonCommonDef<ELF64BE>(ELF64BEKind, mb, symName, archiveName);
1263   default:
1264     llvm_unreachable("getELFKind");
1265   }
1266 }
1267 
// Out-of-class definition of the static data member SharedFile::vernauxNum.
unsigned SharedFile::vernauxNum;

// Constructs a SharedFile over memory buffer `m`. The ELF kind passed to the
// ELFFileBase base is derived from the buffer with getELFKind (using an empty
// archive name), soName starts out as `defaultSoName`, and isNeeded starts out
// as the negation of config->asNeeded.
SharedFile::SharedFile(MemoryBufferRef m, StringRef defaultSoName)
    : ELFFileBase(SharedKind, getELFKind(m, ""), m), soName(defaultSoName),
      isNeeded(!config->asNeeded) {}
1273 
1274 // Parse the version definitions in the object file if present, and return a
1275 // vector whose nth element contains a pointer to the Elf_Verdef for version
1276 // identifier n. Version identifiers that are not definitions map to nullptr.
1277 template <typename ELFT>
1278 static SmallVector<const void *, 0>
1279 parseVerdefs(const uint8_t *base, const typename ELFT::Shdr *sec) {
1280   if (!sec)
1281     return {};
1282 
1283   // Build the Verdefs array by following the chain of Elf_Verdef objects
1284   // from the start of the .gnu.version_d section.
1285   SmallVector<const void *, 0> verdefs;
1286   const uint8_t *verdef = base + sec->sh_offset;
1287   for (unsigned i = 0, e = sec->sh_info; i != e; ++i) {
1288     auto *curVerdef = reinterpret_cast<const typename ELFT::Verdef *>(verdef);
1289     verdef += curVerdef->vd_next;
1290     unsigned verdefIndex = curVerdef->vd_ndx;
1291     if (verdefIndex >= verdefs.size())
1292       verdefs.resize(verdefIndex + 1);
1293     verdefs[verdefIndex] = curVerdef;
1294   }
1295   return verdefs;
1296 }
1297 
1298 // Parse SHT_GNU_verneed to properly set the name of a versioned undefined
1299 // symbol. We detect fatal issues which would cause vulnerabilities, but do not
1300 // implement sophisticated error checking like in llvm-readobj because the value
1301 // of such diagnostics is low.
1302 template <typename ELFT>
1303 std::vector<uint32_t> SharedFile::parseVerneed(const ELFFile<ELFT> &obj,
1304                                                const typename ELFT::Shdr *sec) {
1305   if (!sec)
1306     return {};
1307   std::vector<uint32_t> verneeds;
1308   ArrayRef<uint8_t> data = CHECK(obj.getSectionContents(*sec), this);
1309   const uint8_t *verneedBuf = data.begin();
1310   for (unsigned i = 0; i != sec->sh_info; ++i) {
1311     if (verneedBuf + sizeof(typename ELFT::Verneed) > data.end())
1312       fatal(toString(this) + " has an invalid Verneed");
1313     auto *vn = reinterpret_cast<const typename ELFT::Verneed *>(verneedBuf);
1314     const uint8_t *vernauxBuf = verneedBuf + vn->vn_aux;
1315     for (unsigned j = 0; j != vn->vn_cnt; ++j) {
1316       if (vernauxBuf + sizeof(typename ELFT::Vernaux) > data.end())
1317         fatal(toString(this) + " has an invalid Vernaux");
1318       auto *aux = reinterpret_cast<const typename ELFT::Vernaux *>(vernauxBuf);
1319       if (aux->vna_name >= this->stringTable.size())
1320         fatal(toString(this) + " has a Vernaux with an invalid vna_name");
1321       uint16_t version = aux->vna_other & VERSYM_VERSION;
1322       if (version >= verneeds.size())
1323         verneeds.resize(version + 1);
1324       verneeds[version] = aux->vna_name;
1325       vernauxBuf += aux->vna_next;
1326     }
1327     verneedBuf += vn->vn_next;
1328   }
1329   return verneeds;
1330 }
1331 
1332 // We do not usually care about alignments of data in shared object
1333 // files because the loader takes care of it. However, if we promote a
1334 // DSO symbol to point to .bss due to copy relocation, we need to keep
1335 // the original alignment requirements. We infer it in this function.
1336 template <typename ELFT>
1337 static uint64_t getAlignment(ArrayRef<typename ELFT::Shdr> sections,
1338                              const typename ELFT::Sym &sym) {
1339   uint64_t ret = UINT64_MAX;
1340   if (sym.st_value)
1341     ret = 1ULL << countTrailingZeros((uint64_t)sym.st_value);
1342   if (0 < sym.st_shndx && sym.st_shndx < sections.size())
1343     ret = std::min<uint64_t>(ret, sections[sym.st_shndx].sh_addralign);
1344   return (ret > UINT32_MAX) ? 0 : ret;
1345 }
1346 
1347 // Fully parse the shared object file.
1348 //
1349 // This function parses symbol versions. If a DSO has version information,
1350 // the file has a ".gnu.version_d" section which contains symbol version
1351 // definitions. Each symbol is associated to one version through a table in
1352 // ".gnu.version" section. That table is a parallel array for the symbol
1353 // table, and each table entry contains an index in ".gnu.version_d".
1354 //
1355 // The special index 0 is reserved for VER_NDX_LOCAL and 1 is for
1356 // VER_NDX_GLOBAL. There's no table entry for these special versions in
1357 // ".gnu.version_d".
1358 //
1359 // The file format for symbol versioning is perhaps a bit more complicated
1360 // than necessary, but you can easily understand the code if you wrap your
1361 // head around the data structure described above.
1362 template <class ELFT> void SharedFile::parse() {
1363   using Elf_Dyn = typename ELFT::Dyn;
1364   using Elf_Shdr = typename ELFT::Shdr;
1365   using Elf_Sym = typename ELFT::Sym;
1366   using Elf_Verdef = typename ELFT::Verdef;
1367   using Elf_Versym = typename ELFT::Versym;
1368 
1369   ArrayRef<Elf_Dyn> dynamicTags;
1370   const ELFFile<ELFT> obj = this->getObj<ELFT>();
1371   ArrayRef<Elf_Shdr> sections = getELFShdrs<ELFT>();
1372 
1373   StringRef sectionStringTable =
1374       CHECK(obj.getSectionStringTable(sections), this);
1375 
1376   const Elf_Shdr *versymSec = nullptr;
1377   const Elf_Shdr *verdefSec = nullptr;
1378   const Elf_Shdr *verneedSec = nullptr;
1379 
1380   // Search for .dynsym, .dynamic, .symtab, .gnu.version and .gnu.version_d.
1381   for (const Elf_Shdr &sec : sections) {
1382     switch (sec.sh_type) {
1383     default:
1384       continue;
1385     case SHT_DYNAMIC:
1386       dynamicTags =
1387           CHECK(obj.template getSectionContentsAsArray<Elf_Dyn>(sec), this);
1388       break;
1389     case SHT_GNU_versym:
1390       versymSec = &sec;
1391       break;
1392     case SHT_GNU_verdef:
1393       verdefSec = &sec;
1394       break;
1395     case SHT_GNU_verneed:
1396       verneedSec = &sec;
1397       break;
1398     case SHT_PROGBITS: {
1399       StringRef name = CHECK(obj.getSectionName(sec, sectionStringTable), this);
1400       ArrayRef<char> data =
1401           CHECK(obj.template getSectionContentsAsArray<char>(sec), this);
1402       parseGNUWarning(name, data, sec.sh_size);
1403       break;
1404     }
1405     }
1406   }
1407 
1408   if (versymSec && numELFSyms == 0) {
1409     error("SHT_GNU_versym should be associated with symbol table");
1410     return;
1411   }
1412 
1413   // Search for a DT_SONAME tag to initialize this->soName.
1414   for (const Elf_Dyn &dyn : dynamicTags) {
1415     if (dyn.d_tag == DT_NEEDED) {
1416       uint64_t val = dyn.getVal();
1417       if (val >= this->stringTable.size())
1418         fatal(toString(this) + ": invalid DT_NEEDED entry");
1419       dtNeeded.push_back(this->stringTable.data() + val);
1420     } else if (dyn.d_tag == DT_SONAME) {
1421       uint64_t val = dyn.getVal();
1422       if (val >= this->stringTable.size())
1423         fatal(toString(this) + ": invalid DT_SONAME entry");
1424       soName = this->stringTable.data() + val;
1425     }
1426   }
1427 
1428   // DSOs are uniquified not by filename but by soname.
1429   DenseMap<CachedHashStringRef, SharedFile *>::iterator it;
1430   bool wasInserted;
1431   std::tie(it, wasInserted) =
1432       symtab.soNames.try_emplace(CachedHashStringRef(soName), this);
1433 
1434   // If a DSO appears more than once on the command line with and without
1435   // --as-needed, --no-as-needed takes precedence over --as-needed because a
1436   // user can add an extra DSO with --no-as-needed to force it to be added to
1437   // the dependency list.
1438   it->second->isNeeded |= isNeeded;
1439   if (!wasInserted)
1440     return;
1441 
1442   ctx.sharedFiles.push_back(this);
1443 
1444   verdefs = parseVerdefs<ELFT>(obj.base(), verdefSec);
1445   std::vector<uint32_t> verneeds = parseVerneed<ELFT>(obj, verneedSec);
1446 
1447   // Parse ".gnu.version" section which is a parallel array for the symbol
1448   // table. If a given file doesn't have a ".gnu.version" section, we use
1449   // VER_NDX_GLOBAL.
1450   size_t size = numELFSyms - firstGlobal;
1451   std::vector<uint16_t> versyms(size, VER_NDX_GLOBAL);
1452   if (versymSec) {
1453     ArrayRef<Elf_Versym> versym =
1454         CHECK(obj.template getSectionContentsAsArray<Elf_Versym>(*versymSec),
1455               this)
1456             .slice(firstGlobal);
1457     for (size_t i = 0; i < size; ++i)
1458       versyms[i] = versym[i].vs_index;
1459   }
1460 
1461   // System libraries can have a lot of symbols with versions. Using a
1462   // fixed buffer for computing the versions name (foo@ver) can save a
1463   // lot of allocations.
1464   SmallString<0> versionedNameBuffer;
1465 
1466   // Add symbols to the symbol table.
1467   ArrayRef<Elf_Sym> syms = this->getGlobalELFSyms<ELFT>();
1468   for (size_t i = 0, e = syms.size(); i != e; ++i) {
1469     const Elf_Sym &sym = syms[i];
1470 
1471     // ELF spec requires that all local symbols precede weak or global
1472     // symbols in each symbol table, and the index of first non-local symbol
1473     // is stored to sh_info. If a local symbol appears after some non-local
1474     // symbol, that's a violation of the spec.
1475     StringRef name = CHECK(sym.getName(stringTable), this);
1476     if (sym.getBinding() == STB_LOCAL) {
1477       errorOrWarn(toString(this) + ": invalid local symbol '" + name +
1478                   "' in global part of symbol table");
1479       continue;
1480     }
1481 
1482     const uint16_t ver = versyms[i], idx = ver & ~VERSYM_HIDDEN;
1483     if (sym.isUndefined()) {
1484       // For unversioned undefined symbols, VER_NDX_GLOBAL makes more sense but
1485       // as of binutils 2.34, GNU ld produces VER_NDX_LOCAL.
1486       if (ver != VER_NDX_LOCAL && ver != VER_NDX_GLOBAL) {
1487         if (idx >= verneeds.size()) {
1488           error("corrupt input file: version need index " + Twine(idx) +
1489                 " for symbol " + name + " is out of bounds\n>>> defined in " +
1490                 toString(this));
1491           continue;
1492         }
1493         StringRef verName = stringTable.data() + verneeds[idx];
1494         versionedNameBuffer.clear();
1495         name = saver().save(
1496             (name + "@" + verName).toStringRef(versionedNameBuffer));
1497       }
1498       Symbol *s = symtab.addSymbol(
1499           Undefined{this, name, sym.getBinding(), sym.st_other, sym.getType()});
1500       s->exportDynamic = true;
1501       if (s->isUndefined() && sym.getBinding() != STB_WEAK &&
1502           config->unresolvedSymbolsInShlib != UnresolvedPolicy::Ignore)
1503         requiredSymbols.push_back(s);
1504       continue;
1505     }
1506 
1507     if (ver == VER_NDX_LOCAL ||
1508         (ver != VER_NDX_GLOBAL && idx >= verdefs.size())) {
1509       // In GNU ld < 2.31 (before 3be08ea4728b56d35e136af4e6fd3086ade17764), the
1510       // MIPS port puts _gp_disp symbol into DSO files and incorrectly assigns
1511       // VER_NDX_LOCAL. Workaround this bug.
1512       if (config->emachine == EM_MIPS && name == "_gp_disp")
1513         continue;
1514       error("corrupt input file: version definition index " + Twine(idx) +
1515             " for symbol " + name + " is out of bounds\n>>> defined in " +
1516             toString(this));
1517       continue;
1518     }
1519 
1520     uint32_t alignment = getAlignment<ELFT>(sections, sym);
1521     if (ver == idx) {
1522       auto *s = symtab.addSymbol(
1523           SharedSymbol{*this, name, sym.getBinding(), sym.st_other,
1524                        sym.getType(), sym.st_value, sym.st_size, alignment});
1525       if (s->file == this)
1526         s->verdefIndex = ver;
1527     }
1528 
1529     // Also add the symbol with the versioned name to handle undefined symbols
1530     // with explicit versions.
1531     if (ver == VER_NDX_GLOBAL)
1532       continue;
1533 
1534     StringRef verName =
1535         stringTable.data() +
1536         reinterpret_cast<const Elf_Verdef *>(verdefs[idx])->getAux()->vda_name;
1537     versionedNameBuffer.clear();
1538     name = (name + "@" + verName).toStringRef(versionedNameBuffer);
1539     auto *s = symtab.addSymbol(
1540         SharedSymbol{*this, saver().save(name), sym.getBinding(), sym.st_other,
1541                      sym.getType(), sym.st_value, sym.st_size, alignment});
1542     if (s->file == this)
1543       s->verdefIndex = idx;
1544   }
1545 }
1546 
1547 static ELFKind getBitcodeELFKind(const Triple &t) {
1548   if (t.isLittleEndian())
1549     return t.isArch64Bit() ? ELF64LEKind : ELF32LEKind;
1550   return t.isArch64Bit() ? ELF64BEKind : ELF32BEKind;
1551 }
1552 
1553 static uint16_t getBitcodeMachineKind(StringRef path, const Triple &t) {
1554   switch (t.getArch()) {
1555   case Triple::aarch64:
1556   case Triple::aarch64_be:
1557     return EM_AARCH64;
1558   case Triple::amdgcn:
1559   case Triple::r600:
1560     return EM_AMDGPU;
1561   case Triple::arm:
1562   case Triple::thumb:
1563     return EM_ARM;
1564   case Triple::avr:
1565     return EM_AVR;
1566   case Triple::hexagon:
1567     return EM_HEXAGON;
1568   case Triple::mips:
1569   case Triple::mipsel:
1570   case Triple::mips64:
1571   case Triple::mips64el:
1572     return EM_MIPS;
1573   case Triple::msp430:
1574     return EM_MSP430;
1575   case Triple::ppc:
1576   case Triple::ppcle:
1577     return EM_PPC;
1578   case Triple::ppc64:
1579   case Triple::ppc64le:
1580     return EM_PPC64;
1581   case Triple::riscv32:
1582   case Triple::riscv64:
1583     return EM_RISCV;
1584   case Triple::x86:
1585     return t.isOSIAMCU() ? EM_IAMCU : EM_386;
1586   case Triple::x86_64:
1587     return EM_X86_64;
1588   default:
1589     error(path + ": could not infer e_machine from bitcode target triple " +
1590           t.str());
1591     return EM_NONE;
1592   }
1593 }
1594 
1595 static uint8_t getOsAbi(const Triple &t) {
1596   switch (t.getOS()) {
1597   case Triple::AMDHSA:
1598     return ELF::ELFOSABI_AMDGPU_HSA;
1599   case Triple::AMDPAL:
1600     return ELF::ELFOSABI_AMDGPU_PAL;
1601   case Triple::Mesa3D:
1602     return ELF::ELFOSABI_AMDGPU_MESA3D;
1603   default:
1604     return ELF::ELFOSABI_NONE;
1605   }
1606 }
1607 
1608 BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
1609                          uint64_t offsetInArchive, bool lazy)
1610     : InputFile(BitcodeKind, mb) {
1611   this->archiveName = archiveName;
1612   this->lazy = lazy;
1613 
1614   std::string path = mb.getBufferIdentifier().str();
1615   if (config->thinLTOIndexOnly)
1616     path = replaceThinLTOSuffix(mb.getBufferIdentifier());
1617 
1618   // ThinLTO assumes that all MemoryBufferRefs given to it have a unique
1619   // name. If two archives define two members with the same name, this
1620   // causes a collision which result in only one of the objects being taken
1621   // into consideration at LTO time (which very likely causes undefined
1622   // symbols later in the link stage). So we append file offset to make
1623   // filename unique.
1624   StringRef name = archiveName.empty()
1625                        ? saver().save(path)
1626                        : saver().save(archiveName + "(" + path::filename(path) +
1627                                       " at " + utostr(offsetInArchive) + ")");
1628   MemoryBufferRef mbref(mb.getBuffer(), name);
1629 
1630   obj = CHECK(lto::InputFile::create(mbref), this);
1631 
1632   Triple t(obj->getTargetTriple());
1633   ekind = getBitcodeELFKind(t);
1634   emachine = getBitcodeMachineKind(mb.getBufferIdentifier(), t);
1635   osabi = getOsAbi(t);
1636 }
1637 
1638 static uint8_t mapVisibility(GlobalValue::VisibilityTypes gvVisibility) {
1639   switch (gvVisibility) {
1640   case GlobalValue::DefaultVisibility:
1641     return STV_DEFAULT;
1642   case GlobalValue::HiddenVisibility:
1643     return STV_HIDDEN;
1644   case GlobalValue::ProtectedVisibility:
1645     return STV_PROTECTED;
1646   }
1647   llvm_unreachable("unknown visibility");
1648 }
1649 
1650 static void
1651 createBitcodeSymbol(Symbol *&sym, const std::vector<bool> &keptComdats,
1652                     const lto::InputFile::Symbol &objSym, BitcodeFile &f) {
1653   uint8_t binding = objSym.isWeak() ? STB_WEAK : STB_GLOBAL;
1654   uint8_t type = objSym.isTLS() ? STT_TLS : STT_NOTYPE;
1655   uint8_t visibility = mapVisibility(objSym.getVisibility());
1656 
1657   if (!sym)
1658     sym = symtab.insert(saver().save(objSym.getName()));
1659 
1660   int c = objSym.getComdatIndex();
1661   if (objSym.isUndefined() || (c != -1 && !keptComdats[c])) {
1662     Undefined newSym(&f, StringRef(), binding, visibility, type);
1663     sym->resolve(newSym);
1664     sym->referenced = true;
1665     return;
1666   }
1667 
1668   if (objSym.isCommon()) {
1669     sym->resolve(CommonSymbol{&f, StringRef(), binding, visibility, STT_OBJECT,
1670                               objSym.getCommonAlignment(),
1671                               objSym.getCommonSize()});
1672   } else {
1673     Defined newSym(&f, StringRef(), binding, visibility, type, 0, 0, nullptr);
1674     if (objSym.canBeOmittedFromSymbolTable())
1675       newSym.exportDynamic = false;
1676     sym->resolve(newSym);
1677   }
1678 }
1679 
1680 void BitcodeFile::parse() {
1681   for (std::pair<StringRef, Comdat::SelectionKind> s : obj->getComdatTable()) {
1682     keptComdats.push_back(
1683         s.second == Comdat::NoDeduplicate ||
1684         symtab.comdatGroups.try_emplace(CachedHashStringRef(s.first), this)
1685             .second);
1686   }
1687 
1688   if (numSymbols == 0) {
1689     numSymbols = obj->symbols().size();
1690     symbols = std::make_unique<Symbol *[]>(numSymbols);
1691   }
1692   // Process defined symbols first. See the comment in
1693   // ObjFile<ELFT>::initializeSymbols.
1694   for (auto [i, irSym] : llvm::enumerate(obj->symbols()))
1695     if (!irSym.isUndefined())
1696       createBitcodeSymbol(symbols[i], keptComdats, irSym, *this);
1697   for (auto [i, irSym] : llvm::enumerate(obj->symbols()))
1698     if (irSym.isUndefined())
1699       createBitcodeSymbol(symbols[i], keptComdats, irSym, *this);
1700 
1701   for (auto l : obj->getDependentLibraries())
1702     addDependentLibrary(l, this);
1703 }
1704 
1705 void BitcodeFile::parseLazy() {
1706   numSymbols = obj->symbols().size();
1707   symbols = std::make_unique<Symbol *[]>(numSymbols);
1708   for (auto [i, irSym] : llvm::enumerate(obj->symbols()))
1709     if (!irSym.isUndefined()) {
1710       auto *sym = symtab.insert(saver().save(irSym.getName()));
1711       sym->resolve(LazyObject{*this});
1712       symbols[i] = sym;
1713     }
1714 }
1715 
1716 void BitcodeFile::postParse() {
1717   for (auto [i, irSym] : llvm::enumerate(obj->symbols())) {
1718     const Symbol &sym = *symbols[i];
1719     if (sym.file == this || !sym.isDefined() || irSym.isUndefined() ||
1720         irSym.isCommon() || irSym.isWeak())
1721       continue;
1722     int c = irSym.getComdatIndex();
1723     if (c != -1 && !keptComdats[c])
1724       continue;
1725     reportDuplicate(sym, this, nullptr, 0);
1726   }
1727 }
1728 
1729 void BinaryFile::parse() {
1730   ArrayRef<uint8_t> data = arrayRefFromStringRef(mb.getBuffer());
1731   auto *section = make<InputSection>(this, SHF_ALLOC | SHF_WRITE, SHT_PROGBITS,
1732                                      8, data, ".data");
1733   sections.push_back(section);
1734 
1735   // For each input file foo that is embedded to a result as a binary
1736   // blob, we define _binary_foo_{start,end,size} symbols, so that
1737   // user programs can access blobs by name. Non-alphanumeric
1738   // characters in a filename are replaced with underscore.
1739   std::string s = "_binary_" + mb.getBufferIdentifier().str();
1740   for (size_t i = 0; i < s.size(); ++i)
1741     if (!isAlnum(s[i]))
1742       s[i] = '_';
1743 
1744   llvm::StringSaver &saver = lld::saver();
1745 
1746   symtab.addAndCheckDuplicate(Defined{nullptr, saver.save(s + "_start"),
1747                                       STB_GLOBAL, STV_DEFAULT, STT_OBJECT, 0, 0,
1748                                       section});
1749   symtab.addAndCheckDuplicate(Defined{nullptr, saver.save(s + "_end"),
1750                                       STB_GLOBAL, STV_DEFAULT, STT_OBJECT,
1751                                       data.size(), 0, section});
1752   symtab.addAndCheckDuplicate(Defined{nullptr, saver.save(s + "_size"),
1753                                       STB_GLOBAL, STV_DEFAULT, STT_OBJECT,
1754                                       data.size(), 0, nullptr});
1755 }
1756 
1757 ELFFileBase *elf::createObjFile(MemoryBufferRef mb, StringRef archiveName,
1758                                 bool lazy) {
1759   ELFFileBase *f;
1760   switch (getELFKind(mb, archiveName)) {
1761   case ELF32LEKind:
1762     f = make<ObjFile<ELF32LE>>(ELF32LEKind, mb, archiveName);
1763     break;
1764   case ELF32BEKind:
1765     f = make<ObjFile<ELF32BE>>(ELF32BEKind, mb, archiveName);
1766     break;
1767   case ELF64LEKind:
1768     f = make<ObjFile<ELF64LE>>(ELF64LEKind, mb, archiveName);
1769     break;
1770   case ELF64BEKind:
1771     f = make<ObjFile<ELF64BE>>(ELF64BEKind, mb, archiveName);
1772     break;
1773   default:
1774     llvm_unreachable("getELFKind");
1775   }
1776   f->init();
1777   f->lazy = lazy;
1778   return f;
1779 }
1780 
1781 template <class ELFT> void ObjFile<ELFT>::parseLazy() {
1782   const ArrayRef<typename ELFT::Sym> eSyms = this->getELFSyms<ELFT>();
1783   numSymbols = eSyms.size();
1784   symbols = std::make_unique<Symbol *[]>(numSymbols);
1785 
1786   // resolve() may trigger this->extract() if an existing symbol is an undefined
1787   // symbol. If that happens, this function has served its purpose, and we can
1788   // exit from the loop early.
1789   for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) {
1790     if (eSyms[i].st_shndx == SHN_UNDEF)
1791       continue;
1792     symbols[i] = symtab.insert(CHECK(eSyms[i].getName(stringTable), this));
1793     symbols[i]->resolve(LazyObject{*this});
1794     if (!lazy)
1795       break;
1796   }
1797 }
1798 
1799 bool InputFile::shouldExtractForCommon(StringRef name) {
1800   if (isa<BitcodeFile>(this))
1801     return isBitcodeNonCommonDef(mb, name, archiveName);
1802 
1803   return isNonCommonDef(mb, name, archiveName);
1804 }
1805 
1806 std::string elf::replaceThinLTOSuffix(StringRef path) {
1807   auto [suffix, repl] = config->thinLTOObjectSuffixReplace;
1808   if (path.consume_back(suffix))
1809     return (path + repl).str();
1810   return std::string(path);
1811 }
1812 
1813 template class elf::ObjFile<ELF32LE>;
1814 template class elf::ObjFile<ELF32BE>;
1815 template class elf::ObjFile<ELF64LE>;
1816 template class elf::ObjFile<ELF64BE>;
1817 
1818 template void SharedFile::parse<ELF32LE>();
1819 template void SharedFile::parse<ELF32BE>();
1820 template void SharedFile::parse<ELF64LE>();
1821 template void SharedFile::parse<ELF64BE>();
1822