xref: /llvm-project/lld/ELF/InputFiles.cpp (revision 84af3ee5124de3385b829c3a9980fd734f0d92e8)
1 //===- InputFiles.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "InputFiles.h"
10 #include "Config.h"
11 #include "DWARF.h"
12 #include "Driver.h"
13 #include "InputSection.h"
14 #include "LinkerScript.h"
15 #include "SymbolTable.h"
16 #include "Symbols.h"
17 #include "SyntheticSections.h"
18 #include "Target.h"
19 #include "lld/Common/CommonLinkerContext.h"
20 #include "lld/Common/DWARF.h"
21 #include "llvm/ADT/CachedHashString.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/LTO/LTO.h"
24 #include "llvm/Object/IRObjectFile.h"
25 #include "llvm/Support/ARMAttributeParser.h"
26 #include "llvm/Support/ARMBuildAttributes.h"
27 #include "llvm/Support/Endian.h"
28 #include "llvm/Support/FileSystem.h"
29 #include "llvm/Support/Path.h"
30 #include "llvm/Support/RISCVAttributeParser.h"
31 #include "llvm/Support/TimeProfiler.h"
32 #include "llvm/Support/raw_ostream.h"
33 #include <optional>
34 
35 using namespace llvm;
36 using namespace llvm::ELF;
37 using namespace llvm::object;
38 using namespace llvm::sys;
39 using namespace llvm::sys::fs;
40 using namespace llvm::support::endian;
41 using namespace lld;
42 using namespace lld::elf;
43 
// ObjFile<ELFT>::importCmseSymbols() is explicitly instantiated in ARM.cpp.
// Only declare the instantiations here (extern) instead of instantiating the
// function again; this avoids warnings with MSVC.
extern template void ObjFile<ELF32LE>::importCmseSymbols();
extern template void ObjFile<ELF32BE>::importCmseSymbols();
extern template void ObjFile<ELF64LE>::importCmseSymbols();
extern template void ObjFile<ELF64BE>::importCmseSymbols();
50 
51 // Returns "<internal>", "foo.a(bar.o)" or "baz.o".
52 std::string elf::toStr(Ctx &ctx, const InputFile *f) {
53   static std::mutex mu;
54   if (!f)
55     return "<internal>";
56 
57   {
58     std::lock_guard<std::mutex> lock(mu);
59     if (f->toStringCache.empty()) {
60       if (f->archiveName.empty())
61         f->toStringCache = f->getName();
62       else
63         (f->archiveName + "(" + f->getName() + ")").toVector(f->toStringCache);
64     }
65   }
66   return std::string(f->toStringCache);
67 }
68 
69 const ELFSyncStream &elf::operator<<(const ELFSyncStream &s,
70                                      const InputFile *f) {
71   return s << toStr(s.ctx, f);
72 }
73 
74 static ELFKind getELFKind(Ctx &ctx, MemoryBufferRef mb, StringRef archiveName) {
75   unsigned char size;
76   unsigned char endian;
77   std::tie(size, endian) = getElfArchType(mb.getBuffer());
78 
79   auto report = [&](StringRef msg) {
80     StringRef filename = mb.getBufferIdentifier();
81     if (archiveName.empty())
82       Fatal(ctx) << filename << ": " << msg;
83     else
84       Fatal(ctx) << archiveName << "(" << filename << "): " << msg;
85   };
86 
87   if (!mb.getBuffer().starts_with(ElfMagic))
88     report("not an ELF file");
89   if (endian != ELFDATA2LSB && endian != ELFDATA2MSB)
90     report("corrupted ELF file: invalid data encoding");
91   if (size != ELFCLASS32 && size != ELFCLASS64)
92     report("corrupted ELF file: invalid file class");
93 
94   size_t bufSize = mb.getBuffer().size();
95   if ((size == ELFCLASS32 && bufSize < sizeof(Elf32_Ehdr)) ||
96       (size == ELFCLASS64 && bufSize < sizeof(Elf64_Ehdr)))
97     report("corrupted ELF file: file is too short");
98 
99   if (size == ELFCLASS32)
100     return (endian == ELFDATA2LSB) ? ELF32LEKind : ELF32BEKind;
101   return (endian == ELFDATA2LSB) ? ELF64LEKind : ELF64BEKind;
102 }
103 
104 // For ARM only, to set the EF_ARM_ABI_FLOAT_SOFT or EF_ARM_ABI_FLOAT_HARD
105 // flag in the ELF Header we need to look at Tag_ABI_VFP_args to find out how
106 // the input objects have been compiled.
107 static void updateARMVFPArgs(Ctx &ctx, const ARMAttributeParser &attributes,
108                              const InputFile *f) {
109   std::optional<unsigned> attr =
110       attributes.getAttributeValue(ARMBuildAttrs::ABI_VFP_args);
111   if (!attr)
112     // If an ABI tag isn't present then it is implicitly given the value of 0
113     // which maps to ARMBuildAttrs::BaseAAPCS. However many assembler files,
114     // including some in glibc that don't use FP args (and should have value 3)
115     // don't have the attribute so we do not consider an implicit value of 0
116     // as a clash.
117     return;
118 
119   unsigned vfpArgs = *attr;
120   ARMVFPArgKind arg;
121   switch (vfpArgs) {
122   case ARMBuildAttrs::BaseAAPCS:
123     arg = ARMVFPArgKind::Base;
124     break;
125   case ARMBuildAttrs::HardFPAAPCS:
126     arg = ARMVFPArgKind::VFP;
127     break;
128   case ARMBuildAttrs::ToolChainFPPCS:
129     // Tool chain specific convention that conforms to neither AAPCS variant.
130     arg = ARMVFPArgKind::ToolChain;
131     break;
132   case ARMBuildAttrs::CompatibleFPAAPCS:
133     // Object compatible with all conventions.
134     return;
135   default:
136     ErrAlways(ctx) << f << ": unknown Tag_ABI_VFP_args value: " << vfpArgs;
137     return;
138   }
139   // Follow ld.bfd and error if there is a mix of calling conventions.
140   if (ctx.arg.armVFPArgs != arg && ctx.arg.armVFPArgs != ARMVFPArgKind::Default)
141     ErrAlways(ctx) << f << ": incompatible Tag_ABI_VFP_args";
142   else
143     ctx.arg.armVFPArgs = arg;
144 }
145 
146 // The ARM support in lld makes some use of instructions that are not available
147 // on all ARM architectures. Namely:
148 // - Use of BLX instruction for interworking between ARM and Thumb state.
149 // - Use of the extended Thumb branch encoding in relocation.
150 // - Use of the MOVT/MOVW instructions in Thumb Thunks.
151 // The ARM Attributes section contains information about the architecture chosen
152 // at compile time. We follow the convention that if at least one input object
153 // is compiled with an architecture that supports these features then lld is
154 // permitted to use them.
155 static void updateSupportedARMFeatures(Ctx &ctx,
156                                        const ARMAttributeParser &attributes) {
157   std::optional<unsigned> attr =
158       attributes.getAttributeValue(ARMBuildAttrs::CPU_arch);
159   if (!attr)
160     return;
161   auto arch = *attr;
162   switch (arch) {
163   case ARMBuildAttrs::Pre_v4:
164   case ARMBuildAttrs::v4:
165   case ARMBuildAttrs::v4T:
166     // Architectures prior to v5 do not support BLX instruction
167     break;
168   case ARMBuildAttrs::v5T:
169   case ARMBuildAttrs::v5TE:
170   case ARMBuildAttrs::v5TEJ:
171   case ARMBuildAttrs::v6:
172   case ARMBuildAttrs::v6KZ:
173   case ARMBuildAttrs::v6K:
174     ctx.arg.armHasBlx = true;
175     // Architectures used in pre-Cortex processors do not support
176     // The J1 = 1 J2 = 1 Thumb branch range extension, with the exception
177     // of Architecture v6T2 (arm1156t2-s and arm1156t2f-s) that do.
178     break;
179   default:
180     // All other Architectures have BLX and extended branch encoding
181     ctx.arg.armHasBlx = true;
182     ctx.arg.armJ1J2BranchEncoding = true;
183     if (arch != ARMBuildAttrs::v6_M && arch != ARMBuildAttrs::v6S_M)
184       // All Architectures used in Cortex processors with the exception
185       // of v6-M and v6S-M have the MOVT and MOVW instructions.
186       ctx.arg.armHasMovtMovw = true;
187     break;
188   }
189 
190   // Only ARMv8-M or later architectures have CMSE support.
191   std::optional<unsigned> profile =
192       attributes.getAttributeValue(ARMBuildAttrs::CPU_arch_profile);
193   if (!profile)
194     return;
195   if (arch >= ARMBuildAttrs::CPUArch::v8_M_Base &&
196       profile == ARMBuildAttrs::MicroControllerProfile)
197     ctx.arg.armCMSESupport = true;
198 
199   // The thumb PLT entries require Thumb2 which can be used on multiple archs.
200   // For now, let's limit it to ones where ARM isn't available and we know have
201   // Thumb2.
202   std::optional<unsigned> armISA =
203       attributes.getAttributeValue(ARMBuildAttrs::ARM_ISA_use);
204   std::optional<unsigned> thumb =
205       attributes.getAttributeValue(ARMBuildAttrs::THUMB_ISA_use);
206   ctx.arg.armHasArmISA |= armISA && *armISA >= ARMBuildAttrs::Allowed;
207   ctx.arg.armHasThumb2ISA |= thumb && *thumb >= ARMBuildAttrs::AllowThumb32;
208 }
209 
210 InputFile::InputFile(Ctx &ctx, Kind k, MemoryBufferRef m)
211     : ctx(ctx), mb(m), groupId(ctx.driver.nextGroupId), fileKind(k) {
212   // All files within the same --{start,end}-group get the same group ID.
213   // Otherwise, a new file will get a new group ID.
214   if (!ctx.driver.isInGroup)
215     ++ctx.driver.nextGroupId;
216 }
217 
218 InputFile::~InputFile() {}
219 
220 std::optional<MemoryBufferRef> elf::readFile(Ctx &ctx, StringRef path) {
221   llvm::TimeTraceScope timeScope("Load input files", path);
222 
223   // The --chroot option changes our virtual root directory.
224   // This is useful when you are dealing with files created by --reproduce.
225   if (!ctx.arg.chroot.empty() && path.starts_with("/"))
226     path = ctx.saver.save(ctx.arg.chroot + path);
227 
228   bool remapped = false;
229   auto it = ctx.arg.remapInputs.find(path);
230   if (it != ctx.arg.remapInputs.end()) {
231     path = it->second;
232     remapped = true;
233   } else {
234     for (const auto &[pat, toFile] : ctx.arg.remapInputsWildcards) {
235       if (pat.match(path)) {
236         path = toFile;
237         remapped = true;
238         break;
239       }
240     }
241   }
242   if (remapped) {
243     // Use /dev/null to indicate an input file that should be ignored. Change
244     // the path to NUL on Windows.
245 #ifdef _WIN32
246     if (path == "/dev/null")
247       path = "NUL";
248 #endif
249   }
250 
251   Log(ctx) << path;
252   ctx.arg.dependencyFiles.insert(llvm::CachedHashString(path));
253 
254   auto mbOrErr = MemoryBuffer::getFile(path, /*IsText=*/false,
255                                        /*RequiresNullTerminator=*/false);
256   if (auto ec = mbOrErr.getError()) {
257     ErrAlways(ctx) << "cannot open " << path << ": " << ec.message();
258     return std::nullopt;
259   }
260 
261   MemoryBufferRef mbref = (*mbOrErr)->getMemBufferRef();
262   ctx.memoryBuffers.push_back(std::move(*mbOrErr)); // take MB ownership
263 
264   if (ctx.tar)
265     ctx.tar->append(relativeToRoot(path), mbref.getBuffer());
266   return mbref;
267 }
268 
269 // All input object files must be for the same architecture
270 // (e.g. it does not make sense to link x86 object files with
271 // MIPS object files.) This function checks for that error.
272 static bool isCompatible(Ctx &ctx, InputFile *file) {
273   if (!file->isElf() && !isa<BitcodeFile>(file))
274     return true;
275 
276   if (file->ekind == ctx.arg.ekind && file->emachine == ctx.arg.emachine) {
277     if (ctx.arg.emachine != EM_MIPS)
278       return true;
279     if (isMipsN32Abi(ctx, *file) == ctx.arg.mipsN32Abi)
280       return true;
281   }
282 
283   StringRef target =
284       !ctx.arg.bfdname.empty() ? ctx.arg.bfdname : ctx.arg.emulation;
285   if (!target.empty()) {
286     Err(ctx) << file << " is incompatible with " << target;
287     return false;
288   }
289 
290   InputFile *existing = nullptr;
291   if (!ctx.objectFiles.empty())
292     existing = ctx.objectFiles[0];
293   else if (!ctx.sharedFiles.empty())
294     existing = ctx.sharedFiles[0];
295   else if (!ctx.bitcodeFiles.empty())
296     existing = ctx.bitcodeFiles[0];
297   auto diag = Err(ctx);
298   diag << file << " is incompatible";
299   if (existing)
300     diag << " with " << existing;
301   return false;
302 }
303 
304 template <class ELFT> static void doParseFile(Ctx &ctx, InputFile *file) {
305   if (!isCompatible(ctx, file))
306     return;
307 
308   // Lazy object file
309   if (file->lazy) {
310     if (auto *f = dyn_cast<BitcodeFile>(file)) {
311       ctx.lazyBitcodeFiles.push_back(f);
312       f->parseLazy();
313     } else {
314       cast<ObjFile<ELFT>>(file)->parseLazy();
315     }
316     return;
317   }
318 
319   if (ctx.arg.trace)
320     Msg(ctx) << file;
321 
322   if (file->kind() == InputFile::ObjKind) {
323     ctx.objectFiles.push_back(cast<ELFFileBase>(file));
324     cast<ObjFile<ELFT>>(file)->parse();
325   } else if (auto *f = dyn_cast<SharedFile>(file)) {
326     f->parse<ELFT>();
327   } else if (auto *f = dyn_cast<BitcodeFile>(file)) {
328     ctx.bitcodeFiles.push_back(f);
329     f->parse();
330   } else {
331     ctx.binaryFiles.push_back(cast<BinaryFile>(file));
332     cast<BinaryFile>(file)->parse();
333   }
334 }
335 
// Add symbols in File to the symbol table.
//
// This is the non-template entry point: it forwards `ctx` and `file` to
// doParseFile through the invokeELFT macro (defined elsewhere; presumably it
// selects the ELFT instantiation for the ELF kind of the link).
void elf::parseFile(Ctx &ctx, InputFile *file) {
  invokeELFT(doParseFile, ctx, file);
}
340 
// ObjFile<ELFT>::importCmseSymbols() is explicitly instantiated in ARM.cpp.
// Mark it extern here to avoid warnings when building with MSVC. These
// declarations repeat the identical ones near the top of this file.
extern template void ObjFile<ELF32LE>::importCmseSymbols();
extern template void ObjFile<ELF32BE>::importCmseSymbols();
extern template void ObjFile<ELF64LE>::importCmseSymbols();
extern template void ObjFile<ELF64BE>::importCmseSymbols();
347 
348 template <class ELFT>
349 static void
350 doParseFiles(Ctx &ctx,
351              const SmallVector<std::unique_ptr<InputFile>, 0> &files) {
352   // Add all files to the symbol table. This will add almost all symbols that we
353   // need to the symbol table. This process might add files to the link due to
354   // addDependentLibrary.
355   for (size_t i = 0; i < files.size(); ++i) {
356     llvm::TimeTraceScope timeScope("Parse input files", files[i]->getName());
357     doParseFile<ELFT>(ctx, files[i].get());
358   }
359   if (ctx.driver.armCmseImpLib)
360     cast<ObjFile<ELFT>>(*ctx.driver.armCmseImpLib).importCmseSymbols();
361 }
362 
// Parses all of `files` under a single "Parse input files" time-trace scope.
// The work is forwarded to doParseFiles through the invokeELFT macro (defined
// elsewhere; presumably it selects the ELFT instantiation for the ELF kind of
// the link).
void elf::parseFiles(Ctx &ctx,
                     const SmallVector<std::unique_ptr<InputFile>, 0> &files) {
  llvm::TimeTraceScope timeScope("Parse input files");
  invokeELFT(doParseFiles, ctx, files);
}
368 
369 // Concatenates arguments to construct a string representing an error location.
370 StringRef InputFile::getNameForScript() const {
371   if (archiveName.empty())
372     return getName();
373 
374   if (nameForScriptCache.empty())
375     nameForScriptCache = (archiveName + Twine(':') + getName()).str();
376 
377   return nameForScriptCache;
378 }
379 
380 // An ELF object file may contain a `.deplibs` section. If it exists, the
381 // section contains a list of library specifiers such as `m` for libm. This
382 // function resolves a given name by finding the first matching library checking
383 // the various ways that a library can be specified to LLD. This ELF extension
384 // is a form of autolinking and is called `dependent libraries`. It is currently
385 // unique to LLVM and lld.
386 static void addDependentLibrary(Ctx &ctx, StringRef specifier,
387                                 const InputFile *f) {
388   if (!ctx.arg.dependentLibraries)
389     return;
390   if (std::optional<std::string> s = searchLibraryBaseName(ctx, specifier))
391     ctx.driver.addFile(ctx.saver.save(*s), /*withLOption=*/true);
392   else if (std::optional<std::string> s = findFromSearchPaths(ctx, specifier))
393     ctx.driver.addFile(ctx.saver.save(*s), /*withLOption=*/true);
394   else if (fs::exists(specifier))
395     ctx.driver.addFile(specifier, /*withLOption=*/false);
396   else
397     ErrAlways(ctx)
398         << f << ": unable to find library from dependent library specifier: "
399         << specifier;
400 }
401 
402 // Record the membership of a section group so that in the garbage collection
403 // pass, section group members are kept or discarded as a unit.
404 template <class ELFT>
405 static void handleSectionGroup(ArrayRef<InputSectionBase *> sections,
406                                ArrayRef<typename ELFT::Word> entries) {
407   bool hasAlloc = false;
408   for (uint32_t index : entries.slice(1)) {
409     if (index >= sections.size())
410       return;
411     if (InputSectionBase *s = sections[index])
412       if (s != &InputSection::discarded && s->flags & SHF_ALLOC)
413         hasAlloc = true;
414   }
415 
416   // If any member has the SHF_ALLOC flag, the whole group is subject to garbage
417   // collection. See the comment in markLive(). This rule retains .debug_types
418   // and .rela.debug_types.
419   if (!hasAlloc)
420     return;
421 
422   // Connect the members in a circular doubly-linked list via
423   // nextInSectionGroup.
424   InputSectionBase *head;
425   InputSectionBase *prev = nullptr;
426   for (uint32_t index : entries.slice(1)) {
427     InputSectionBase *s = sections[index];
428     if (!s || s == &InputSection::discarded)
429       continue;
430     if (prev)
431       prev->nextInSectionGroup = s;
432     else
433       head = s;
434     prev = s;
435   }
436   if (prev)
437     prev->nextInSectionGroup = head;
438 }
439 
440 template <class ELFT> void ObjFile<ELFT>::initDwarf() {
441   dwarf = std::make_unique<DWARFCache>(std::make_unique<DWARFContext>(
442       std::make_unique<LLDDwarfObj<ELFT>>(this), "",
443       [&](Error err) { Warn(ctx) << getName() + ": " << std::move(err); },
444       [&](Error warning) {
445         Warn(ctx) << getName() << ": " << std::move(warning);
446       }));
447 }
448 
449 DWARFCache *ELFFileBase::getDwarf() {
450   assert(fileKind == ObjKind);
451   llvm::call_once(initDwarf, [this]() {
452     switch (ekind) {
453     default:
454       llvm_unreachable("");
455     case ELF32LEKind:
456       return cast<ObjFile<ELF32LE>>(this)->initDwarf();
457     case ELF32BEKind:
458       return cast<ObjFile<ELF32BE>>(this)->initDwarf();
459     case ELF64LEKind:
460       return cast<ObjFile<ELF64LE>>(this)->initDwarf();
461     case ELF64BEKind:
462       return cast<ObjFile<ELF64BE>>(this)->initDwarf();
463     }
464   });
465   return dwarf.get();
466 }
467 
468 ELFFileBase::ELFFileBase(Ctx &ctx, Kind k, ELFKind ekind, MemoryBufferRef mb)
469     : InputFile(ctx, k, mb) {
470   this->ekind = ekind;
471 }
472 
473 ELFFileBase::~ELFFileBase() {}
474 
475 template <typename Elf_Shdr>
476 static const Elf_Shdr *findSection(ArrayRef<Elf_Shdr> sections, uint32_t type) {
477   for (const Elf_Shdr &sec : sections)
478     if (sec.sh_type == type)
479       return &sec;
480   return nullptr;
481 }
482 
483 void ELFFileBase::init() {
484   switch (ekind) {
485   case ELF32LEKind:
486     init<ELF32LE>(fileKind);
487     break;
488   case ELF32BEKind:
489     init<ELF32BE>(fileKind);
490     break;
491   case ELF64LEKind:
492     init<ELF64LE>(fileKind);
493     break;
494   case ELF64BEKind:
495     init<ELF64BE>(fileKind);
496     break;
497   default:
498     llvm_unreachable("getELFKind");
499   }
500 }
501 
502 template <class ELFT> void ELFFileBase::init(InputFile::Kind k) {
503   using Elf_Shdr = typename ELFT::Shdr;
504   using Elf_Sym = typename ELFT::Sym;
505 
506   // Initialize trivial attributes.
507   const ELFFile<ELFT> &obj = getObj<ELFT>();
508   emachine = obj.getHeader().e_machine;
509   osabi = obj.getHeader().e_ident[llvm::ELF::EI_OSABI];
510   abiVersion = obj.getHeader().e_ident[llvm::ELF::EI_ABIVERSION];
511 
512   ArrayRef<Elf_Shdr> sections = CHECK2(obj.sections(), this);
513   elfShdrs = sections.data();
514   numELFShdrs = sections.size();
515 
516   // Find a symbol table.
517   const Elf_Shdr *symtabSec =
518       findSection(sections, k == SharedKind ? SHT_DYNSYM : SHT_SYMTAB);
519 
520   if (!symtabSec)
521     return;
522 
523   // Initialize members corresponding to a symbol table.
524   firstGlobal = symtabSec->sh_info;
525 
526   ArrayRef<Elf_Sym> eSyms = CHECK2(obj.symbols(symtabSec), this);
527   if (firstGlobal == 0 || firstGlobal > eSyms.size())
528     Fatal(ctx) << this << ": invalid sh_info in symbol table";
529 
530   elfSyms = reinterpret_cast<const void *>(eSyms.data());
531   numSymbols = eSyms.size();
532   stringTable = CHECK2(obj.getStringTableForSymtab(*symtabSec, sections), this);
533 }
534 
535 template <class ELFT>
536 uint32_t ObjFile<ELFT>::getSectionIndex(const Elf_Sym &sym) const {
537   return CHECK2(
538       this->getObj().getSectionIndex(sym, getELFSyms<ELFT>(), shndxTable),
539       this);
540 }
541 
// Parses the parts of this object file that are handled sequentially.
//
// With --just-symbols only the (empty) section table and the symbols are set
// up. Otherwise the section headers are walked once to
//   - resolve SHT_GROUP sections (keeping or discarding COMDAT groups),
//   - process SHT_LLVM_DEPENDENT_LIBRARIES sections,
//   - handle target-specific sections (ARM attributes, AArch64 memtag
//     globals),
// and finally the symbol table is read.
//
// ignoreComdats: when true, every group is kept without consulting the
// comdatGroups table.
template <class ELFT> void ObjFile<ELFT>::parse(bool ignoreComdats) {
  object::ELFFile<ELFT> obj = this->getObj();
  // Read a section table. justSymbols is usually false.
  if (this->justSymbols) {
    initializeJustSymbols();
    initializeSymbols(obj);
    return;
  }

  // Handle dependent libraries and selection of section groups as these are not
  // done in parallel.
  ArrayRef<Elf_Shdr> objSections = getELFShdrs<ELFT>();
  StringRef shstrtab = CHECK2(obj.getSectionStringTable(objSections), this);
  uint64_t size = objSections.size();
  sections.resize(size);
  for (size_t i = 0; i != size; ++i) {
    const Elf_Shdr &sec = objSections[i];
    // SHT_PROGBITS sections need no handling in this pass.
    if (LLVM_LIKELY(sec.sh_type == SHT_PROGBITS))
      continue;
    if (LLVM_LIKELY(sec.sh_type == SHT_GROUP)) {
      StringRef signature = getShtGroupSignature(objSections, sec);
      ArrayRef<Elf_Word> entries =
          CHECK2(obj.template getSectionContentsAsArray<Elf_Word>(sec), this);
      if (entries.empty())
        Fatal(ctx) << this << ": empty SHT_GROUP";

      // The first word is the group flag; only 0 and GRP_COMDAT are accepted.
      Elf_Word flag = entries[0];
      if (flag && flag != GRP_COMDAT)
        Fatal(ctx) << this << ": unsupported SHT_GROUP format";

      // Keep the group if it is not a COMDAT group, if COMDAT handling is
      // disabled, or if this file is the first one to register the signature
      // (try_emplace reports whether the insertion took place).
      bool keepGroup = !flag || ignoreComdats ||
                       ctx.symtab->comdatGroups
                           .try_emplace(CachedHashStringRef(signature), this)
                           .second;
      if (keepGroup) {
        if (!ctx.arg.resolveGroups)
          sections[i] = createInputSection(
              i, sec, check(obj.getSectionName(sec, shstrtab)));
      } else {
        // Otherwise, discard group members.
        for (uint32_t secIndex : entries.slice(1)) {
          if (secIndex >= size)
            Fatal(ctx) << this
                       << ": invalid section index in group: " << secIndex;
          sections[secIndex] = &InputSection::discarded;
        }
      }
      continue;
    }

    if (sec.sh_type == SHT_LLVM_DEPENDENT_LIBRARIES && !ctx.arg.relocatable) {
      StringRef name = check(obj.getSectionName(sec, shstrtab));
      ArrayRef<char> data = CHECK2(
          this->getObj().template getSectionContentsAsArray<char>(sec), this);
      if (!data.empty() && data.back() != '\0') {
        Err(ctx)
            << this
            << ": corrupted dependent libraries section (unterminated string): "
            << name;
      } else {
        // The content is a sequence of NUL-terminated specifiers; step over
        // each string and its terminator.
        for (const char *d = data.begin(), *e = data.end(); d < e;) {
          StringRef s(d);
          addDependentLibrary(ctx, s, this);
          d += s.size() + 1;
        }
      }
      // The section itself is discarded in either case.
      sections[i] = &InputSection::discarded;
      continue;
    }

    switch (ctx.arg.emachine) {
    case EM_ARM:
      if (sec.sh_type == SHT_ARM_ATTRIBUTES) {
        ARMAttributeParser attributes;
        ArrayRef<uint8_t> contents =
            check(this->getObj().getSectionContents(sec));
        StringRef name = check(obj.getSectionName(sec, shstrtab));
        // Discarded by default; the first successfully parsed attributes
        // section is retained below.
        sections[i] = &InputSection::discarded;
        if (Error e = attributes.parse(contents, ekind == ELF32LEKind
                                                     ? llvm::endianness::little
                                                     : llvm::endianness::big)) {
          // A temporary section object gives the warning a location.
          InputSection isec(*this, sec, name);
          Warn(ctx) << &isec << ": " << std::move(e);
        } else {
          updateSupportedARMFeatures(ctx, attributes);
          updateARMVFPArgs(ctx, attributes, this);

          // FIXME: Retain the first attribute section we see. The eglibc ARM
          // dynamic loaders require the presence of an attribute section for
          // dlopen to work. In a full implementation we would merge all
          // attribute sections.
          if (ctx.in.attributes == nullptr) {
            ctx.in.attributes =
                std::make_unique<InputSection>(*this, sec, name);
            sections[i] = ctx.in.attributes.get();
          }
        }
      }
      break;
    case EM_AARCH64:
      // Producing a static binary with MTE globals is not currently supported,
      // remove all SHT_AARCH64_MEMTAG_GLOBALS_STATIC sections as they're unused
      // metadata, and we don't want them to end up in the output file for
      // static executables.
      if (sec.sh_type == SHT_AARCH64_MEMTAG_GLOBALS_STATIC &&
          !canHaveMemtagGlobals(ctx))
        sections[i] = &InputSection::discarded;
      break;
    }
  }

  // Read a symbol table.
  initializeSymbols(obj);
}
656 
657 // Sections with SHT_GROUP and comdat bits define comdat section groups.
658 // They are identified and deduplicated by group name. This function
659 // returns a group name.
660 template <class ELFT>
661 StringRef ObjFile<ELFT>::getShtGroupSignature(ArrayRef<Elf_Shdr> sections,
662                                               const Elf_Shdr &sec) {
663   typename ELFT::SymRange symbols = this->getELFSyms<ELFT>();
664   if (sec.sh_info >= symbols.size())
665     Fatal(ctx) << this << ": invalid symbol index";
666   const typename ELFT::Sym &sym = symbols[sec.sh_info];
667   return CHECK2(sym.getName(this->stringTable), this);
668 }
669 
670 template <class ELFT>
671 bool ObjFile<ELFT>::shouldMerge(const Elf_Shdr &sec, StringRef name) {
672   // On a regular link we don't merge sections if -O0 (default is -O1). This
673   // sometimes makes the linker significantly faster, although the output will
674   // be bigger.
675   //
676   // Doing the same for -r would create a problem as it would combine sections
677   // with different sh_entsize. One option would be to just copy every SHF_MERGE
678   // section as is to the output. While this would produce a valid ELF file with
679   // usable SHF_MERGE sections, tools like (llvm-)?dwarfdump get confused when
680   // they see two .debug_str. We could have separate logic for combining
681   // SHF_MERGE sections based both on their name and sh_entsize, but that seems
682   // to be more trouble than it is worth. Instead, we just use the regular (-O1)
683   // logic for -r.
684   if (ctx.arg.optimize == 0 && !ctx.arg.relocatable)
685     return false;
686 
687   // A mergeable section with size 0 is useless because they don't have
688   // any data to merge. A mergeable string section with size 0 can be
689   // argued as invalid because it doesn't end with a null character.
690   // We'll avoid a mess by handling them as if they were non-mergeable.
691   if (sec.sh_size == 0)
692     return false;
693 
694   // Check for sh_entsize. The ELF spec is not clear about the zero
695   // sh_entsize. It says that "the member [sh_entsize] contains 0 if
696   // the section does not hold a table of fixed-size entries". We know
697   // that Rust 1.13 produces a string mergeable section with a zero
698   // sh_entsize. Here we just accept it rather than being picky about it.
699   uint64_t entSize = sec.sh_entsize;
700   if (entSize == 0)
701     return false;
702   if (sec.sh_size % entSize)
703     ErrAlways(ctx) << this << ":(" << name << "): SHF_MERGE section size ("
704                    << uint64_t(sec.sh_size)
705                    << ") must be a multiple of sh_entsize (" << entSize << ")";
706   if (sec.sh_flags & SHF_WRITE)
707     Err(ctx) << this << ":(" << name
708              << "): writable SHF_MERGE section is not supported";
709 
710   return true;
711 }
712 
713 // This is for --just-symbols.
714 //
715 // --just-symbols is a very minor feature that allows you to link your
716 // output against other existing program, so that if you load both your
717 // program and the other program into memory, your output can refer the
718 // other program's symbols.
719 //
720 // When the option is given, we link "just symbols". The section table is
721 // initialized with null pointers.
722 template <class ELFT> void ObjFile<ELFT>::initializeJustSymbols() {
723   sections.resize(numELFShdrs);
724 }
725 
726 static bool isKnownSpecificSectionType(uint32_t t, uint32_t flags) {
727   if (SHT_LOUSER <= t && t <= SHT_HIUSER && !(flags & SHF_ALLOC))
728     return true;
729   if (SHT_LOOS <= t && t <= SHT_HIOS && !(flags & SHF_OS_NONCONFORMING))
730     return true;
731   // Allow all processor-specific types. This is different from GNU ld.
732   return SHT_LOPROC <= t && t <= SHT_HIPROC;
733 }
734 
735 template <class ELFT>
736 void ObjFile<ELFT>::initializeSections(bool ignoreComdats,
737                                        const llvm::object::ELFFile<ELFT> &obj) {
738   ArrayRef<Elf_Shdr> objSections = getELFShdrs<ELFT>();
739   StringRef shstrtab = CHECK2(obj.getSectionStringTable(objSections), this);
740   uint64_t size = objSections.size();
741   SmallVector<ArrayRef<Elf_Word>, 0> selectedGroups;
742   for (size_t i = 0; i != size; ++i) {
743     if (this->sections[i] == &InputSection::discarded)
744       continue;
745     const Elf_Shdr &sec = objSections[i];
746     const uint32_t type = sec.sh_type;
747 
748     // SHF_EXCLUDE'ed sections are discarded by the linker. However,
749     // if -r is given, we'll let the final link discard such sections.
750     // This is compatible with GNU.
751     if ((sec.sh_flags & SHF_EXCLUDE) && !ctx.arg.relocatable) {
752       if (type == SHT_LLVM_CALL_GRAPH_PROFILE)
753         cgProfileSectionIndex = i;
754       if (type == SHT_LLVM_ADDRSIG) {
755         // We ignore the address-significance table if we know that the object
756         // file was created by objcopy or ld -r. This is because these tools
757         // will reorder the symbols in the symbol table, invalidating the data
758         // in the address-significance table, which refers to symbols by index.
759         if (sec.sh_link != 0)
760           this->addrsigSec = &sec;
761         else if (ctx.arg.icf == ICFLevel::Safe)
762           Warn(ctx) << this
763                     << ": --icf=safe conservatively ignores "
764                        "SHT_LLVM_ADDRSIG [index "
765                     << i
766                     << "] with sh_link=0 "
767                        "(likely created using objcopy or ld -r)";
768       }
769       this->sections[i] = &InputSection::discarded;
770       continue;
771     }
772 
773     switch (type) {
774     case SHT_GROUP: {
775       if (!ctx.arg.relocatable)
776         sections[i] = &InputSection::discarded;
777       StringRef signature =
778           cantFail(this->getELFSyms<ELFT>()[sec.sh_info].getName(stringTable));
779       ArrayRef<Elf_Word> entries =
780           cantFail(obj.template getSectionContentsAsArray<Elf_Word>(sec));
781       if ((entries[0] & GRP_COMDAT) == 0 || ignoreComdats ||
782           ctx.symtab->comdatGroups.find(CachedHashStringRef(signature))
783                   ->second == this)
784         selectedGroups.push_back(entries);
785       break;
786     }
787     case SHT_SYMTAB_SHNDX:
788       shndxTable = CHECK2(obj.getSHNDXTable(sec, objSections), this);
789       break;
790     case SHT_SYMTAB:
791     case SHT_STRTAB:
792     case SHT_REL:
793     case SHT_RELA:
794     case SHT_CREL:
795     case SHT_NULL:
796       break;
797     case SHT_PROGBITS:
798     case SHT_NOTE:
799     case SHT_NOBITS:
800     case SHT_INIT_ARRAY:
801     case SHT_FINI_ARRAY:
802     case SHT_PREINIT_ARRAY:
803       this->sections[i] =
804           createInputSection(i, sec, check(obj.getSectionName(sec, shstrtab)));
805       break;
806     case SHT_LLVM_LTO:
807       // Discard .llvm.lto in a relocatable link that does not use the bitcode.
808       // The concatenated output does not properly reflect the linking
809       // semantics. In addition, since we do not use the bitcode wrapper format,
810       // the concatenated raw bitcode would be invalid.
811       if (ctx.arg.relocatable && !ctx.arg.fatLTOObjects) {
812         sections[i] = &InputSection::discarded;
813         break;
814       }
815       [[fallthrough]];
816     default:
817       this->sections[i] =
818           createInputSection(i, sec, check(obj.getSectionName(sec, shstrtab)));
819       if (type == SHT_LLVM_SYMPART)
820         ctx.hasSympart.store(true, std::memory_order_relaxed);
821       else if (ctx.arg.rejectMismatch &&
822                !isKnownSpecificSectionType(type, sec.sh_flags))
823         Err(ctx) << this->sections[i] << ": unknown section type 0x"
824                  << Twine::utohexstr(type);
825       break;
826     }
827   }
828 
829   // We have a second loop. It is used to:
830   // 1) handle SHF_LINK_ORDER sections.
831   // 2) create relocation sections. In some cases the section header index of a
832   //    relocation section may be smaller than that of the relocated section. In
833   //    such cases, the relocation section would attempt to reference a target
834   //    section that has not yet been created. For simplicity, delay creation of
835   //    relocation sections until now.
836   for (size_t i = 0; i != size; ++i) {
837     if (this->sections[i] == &InputSection::discarded)
838       continue;
839     const Elf_Shdr &sec = objSections[i];
840 
841     if (isStaticRelSecType(sec.sh_type)) {
842       // Find a relocation target section and associate this section with that.
843       // Target may have been discarded if it is in a different section group
844       // and the group is discarded, even though it's a violation of the spec.
845       // We handle that situation gracefully by discarding dangling relocation
846       // sections.
847       const uint32_t info = sec.sh_info;
848       InputSectionBase *s = getRelocTarget(i, info);
849       if (!s)
850         continue;
851 
852       // ELF spec allows mergeable sections with relocations, but they are rare,
853       // and it is in practice hard to merge such sections by contents, because
854       // applying relocations at end of linking changes section contents. So, we
855       // simply handle such sections as non-mergeable ones. Degrading like this
856       // is acceptable because section merging is optional.
857       if (auto *ms = dyn_cast<MergeInputSection>(s)) {
858         s = makeThreadLocal<InputSection>(ms->file, ms->name, ms->type,
859                                           ms->flags, ms->addralign, ms->entsize,
860                                           ms->contentMaybeDecompress());
861         sections[info] = s;
862       }
863 
864       if (s->relSecIdx != 0)
865         ErrAlways(ctx) << s
866                        << ": multiple relocation sections to one section are "
867                           "not supported";
868       s->relSecIdx = i;
869 
870       // Relocation sections are usually removed from the output, so return
871       // `nullptr` for the normal case. However, if -r or --emit-relocs is
872       // specified, we need to copy them to the output. (Some post link analysis
873       // tools specify --emit-relocs to obtain the information.)
874       if (ctx.arg.copyRelocs) {
875         auto *isec = makeThreadLocal<InputSection>(
876             *this, sec, check(obj.getSectionName(sec, shstrtab)));
877         // If the relocated section is discarded (due to /DISCARD/ or
878         // --gc-sections), the relocation section should be discarded as well.
879         s->dependentSections.push_back(isec);
880         sections[i] = isec;
881       }
882       continue;
883     }
884 
885     // A SHF_LINK_ORDER section with sh_link=0 is handled as if it did not have
886     // the flag.
887     if (!sec.sh_link || !(sec.sh_flags & SHF_LINK_ORDER))
888       continue;
889 
890     InputSectionBase *linkSec = nullptr;
891     if (sec.sh_link < size)
892       linkSec = this->sections[sec.sh_link];
893     if (!linkSec) {
894       ErrAlways(ctx) << this
895                      << ": invalid sh_link index: " << uint32_t(sec.sh_link);
896       continue;
897     }
898 
899     // A SHF_LINK_ORDER section is discarded if its linked-to section is
900     // discarded.
901     InputSection *isec = cast<InputSection>(this->sections[i]);
902     linkSec->dependentSections.push_back(isec);
903     if (!isa<InputSection>(linkSec))
904       ErrAlways(ctx)
905           << "a section " << isec->name
906           << " with SHF_LINK_ORDER should not refer a non-regular section: "
907           << linkSec;
908   }
909 
910   for (ArrayRef<Elf_Word> entries : selectedGroups)
911     handleSectionGroup<ELFT>(this->sections, entries);
912 }
913 
914 // Read the following info from the .note.gnu.property section and write it to
915 // the corresponding fields in `ObjFile`:
916 // - Feature flags (32 bits) representing x86 or AArch64 features for
917 //   hardware-assisted call flow control;
918 // - AArch64 PAuth ABI core info (16 bytes).
919 template <class ELFT>
920 static void readGnuProperty(Ctx &ctx, const InputSection &sec,
921                             ObjFile<ELFT> &f) {
922   using Elf_Nhdr = typename ELFT::Nhdr;
923   using Elf_Note = typename ELFT::Note;
924 
925   ArrayRef<uint8_t> data = sec.content();
926   auto err = [&](const uint8_t *place) -> ELFSyncStream {
927     auto diag = Err(ctx);
928     diag << sec.file << ":(" << sec.name << "+0x"
929          << Twine::utohexstr(place - sec.content().data()) << "): ";
930     return diag;
931   };
932   while (!data.empty()) {
933     // Read one NOTE record.
934     auto *nhdr = reinterpret_cast<const Elf_Nhdr *>(data.data());
935     if (data.size() < sizeof(Elf_Nhdr) ||
936         data.size() < nhdr->getSize(sec.addralign))
937       return void(err(data.data()) << "data is too short");
938 
939     Elf_Note note(*nhdr);
940     if (nhdr->n_type != NT_GNU_PROPERTY_TYPE_0 || note.getName() != "GNU") {
941       data = data.slice(nhdr->getSize(sec.addralign));
942       continue;
943     }
944 
945     uint32_t featureAndType = ctx.arg.emachine == EM_AARCH64
946                                   ? GNU_PROPERTY_AARCH64_FEATURE_1_AND
947                                   : GNU_PROPERTY_X86_FEATURE_1_AND;
948 
949     // Read a body of a NOTE record, which consists of type-length-value fields.
950     ArrayRef<uint8_t> desc = note.getDesc(sec.addralign);
951     while (!desc.empty()) {
952       const uint8_t *place = desc.data();
953       if (desc.size() < 8)
954         return void(err(place) << "program property is too short");
955       uint32_t type = read32<ELFT::Endianness>(desc.data());
956       uint32_t size = read32<ELFT::Endianness>(desc.data() + 4);
957       desc = desc.slice(8);
958       if (desc.size() < size)
959         return void(err(place) << "program property is too short");
960 
961       if (type == featureAndType) {
962         // We found a FEATURE_1_AND field. There may be more than one of these
963         // in a .note.gnu.property section, for a relocatable object we
964         // accumulate the bits set.
965         if (size < 4)
966           return void(err(place) << "FEATURE_1_AND entry is too short");
967         f.andFeatures |= read32<ELFT::Endianness>(desc.data());
968       } else if (ctx.arg.emachine == EM_AARCH64 &&
969                  type == GNU_PROPERTY_AARCH64_FEATURE_PAUTH) {
970         if (!f.aarch64PauthAbiCoreInfo.empty()) {
971           return void(
972               err(data.data())
973               << "multiple GNU_PROPERTY_AARCH64_FEATURE_PAUTH entries are "
974                  "not supported");
975         } else if (size != 16) {
976           return void(err(data.data())
977                       << "GNU_PROPERTY_AARCH64_FEATURE_PAUTH entry "
978                          "is invalid: expected 16 bytes, but got "
979                       << size);
980         }
981         f.aarch64PauthAbiCoreInfo = desc;
982       }
983 
984       // Padding is present in the note descriptor, if necessary.
985       desc = desc.slice(alignTo<(ELFT::Is64Bits ? 8 : 4)>(size));
986     }
987 
988     // Go to next NOTE record to look for more FEATURE_1_AND descriptions.
989     data = data.slice(nhdr->getSize(sec.addralign));
990   }
991 }
992 
993 template <class ELFT>
994 InputSectionBase *ObjFile<ELFT>::getRelocTarget(uint32_t idx, uint32_t info) {
995   if (info < this->sections.size()) {
996     InputSectionBase *target = this->sections[info];
997 
998     // Strictly speaking, a relocation section must be included in the
999     // group of the section it relocates. However, LLVM 3.3 and earlier
1000     // would fail to do so, so we gracefully handle that case.
1001     if (target == &InputSection::discarded)
1002       return nullptr;
1003 
1004     if (target != nullptr)
1005       return target;
1006   }
1007 
1008   Err(ctx) << this << ": relocation section (index " << idx
1009            << ") has invalid sh_info (" << info << ')';
1010   return nullptr;
1011 }
1012 
1013 // The function may be called concurrently for different input files. For
1014 // allocation, prefer makeThreadLocal which does not require holding a lock.
1015 template <class ELFT>
1016 InputSectionBase *ObjFile<ELFT>::createInputSection(uint32_t idx,
1017                                                     const Elf_Shdr &sec,
1018                                                     StringRef name) {
1019   if (name.starts_with(".n")) {
1020     // The GNU linker uses .note.GNU-stack section as a marker indicating
1021     // that the code in the object file does not expect that the stack is
1022     // executable (in terms of NX bit). If all input files have the marker,
1023     // the GNU linker adds a PT_GNU_STACK segment to tells the loader to
1024     // make the stack non-executable. Most object files have this section as
1025     // of 2017.
1026     //
1027     // But making the stack non-executable is a norm today for security
1028     // reasons. Failure to do so may result in a serious security issue.
1029     // Therefore, we make LLD always add PT_GNU_STACK unless it is
1030     // explicitly told to do otherwise (by -z execstack). Because the stack
1031     // executable-ness is controlled solely by command line options,
1032     // .note.GNU-stack sections are, with one exception, ignored. Report
1033     // an error if we encounter an executable .note.GNU-stack to force the
1034     // user to explicitly request an executable stack.
1035     if (name == ".note.GNU-stack") {
1036       if ((sec.sh_flags & SHF_EXECINSTR) && !ctx.arg.relocatable &&
1037           ctx.arg.zGnustack != GnuStackKind::Exec) {
1038         Err(ctx) << this
1039                  << ": requires an executable stack, but -z execstack is not "
1040                     "specified";
1041       }
1042       return &InputSection::discarded;
1043     }
1044 
1045     // Object files that use processor features such as Intel Control-Flow
1046     // Enforcement (CET) or AArch64 Branch Target Identification BTI, use a
1047     // .note.gnu.property section containing a bitfield of feature bits like the
1048     // GNU_PROPERTY_X86_FEATURE_1_IBT flag. Read a bitmap containing the flag.
1049     //
1050     // Since we merge bitmaps from multiple object files to create a new
1051     // .note.gnu.property containing a single AND'ed bitmap, we discard an input
1052     // file's .note.gnu.property section.
1053     if (name == ".note.gnu.property") {
1054       readGnuProperty<ELFT>(ctx, InputSection(*this, sec, name), *this);
1055       return &InputSection::discarded;
1056     }
1057 
1058     // Split stacks is a feature to support a discontiguous stack,
1059     // commonly used in the programming language Go. For the details,
1060     // see https://gcc.gnu.org/wiki/SplitStacks. An object file compiled
1061     // for split stack will include a .note.GNU-split-stack section.
1062     if (name == ".note.GNU-split-stack") {
1063       if (ctx.arg.relocatable) {
1064         ErrAlways(ctx) << "cannot mix split-stack and non-split-stack in a "
1065                           "relocatable link";
1066         return &InputSection::discarded;
1067       }
1068       this->splitStack = true;
1069       return &InputSection::discarded;
1070     }
1071 
1072     // An object file compiled for split stack, but where some of the
1073     // functions were compiled with the no_split_stack_attribute will
1074     // include a .note.GNU-no-split-stack section.
1075     if (name == ".note.GNU-no-split-stack") {
1076       this->someNoSplitStack = true;
1077       return &InputSection::discarded;
1078     }
1079 
1080     // Strip existing .note.gnu.build-id sections so that the output won't have
1081     // more than one build-id. This is not usually a problem because input
1082     // object files normally don't have .build-id sections, but you can create
1083     // such files by "ld.{bfd,gold,lld} -r --build-id", and we want to guard
1084     // against it.
1085     if (name == ".note.gnu.build-id")
1086       return &InputSection::discarded;
1087   }
1088 
1089   // The linker merges EH (exception handling) frames and creates a
1090   // .eh_frame_hdr section for runtime. So we handle them with a special
1091   // class. For relocatable outputs, they are just passed through.
1092   if (name == ".eh_frame" && !ctx.arg.relocatable)
1093     return makeThreadLocal<EhInputSection>(*this, sec, name);
1094 
1095   if ((sec.sh_flags & SHF_MERGE) && shouldMerge(sec, name))
1096     return makeThreadLocal<MergeInputSection>(*this, sec, name);
1097   return makeThreadLocal<InputSection>(*this, sec, name);
1098 }
1099 
1100 // Initialize symbols. symbols is a parallel array to the corresponding ELF
1101 // symbol table.
1102 template <class ELFT>
1103 void ObjFile<ELFT>::initializeSymbols(const object::ELFFile<ELFT> &obj) {
1104   ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>();
1105   if (!symbols)
1106     symbols = std::make_unique<Symbol *[]>(numSymbols);
1107 
1108   // Some entries have been filled by LazyObjFile.
1109   auto *symtab = ctx.symtab.get();
1110   for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i)
1111     if (!symbols[i])
1112       symbols[i] = symtab->insert(CHECK2(eSyms[i].getName(stringTable), this));
1113 
1114   // Perform symbol resolution on non-local symbols.
1115   SmallVector<unsigned, 32> undefineds;
1116   for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) {
1117     const Elf_Sym &eSym = eSyms[i];
1118     uint32_t secIdx = eSym.st_shndx;
1119     if (secIdx == SHN_UNDEF) {
1120       undefineds.push_back(i);
1121       continue;
1122     }
1123 
1124     uint8_t binding = eSym.getBinding();
1125     uint8_t stOther = eSym.st_other;
1126     uint8_t type = eSym.getType();
1127     uint64_t value = eSym.st_value;
1128     uint64_t size = eSym.st_size;
1129 
1130     Symbol *sym = symbols[i];
1131     sym->isUsedInRegularObj = true;
1132     if (LLVM_UNLIKELY(eSym.st_shndx == SHN_COMMON)) {
1133       if (value == 0 || value >= UINT32_MAX)
1134         Err(ctx) << this << ": common symbol '" << sym->getName()
1135                  << "' has invalid alignment: " << value;
1136       hasCommonSyms = true;
1137       sym->resolve(ctx, CommonSymbol{ctx, this, StringRef(), binding, stOther,
1138                                      type, value, size});
1139       continue;
1140     }
1141 
1142     // Handle global defined symbols. Defined::section will be set in postParse.
1143     sym->resolve(ctx, Defined{ctx, this, StringRef(), binding, stOther, type,
1144                               value, size, nullptr});
1145   }
1146 
1147   // Undefined symbols (excluding those defined relative to non-prevailing
1148   // sections) can trigger recursive extract. Process defined symbols first so
1149   // that the relative order between a defined symbol and an undefined symbol
1150   // does not change the symbol resolution behavior. In addition, a set of
1151   // interconnected symbols will all be resolved to the same file, instead of
1152   // being resolved to different files.
1153   for (unsigned i : undefineds) {
1154     const Elf_Sym &eSym = eSyms[i];
1155     Symbol *sym = symbols[i];
1156     sym->resolve(ctx, Undefined{this, StringRef(), eSym.getBinding(),
1157                                 eSym.st_other, eSym.getType()});
1158     sym->isUsedInRegularObj = true;
1159     sym->referenced = true;
1160   }
1161 }
1162 
1163 template <class ELFT>
1164 void ObjFile<ELFT>::initSectionsAndLocalSyms(bool ignoreComdats) {
1165   if (!justSymbols)
1166     initializeSections(ignoreComdats, getObj());
1167 
1168   if (!firstGlobal)
1169     return;
1170   SymbolUnion *locals = makeThreadLocalN<SymbolUnion>(firstGlobal);
1171   memset(locals, 0, sizeof(SymbolUnion) * firstGlobal);
1172 
1173   ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>();
1174   for (size_t i = 0, end = firstGlobal; i != end; ++i) {
1175     const Elf_Sym &eSym = eSyms[i];
1176     uint32_t secIdx = eSym.st_shndx;
1177     if (LLVM_UNLIKELY(secIdx == SHN_XINDEX))
1178       secIdx = check(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable));
1179     else if (secIdx >= SHN_LORESERVE)
1180       secIdx = 0;
1181     if (LLVM_UNLIKELY(secIdx >= sections.size())) {
1182       Err(ctx) << this << ": invalid section index: " << secIdx;
1183       secIdx = 0;
1184     }
1185     if (LLVM_UNLIKELY(eSym.getBinding() != STB_LOCAL))
1186       ErrAlways(ctx) << this << ": non-local symbol (" << i
1187                      << ") found at index < .symtab's sh_info (" << end << ")";
1188 
1189     InputSectionBase *sec = sections[secIdx];
1190     uint8_t type = eSym.getType();
1191     if (type == STT_FILE)
1192       sourceFile = CHECK2(eSym.getName(stringTable), this);
1193     unsigned stName = eSym.st_name;
1194     if (LLVM_UNLIKELY(stringTable.size() <= stName)) {
1195       Err(ctx) << this << ": invalid symbol name offset";
1196       stName = 0;
1197     }
1198     StringRef name(stringTable.data() + stName);
1199 
1200     symbols[i] = reinterpret_cast<Symbol *>(locals + i);
1201     if (eSym.st_shndx == SHN_UNDEF || sec == &InputSection::discarded)
1202       new (symbols[i]) Undefined(this, name, STB_LOCAL, eSym.st_other, type,
1203                                  /*discardedSecIdx=*/secIdx);
1204     else
1205       new (symbols[i]) Defined(ctx, this, name, STB_LOCAL, eSym.st_other, type,
1206                                eSym.st_value, eSym.st_size, sec);
1207     symbols[i]->partition = 1;
1208     symbols[i]->isUsedInRegularObj = true;
1209   }
1210 }
1211 
1212 // Called after all ObjFile::parse is called for all ObjFiles. This checks
1213 // duplicate symbols and may do symbol property merge in the future.
1214 template <class ELFT> void ObjFile<ELFT>::postParse() {
1215   static std::mutex mu;
1216   ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>();
1217   for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) {
1218     const Elf_Sym &eSym = eSyms[i];
1219     Symbol &sym = *symbols[i];
1220     uint32_t secIdx = eSym.st_shndx;
1221     uint8_t binding = eSym.getBinding();
1222     if (LLVM_UNLIKELY(binding != STB_GLOBAL && binding != STB_WEAK &&
1223                       binding != STB_GNU_UNIQUE))
1224       Err(ctx) << this << ": symbol (" << i
1225                << ") has invalid binding: " << (int)binding;
1226 
1227     // st_value of STT_TLS represents the assigned offset, not the actual
1228     // address which is used by STT_FUNC and STT_OBJECT. STT_TLS symbols can
1229     // only be referenced by special TLS relocations. It is usually an error if
1230     // a STT_TLS symbol is replaced by a non-STT_TLS symbol, vice versa.
1231     if (LLVM_UNLIKELY(sym.isTls()) && eSym.getType() != STT_TLS &&
1232         eSym.getType() != STT_NOTYPE)
1233       Err(ctx) << "TLS attribute mismatch: " << &sym << "\n>>> in " << sym.file
1234                << "\n>>> in " << this;
1235 
1236     // Handle non-COMMON defined symbol below. !sym.file allows a symbol
1237     // assignment to redefine a symbol without an error.
1238     if (!sym.isDefined() || secIdx == SHN_UNDEF)
1239       continue;
1240     if (LLVM_UNLIKELY(secIdx >= SHN_LORESERVE)) {
1241       if (secIdx == SHN_COMMON)
1242         continue;
1243       if (secIdx == SHN_XINDEX)
1244         secIdx = check(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable));
1245       else
1246         secIdx = 0;
1247     }
1248 
1249     if (LLVM_UNLIKELY(secIdx >= sections.size())) {
1250       Err(ctx) << this << ": invalid section index: " << secIdx;
1251       continue;
1252     }
1253     InputSectionBase *sec = sections[secIdx];
1254     if (sec == &InputSection::discarded) {
1255       if (sym.traced) {
1256         printTraceSymbol(Undefined{this, sym.getName(), sym.binding,
1257                                    sym.stOther, sym.type, secIdx},
1258                          sym.getName());
1259       }
1260       if (sym.file == this) {
1261         std::lock_guard<std::mutex> lock(mu);
1262         ctx.nonPrevailingSyms.emplace_back(&sym, secIdx);
1263       }
1264       continue;
1265     }
1266 
1267     if (sym.file == this) {
1268       cast<Defined>(sym).section = sec;
1269       continue;
1270     }
1271 
1272     if (sym.binding == STB_WEAK || binding == STB_WEAK)
1273       continue;
1274     std::lock_guard<std::mutex> lock(mu);
1275     ctx.duplicates.push_back({&sym, this, sec, eSym.st_value});
1276   }
1277 }
1278 
1279 // The handling of tentative definitions (COMMON symbols) in archives is murky.
1280 // A tentative definition will be promoted to a global definition if there are
1281 // no non-tentative definitions to dominate it. When we hold a tentative
1282 // definition to a symbol and are inspecting archive members for inclusion
1283 // there are 2 ways we can proceed:
1284 //
1285 // 1) Consider the tentative definition a 'real' definition (ie promotion from
1286 //    tentative to real definition has already happened) and not inspect
1287 //    archive members for Global/Weak definitions to replace the tentative
1288 //    definition. An archive member would only be included if it satisfies some
1289 //    other undefined symbol. This is the behavior Gold uses.
1290 //
1291 // 2) Consider the tentative definition as still undefined (ie the promotion to
1292 //    a real definition happens only after all symbol resolution is done).
1293 //    The linker searches archive members for STB_GLOBAL definitions to
1294 //    replace the tentative definition with. This is the behavior used by
1295 //    GNU ld.
1296 //
1297 //  The second behavior is inherited from SysVR4, which based it on the FORTRAN
1298 //  COMMON BLOCK model. This behavior is needed for proper initialization in old
1299 //  (pre F90) FORTRAN code that is packaged into an archive.
1300 //
1301 //  The following functions search archive members for definitions to replace
1302 //  tentative definitions (implementing behavior 2).
1303 static bool isBitcodeNonCommonDef(MemoryBufferRef mb, StringRef symName,
1304                                   StringRef archiveName) {
1305   IRSymtabFile symtabFile = check(readIRSymtab(mb));
1306   for (const irsymtab::Reader::SymbolRef &sym :
1307        symtabFile.TheReader.symbols()) {
1308     if (sym.isGlobal() && sym.getName() == symName)
1309       return !sym.isUndefined() && !sym.isWeak() && !sym.isCommon();
1310   }
1311   return false;
1312 }
1313 
1314 template <class ELFT>
1315 static bool isNonCommonDef(Ctx &ctx, ELFKind ekind, MemoryBufferRef mb,
1316                            StringRef symName, StringRef archiveName) {
1317   ObjFile<ELFT> *obj = make<ObjFile<ELFT>>(ctx, ekind, mb, archiveName);
1318   obj->init();
1319   StringRef stringtable = obj->getStringTable();
1320 
1321   for (auto sym : obj->template getGlobalELFSyms<ELFT>()) {
1322     Expected<StringRef> name = sym.getName(stringtable);
1323     if (name && name.get() == symName)
1324       return sym.isDefined() && sym.getBinding() == STB_GLOBAL &&
1325              !sym.isCommon();
1326   }
1327   return false;
1328 }
1329 
1330 static bool isNonCommonDef(Ctx &ctx, MemoryBufferRef mb, StringRef symName,
1331                            StringRef archiveName) {
1332   switch (getELFKind(ctx, mb, archiveName)) {
1333   case ELF32LEKind:
1334     return isNonCommonDef<ELF32LE>(ctx, ELF32LEKind, mb, symName, archiveName);
1335   case ELF32BEKind:
1336     return isNonCommonDef<ELF32BE>(ctx, ELF32BEKind, mb, symName, archiveName);
1337   case ELF64LEKind:
1338     return isNonCommonDef<ELF64LE>(ctx, ELF64LEKind, mb, symName, archiveName);
1339   case ELF64BEKind:
1340     return isNonCommonDef<ELF64BE>(ctx, ELF64BEKind, mb, symName, archiveName);
1341   default:
1342     llvm_unreachable("getELFKind");
1343   }
1344 }
1345 
// Creates a SharedFile for the buffer `m`. The ELF kind is detected from the
// buffer with an empty archive name. `soName` starts out as `defaultSoName`
// (SharedFile::parse replaces it when the file has a DT_SONAME tag), and
// `isNeeded` is initialized to the negation of ctx.arg.asNeeded.
SharedFile::SharedFile(Ctx &ctx, MemoryBufferRef m, StringRef defaultSoName)
    : ELFFileBase(ctx, SharedKind, getELFKind(ctx, m, ""), m),
      soName(defaultSoName), isNeeded(!ctx.arg.asNeeded) {}
1349 
1350 // Parse the version definitions in the object file if present, and return a
1351 // vector whose nth element contains a pointer to the Elf_Verdef for version
1352 // identifier n. Version identifiers that are not definitions map to nullptr.
1353 template <typename ELFT>
1354 static SmallVector<const void *, 0>
1355 parseVerdefs(const uint8_t *base, const typename ELFT::Shdr *sec) {
1356   if (!sec)
1357     return {};
1358 
1359   // Build the Verdefs array by following the chain of Elf_Verdef objects
1360   // from the start of the .gnu.version_d section.
1361   SmallVector<const void *, 0> verdefs;
1362   const uint8_t *verdef = base + sec->sh_offset;
1363   for (unsigned i = 0, e = sec->sh_info; i != e; ++i) {
1364     auto *curVerdef = reinterpret_cast<const typename ELFT::Verdef *>(verdef);
1365     verdef += curVerdef->vd_next;
1366     unsigned verdefIndex = curVerdef->vd_ndx;
1367     if (verdefIndex >= verdefs.size())
1368       verdefs.resize(verdefIndex + 1);
1369     verdefs[verdefIndex] = curVerdef;
1370   }
1371   return verdefs;
1372 }
1373 
1374 // Parse SHT_GNU_verneed to properly set the name of a versioned undefined
1375 // symbol. We detect fatal issues which would cause vulnerabilities, but do not
1376 // implement sophisticated error checking like in llvm-readobj because the value
1377 // of such diagnostics is low.
1378 template <typename ELFT>
1379 std::vector<uint32_t> SharedFile::parseVerneed(const ELFFile<ELFT> &obj,
1380                                                const typename ELFT::Shdr *sec) {
1381   if (!sec)
1382     return {};
1383   std::vector<uint32_t> verneeds;
1384   ArrayRef<uint8_t> data = CHECK2(obj.getSectionContents(*sec), this);
1385   const uint8_t *verneedBuf = data.begin();
1386   for (unsigned i = 0; i != sec->sh_info; ++i) {
1387     if (verneedBuf + sizeof(typename ELFT::Verneed) > data.end()) {
1388       Err(ctx) << this << " has an invalid Verneed";
1389       break;
1390     }
1391     auto *vn = reinterpret_cast<const typename ELFT::Verneed *>(verneedBuf);
1392     const uint8_t *vernauxBuf = verneedBuf + vn->vn_aux;
1393     for (unsigned j = 0; j != vn->vn_cnt; ++j) {
1394       if (vernauxBuf + sizeof(typename ELFT::Vernaux) > data.end()) {
1395         Err(ctx) << this << " has an invalid Vernaux";
1396         break;
1397       }
1398       auto *aux = reinterpret_cast<const typename ELFT::Vernaux *>(vernauxBuf);
1399       if (aux->vna_name >= this->stringTable.size()) {
1400         Err(ctx) << this << " has a Vernaux with an invalid vna_name";
1401         break;
1402       }
1403       uint16_t version = aux->vna_other & VERSYM_VERSION;
1404       if (version >= verneeds.size())
1405         verneeds.resize(version + 1);
1406       verneeds[version] = aux->vna_name;
1407       vernauxBuf += aux->vna_next;
1408     }
1409     verneedBuf += vn->vn_next;
1410   }
1411   return verneeds;
1412 }
1413 
1414 // We do not usually care about alignments of data in shared object
1415 // files because the loader takes care of it. However, if we promote a
1416 // DSO symbol to point to .bss due to copy relocation, we need to keep
1417 // the original alignment requirements. We infer it in this function.
1418 template <typename ELFT>
1419 static uint64_t getAlignment(ArrayRef<typename ELFT::Shdr> sections,
1420                              const typename ELFT::Sym &sym) {
1421   uint64_t ret = UINT64_MAX;
1422   if (sym.st_value)
1423     ret = 1ULL << llvm::countr_zero((uint64_t)sym.st_value);
1424   if (0 < sym.st_shndx && sym.st_shndx < sections.size())
1425     ret = std::min<uint64_t>(ret, sections[sym.st_shndx].sh_addralign);
1426   return (ret > UINT32_MAX) ? 0 : ret;
1427 }
1428 
1429 // Fully parse the shared object file.
1430 //
1431 // This function parses symbol versions. If a DSO has version information,
1432 // the file has a ".gnu.version_d" section which contains symbol version
1433 // definitions. Each symbol is associated to one version through a table in
1434 // ".gnu.version" section. That table is a parallel array for the symbol
1435 // table, and each table entry contains an index in ".gnu.version_d".
1436 //
// The special index 0 is reserved for VER_NDX_LOCAL and 1 is for
1438 // VER_NDX_GLOBAL. There's no table entry for these special versions in
1439 // ".gnu.version_d".
1440 //
1441 // The file format for symbol versioning is perhaps a bit more complicated
1442 // than necessary, but you can easily understand the code if you wrap your
1443 // head around the data structure described above.
1444 template <class ELFT> void SharedFile::parse() {
1445   using Elf_Dyn = typename ELFT::Dyn;
1446   using Elf_Shdr = typename ELFT::Shdr;
1447   using Elf_Sym = typename ELFT::Sym;
1448   using Elf_Verdef = typename ELFT::Verdef;
1449   using Elf_Versym = typename ELFT::Versym;
1450 
1451   ArrayRef<Elf_Dyn> dynamicTags;
1452   const ELFFile<ELFT> obj = this->getObj<ELFT>();
1453   ArrayRef<Elf_Shdr> sections = getELFShdrs<ELFT>();
1454 
1455   const Elf_Shdr *versymSec = nullptr;
1456   const Elf_Shdr *verdefSec = nullptr;
1457   const Elf_Shdr *verneedSec = nullptr;
1458   symbols = std::make_unique<Symbol *[]>(numSymbols);
1459 
1460   // Search for .dynsym, .dynamic, .symtab, .gnu.version and .gnu.version_d.
1461   for (const Elf_Shdr &sec : sections) {
1462     switch (sec.sh_type) {
1463     default:
1464       continue;
1465     case SHT_DYNAMIC:
1466       dynamicTags =
1467           CHECK2(obj.template getSectionContentsAsArray<Elf_Dyn>(sec), this);
1468       break;
1469     case SHT_GNU_versym:
1470       versymSec = &sec;
1471       break;
1472     case SHT_GNU_verdef:
1473       verdefSec = &sec;
1474       break;
1475     case SHT_GNU_verneed:
1476       verneedSec = &sec;
1477       break;
1478     }
1479   }
1480 
1481   if (versymSec && numSymbols == 0) {
1482     ErrAlways(ctx) << "SHT_GNU_versym should be associated with symbol table";
1483     return;
1484   }
1485 
1486   // Search for a DT_SONAME tag to initialize this->soName.
1487   for (const Elf_Dyn &dyn : dynamicTags) {
1488     if (dyn.d_tag == DT_NEEDED) {
1489       uint64_t val = dyn.getVal();
1490       if (val >= this->stringTable.size()) {
1491         Err(ctx) << this << ": invalid DT_NEEDED entry";
1492         return;
1493       }
1494       dtNeeded.push_back(this->stringTable.data() + val);
1495     } else if (dyn.d_tag == DT_SONAME) {
1496       uint64_t val = dyn.getVal();
1497       if (val >= this->stringTable.size()) {
1498         Err(ctx) << this << ": invalid DT_SONAME entry";
1499         return;
1500       }
1501       soName = this->stringTable.data() + val;
1502     }
1503   }
1504 
1505   // DSOs are uniquified not by filename but by soname.
1506   StringSaver &ss = ctx.saver;
1507   DenseMap<CachedHashStringRef, SharedFile *>::iterator it;
1508   bool wasInserted;
1509   std::tie(it, wasInserted) =
1510       ctx.symtab->soNames.try_emplace(CachedHashStringRef(soName), this);
1511 
1512   // If a DSO appears more than once on the command line with and without
1513   // --as-needed, --no-as-needed takes precedence over --as-needed because a
1514   // user can add an extra DSO with --no-as-needed to force it to be added to
1515   // the dependency list.
1516   it->second->isNeeded |= isNeeded;
1517   if (!wasInserted)
1518     return;
1519 
1520   ctx.sharedFiles.push_back(this);
1521 
1522   verdefs = parseVerdefs<ELFT>(obj.base(), verdefSec);
1523   std::vector<uint32_t> verneeds = parseVerneed<ELFT>(obj, verneedSec);
1524 
1525   // Parse ".gnu.version" section which is a parallel array for the symbol
1526   // table. If a given file doesn't have a ".gnu.version" section, we use
1527   // VER_NDX_GLOBAL.
1528   size_t size = numSymbols - firstGlobal;
1529   std::vector<uint16_t> versyms(size, VER_NDX_GLOBAL);
1530   if (versymSec) {
1531     ArrayRef<Elf_Versym> versym =
1532         CHECK2(obj.template getSectionContentsAsArray<Elf_Versym>(*versymSec),
1533                this)
1534             .slice(firstGlobal);
1535     for (size_t i = 0; i < size; ++i)
1536       versyms[i] = versym[i].vs_index;
1537   }
1538 
1539   // System libraries can have a lot of symbols with versions. Using a
1540   // fixed buffer for computing the versions name (foo@ver) can save a
1541   // lot of allocations.
1542   SmallString<0> versionedNameBuffer;
1543 
1544   // Add symbols to the symbol table.
1545   ArrayRef<Elf_Sym> syms = this->getGlobalELFSyms<ELFT>();
1546   for (size_t i = 0, e = syms.size(); i != e; ++i) {
1547     const Elf_Sym &sym = syms[i];
1548 
1549     // ELF spec requires that all local symbols precede weak or global
1550     // symbols in each symbol table, and the index of first non-local symbol
1551     // is stored to sh_info. If a local symbol appears after some non-local
1552     // symbol, that's a violation of the spec.
1553     StringRef name = CHECK2(sym.getName(stringTable), this);
1554     if (sym.getBinding() == STB_LOCAL) {
1555       Err(ctx) << this << ": invalid local symbol '" << name
1556                << "' in global part of symbol table";
1557       continue;
1558     }
1559 
1560     const uint16_t ver = versyms[i], idx = ver & ~VERSYM_HIDDEN;
1561     if (sym.isUndefined()) {
1562       // For unversioned undefined symbols, VER_NDX_GLOBAL makes more sense but
1563       // as of binutils 2.34, GNU ld produces VER_NDX_LOCAL.
1564       if (ver != VER_NDX_LOCAL && ver != VER_NDX_GLOBAL) {
1565         if (idx >= verneeds.size()) {
1566           ErrAlways(ctx) << "corrupt input file: version need index " << idx
1567                          << " for symbol " << name
1568                          << " is out of bounds\n>>> defined in " << this;
1569           continue;
1570         }
1571         StringRef verName = stringTable.data() + verneeds[idx];
1572         versionedNameBuffer.clear();
1573         name = ss.save((name + "@" + verName).toStringRef(versionedNameBuffer));
1574       }
1575       Symbol *s = ctx.symtab->addSymbol(
1576           Undefined{this, name, sym.getBinding(), sym.st_other, sym.getType()});
1577       s->exportDynamic = true;
1578       if (sym.getBinding() != STB_WEAK &&
1579           ctx.arg.unresolvedSymbolsInShlib != UnresolvedPolicy::Ignore)
1580         requiredSymbols.push_back(s);
1581       continue;
1582     }
1583 
1584     if (ver == VER_NDX_LOCAL ||
1585         (ver != VER_NDX_GLOBAL && idx >= verdefs.size())) {
1586       // In GNU ld < 2.31 (before 3be08ea4728b56d35e136af4e6fd3086ade17764), the
1587       // MIPS port puts _gp_disp symbol into DSO files and incorrectly assigns
1588       // VER_NDX_LOCAL. Workaround this bug.
1589       if (ctx.arg.emachine == EM_MIPS && name == "_gp_disp")
1590         continue;
1591       ErrAlways(ctx) << "corrupt input file: version definition index " << idx
1592                      << " for symbol " << name
1593                      << " is out of bounds\n>>> defined in " << this;
1594       continue;
1595     }
1596 
1597     uint32_t alignment = getAlignment<ELFT>(sections, sym);
1598     if (ver == idx) {
1599       auto *s = ctx.symtab->addSymbol(
1600           SharedSymbol{*this, name, sym.getBinding(), sym.st_other,
1601                        sym.getType(), sym.st_value, sym.st_size, alignment});
1602       s->dsoDefined = true;
1603       if (s->file == this)
1604         s->versionId = ver;
1605     }
1606 
1607     // Also add the symbol with the versioned name to handle undefined symbols
1608     // with explicit versions.
1609     if (ver == VER_NDX_GLOBAL)
1610       continue;
1611 
1612     StringRef verName =
1613         stringTable.data() +
1614         reinterpret_cast<const Elf_Verdef *>(verdefs[idx])->getAux()->vda_name;
1615     versionedNameBuffer.clear();
1616     name = (name + "@" + verName).toStringRef(versionedNameBuffer);
1617     auto *s = ctx.symtab->addSymbol(
1618         SharedSymbol{*this, ss.save(name), sym.getBinding(), sym.st_other,
1619                      sym.getType(), sym.st_value, sym.st_size, alignment});
1620     s->dsoDefined = true;
1621     if (s->file == this)
1622       s->versionId = idx;
1623   }
1624 }
1625 
1626 static ELFKind getBitcodeELFKind(const Triple &t) {
1627   if (t.isLittleEndian())
1628     return t.isArch64Bit() ? ELF64LEKind : ELF32LEKind;
1629   return t.isArch64Bit() ? ELF64BEKind : ELF32BEKind;
1630 }
1631 
1632 static uint16_t getBitcodeMachineKind(Ctx &ctx, StringRef path,
1633                                       const Triple &t) {
1634   switch (t.getArch()) {
1635   case Triple::aarch64:
1636   case Triple::aarch64_be:
1637     return EM_AARCH64;
1638   case Triple::amdgcn:
1639   case Triple::r600:
1640     return EM_AMDGPU;
1641   case Triple::arm:
1642   case Triple::armeb:
1643   case Triple::thumb:
1644   case Triple::thumbeb:
1645     return EM_ARM;
1646   case Triple::avr:
1647     return EM_AVR;
1648   case Triple::hexagon:
1649     return EM_HEXAGON;
1650   case Triple::loongarch32:
1651   case Triple::loongarch64:
1652     return EM_LOONGARCH;
1653   case Triple::mips:
1654   case Triple::mipsel:
1655   case Triple::mips64:
1656   case Triple::mips64el:
1657     return EM_MIPS;
1658   case Triple::msp430:
1659     return EM_MSP430;
1660   case Triple::ppc:
1661   case Triple::ppcle:
1662     return EM_PPC;
1663   case Triple::ppc64:
1664   case Triple::ppc64le:
1665     return EM_PPC64;
1666   case Triple::riscv32:
1667   case Triple::riscv64:
1668     return EM_RISCV;
1669   case Triple::sparcv9:
1670     return EM_SPARCV9;
1671   case Triple::systemz:
1672     return EM_S390;
1673   case Triple::x86:
1674     return t.isOSIAMCU() ? EM_IAMCU : EM_386;
1675   case Triple::x86_64:
1676     return EM_X86_64;
1677   default:
1678     ErrAlways(ctx) << path
1679                    << ": could not infer e_machine from bitcode target triple "
1680                    << t.str();
1681     return EM_NONE;
1682   }
1683 }
1684 
1685 static uint8_t getOsAbi(const Triple &t) {
1686   switch (t.getOS()) {
1687   case Triple::AMDHSA:
1688     return ELF::ELFOSABI_AMDGPU_HSA;
1689   case Triple::AMDPAL:
1690     return ELF::ELFOSABI_AMDGPU_PAL;
1691   case Triple::Mesa3D:
1692     return ELF::ELFOSABI_AMDGPU_MESA3D;
1693   default:
1694     return ELF::ELFOSABI_NONE;
1695   }
1696 }
1697 
1698 BitcodeFile::BitcodeFile(Ctx &ctx, MemoryBufferRef mb, StringRef archiveName,
1699                          uint64_t offsetInArchive, bool lazy)
1700     : InputFile(ctx, BitcodeKind, mb) {
1701   this->archiveName = archiveName;
1702   this->lazy = lazy;
1703 
1704   std::string path = mb.getBufferIdentifier().str();
1705   if (ctx.arg.thinLTOIndexOnly)
1706     path = replaceThinLTOSuffix(ctx, mb.getBufferIdentifier());
1707 
1708   // ThinLTO assumes that all MemoryBufferRefs given to it have a unique
1709   // name. If two archives define two members with the same name, this
1710   // causes a collision which result in only one of the objects being taken
1711   // into consideration at LTO time (which very likely causes undefined
1712   // symbols later in the link stage). So we append file offset to make
1713   // filename unique.
1714   StringSaver &ss = ctx.saver;
1715   StringRef name = archiveName.empty()
1716                        ? ss.save(path)
1717                        : ss.save(archiveName + "(" + path::filename(path) +
1718                                  " at " + utostr(offsetInArchive) + ")");
1719   MemoryBufferRef mbref(mb.getBuffer(), name);
1720 
1721   obj = CHECK2(lto::InputFile::create(mbref), this);
1722 
1723   Triple t(obj->getTargetTriple());
1724   ekind = getBitcodeELFKind(t);
1725   emachine = getBitcodeMachineKind(ctx, mb.getBufferIdentifier(), t);
1726   osabi = getOsAbi(t);
1727 }
1728 
1729 static uint8_t mapVisibility(GlobalValue::VisibilityTypes gvVisibility) {
1730   switch (gvVisibility) {
1731   case GlobalValue::DefaultVisibility:
1732     return STV_DEFAULT;
1733   case GlobalValue::HiddenVisibility:
1734     return STV_HIDDEN;
1735   case GlobalValue::ProtectedVisibility:
1736     return STV_PROTECTED;
1737   }
1738   llvm_unreachable("unknown visibility");
1739 }
1740 
1741 static void createBitcodeSymbol(Ctx &ctx, Symbol *&sym,
1742                                 const lto::InputFile::Symbol &objSym,
1743                                 BitcodeFile &f) {
1744   uint8_t binding = objSym.isWeak() ? STB_WEAK : STB_GLOBAL;
1745   uint8_t type = objSym.isTLS() ? STT_TLS : STT_NOTYPE;
1746   uint8_t visibility = mapVisibility(objSym.getVisibility());
1747 
1748   if (!sym) {
1749     // Symbols can be duplicated in bitcode files because of '#include' and
1750     // linkonce_odr. Use uniqueSaver to save symbol names for de-duplication.
1751     // Update objSym.Name to reference (via StringRef) the string saver's copy;
1752     // this way LTO can reference the same string saver's copy rather than
1753     // keeping copies of its own.
1754     objSym.Name = ctx.uniqueSaver.save(objSym.getName());
1755     sym = ctx.symtab->insert(objSym.getName());
1756   }
1757 
1758   if (objSym.isUndefined()) {
1759     Undefined newSym(&f, StringRef(), binding, visibility, type);
1760     sym->resolve(ctx, newSym);
1761     sym->referenced = true;
1762     return;
1763   }
1764 
1765   if (objSym.isCommon()) {
1766     sym->resolve(ctx, CommonSymbol{ctx, &f, StringRef(), binding, visibility,
1767                                    STT_OBJECT, objSym.getCommonAlignment(),
1768                                    objSym.getCommonSize()});
1769   } else {
1770     Defined newSym(ctx, &f, StringRef(), binding, visibility, type, 0, 0,
1771                    nullptr);
1772     // The definition can be omitted if all bitcode definitions satisfy
1773     // `canBeOmittedFromSymbolTable()` and isUsedInRegularObj is false.
1774     // The latter condition is tested in Symbol::includeInDynsym.
1775     sym->ltoCanOmit = objSym.canBeOmittedFromSymbolTable() &&
1776                       (!sym->isDefined() || sym->ltoCanOmit);
1777     sym->resolve(ctx, newSym);
1778   }
1779 }
1780 
1781 void BitcodeFile::parse() {
1782   for (std::pair<StringRef, Comdat::SelectionKind> s : obj->getComdatTable()) {
1783     keptComdats.push_back(
1784         s.second == Comdat::NoDeduplicate ||
1785         ctx.symtab->comdatGroups.try_emplace(CachedHashStringRef(s.first), this)
1786             .second);
1787   }
1788 
1789   if (numSymbols == 0) {
1790     numSymbols = obj->symbols().size();
1791     symbols = std::make_unique<Symbol *[]>(numSymbols);
1792   }
1793   // Process defined symbols first. See the comment in
1794   // ObjFile<ELFT>::initializeSymbols.
1795   for (auto [i, irSym] : llvm::enumerate(obj->symbols()))
1796     if (!irSym.isUndefined())
1797       createBitcodeSymbol(ctx, symbols[i], irSym, *this);
1798   for (auto [i, irSym] : llvm::enumerate(obj->symbols()))
1799     if (irSym.isUndefined())
1800       createBitcodeSymbol(ctx, symbols[i], irSym, *this);
1801 
1802   for (auto l : obj->getDependentLibraries())
1803     addDependentLibrary(ctx, l, this);
1804 }
1805 
1806 void BitcodeFile::parseLazy() {
1807   numSymbols = obj->symbols().size();
1808   symbols = std::make_unique<Symbol *[]>(numSymbols);
1809   for (auto [i, irSym] : llvm::enumerate(obj->symbols())) {
1810     // Symbols can be duplicated in bitcode files because of '#include' and
1811     // linkonce_odr. Use uniqueSaver to save symbol names for de-duplication.
1812     // Update objSym.Name to reference (via StringRef) the string saver's copy;
1813     // this way LTO can reference the same string saver's copy rather than
1814     // keeping copies of its own.
1815     irSym.Name = ctx.uniqueSaver.save(irSym.getName());
1816     if (!irSym.isUndefined()) {
1817       auto *sym = ctx.symtab->insert(irSym.getName());
1818       sym->resolve(ctx, LazySymbol{*this});
1819       symbols[i] = sym;
1820     }
1821   }
1822 }
1823 
1824 void BitcodeFile::postParse() {
1825   for (auto [i, irSym] : llvm::enumerate(obj->symbols())) {
1826     const Symbol &sym = *symbols[i];
1827     if (sym.file == this || !sym.isDefined() || irSym.isUndefined() ||
1828         irSym.isCommon() || irSym.isWeak())
1829       continue;
1830     int c = irSym.getComdatIndex();
1831     if (c != -1 && !keptComdats[c])
1832       continue;
1833     reportDuplicate(ctx, sym, this, nullptr, 0);
1834   }
1835 }
1836 
1837 void BinaryFile::parse() {
1838   ArrayRef<uint8_t> data = arrayRefFromStringRef(mb.getBuffer());
1839   auto *section =
1840       make<InputSection>(this, ".data", SHT_PROGBITS, SHF_ALLOC | SHF_WRITE,
1841                          /*addralign=*/8, /*entsize=*/0, data);
1842   sections.push_back(section);
1843 
1844   // For each input file foo that is embedded to a result as a binary
1845   // blob, we define _binary_foo_{start,end,size} symbols, so that
1846   // user programs can access blobs by name. Non-alphanumeric
1847   // characters in a filename are replaced with underscore.
1848   std::string s = "_binary_" + mb.getBufferIdentifier().str();
1849   for (char &c : s)
1850     if (!isAlnum(c))
1851       c = '_';
1852 
1853   llvm::StringSaver &ss = ctx.saver;
1854   ctx.symtab->addAndCheckDuplicate(
1855       ctx, Defined{ctx, this, ss.save(s + "_start"), STB_GLOBAL, STV_DEFAULT,
1856                    STT_OBJECT, 0, 0, section});
1857   ctx.symtab->addAndCheckDuplicate(
1858       ctx, Defined{ctx, this, ss.save(s + "_end"), STB_GLOBAL, STV_DEFAULT,
1859                    STT_OBJECT, data.size(), 0, section});
1860   ctx.symtab->addAndCheckDuplicate(
1861       ctx, Defined{ctx, this, ss.save(s + "_size"), STB_GLOBAL, STV_DEFAULT,
1862                    STT_OBJECT, data.size(), 0, nullptr});
1863 }
1864 
1865 InputFile *elf::createInternalFile(Ctx &ctx, StringRef name) {
1866   auto *file =
1867       make<InputFile>(ctx, InputFile::InternalKind, MemoryBufferRef("", name));
1868   // References from an internal file do not lead to --warn-backrefs
1869   // diagnostics.
1870   file->groupId = 0;
1871   return file;
1872 }
1873 
1874 std::unique_ptr<ELFFileBase> elf::createObjFile(Ctx &ctx, MemoryBufferRef mb,
1875                                                 StringRef archiveName,
1876                                                 bool lazy) {
1877   std::unique_ptr<ELFFileBase> f;
1878   switch (getELFKind(ctx, mb, archiveName)) {
1879   case ELF32LEKind:
1880     f = std::make_unique<ObjFile<ELF32LE>>(ctx, ELF32LEKind, mb, archiveName);
1881     break;
1882   case ELF32BEKind:
1883     f = std::make_unique<ObjFile<ELF32BE>>(ctx, ELF32BEKind, mb, archiveName);
1884     break;
1885   case ELF64LEKind:
1886     f = std::make_unique<ObjFile<ELF64LE>>(ctx, ELF64LEKind, mb, archiveName);
1887     break;
1888   case ELF64BEKind:
1889     f = std::make_unique<ObjFile<ELF64BE>>(ctx, ELF64BEKind, mb, archiveName);
1890     break;
1891   default:
1892     llvm_unreachable("getELFKind");
1893   }
1894   f->init();
1895   f->lazy = lazy;
1896   return f;
1897 }
1898 
1899 template <class ELFT> void ObjFile<ELFT>::parseLazy() {
1900   const ArrayRef<typename ELFT::Sym> eSyms = this->getELFSyms<ELFT>();
1901   numSymbols = eSyms.size();
1902   symbols = std::make_unique<Symbol *[]>(numSymbols);
1903 
1904   // resolve() may trigger this->extract() if an existing symbol is an undefined
1905   // symbol. If that happens, this function has served its purpose, and we can
1906   // exit from the loop early.
1907   auto *symtab = ctx.symtab.get();
1908   for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) {
1909     if (eSyms[i].st_shndx == SHN_UNDEF)
1910       continue;
1911     symbols[i] = symtab->insert(CHECK2(eSyms[i].getName(stringTable), this));
1912     symbols[i]->resolve(ctx, LazySymbol{*this});
1913     if (!lazy)
1914       break;
1915   }
1916 }
1917 
1918 bool InputFile::shouldExtractForCommon(StringRef name) const {
1919   if (isa<BitcodeFile>(this))
1920     return isBitcodeNonCommonDef(mb, name, archiveName);
1921 
1922   return isNonCommonDef(ctx, mb, name, archiveName);
1923 }
1924 
1925 std::string elf::replaceThinLTOSuffix(Ctx &ctx, StringRef path) {
1926   auto [suffix, repl] = ctx.arg.thinLTOObjectSuffixReplace;
1927   if (path.consume_back(suffix))
1928     return (path + repl).str();
1929   return std::string(path);
1930 }
1931 
1932 template class elf::ObjFile<ELF32LE>;
1933 template class elf::ObjFile<ELF32BE>;
1934 template class elf::ObjFile<ELF64LE>;
1935 template class elf::ObjFile<ELF64BE>;
1936 
1937 template void SharedFile::parse<ELF32LE>();
1938 template void SharedFile::parse<ELF32BE>();
1939 template void SharedFile::parse<ELF64LE>();
1940 template void SharedFile::parse<ELF64BE>();
1941