xref: /llvm-project/lld/COFF/InputFiles.h (revision fb01a289038c16e13c6133ee602a58254b349411)
1 //===- InputFiles.h ---------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_COFF_INPUT_FILES_H
10 #define LLD_COFF_INPUT_FILES_H
11 
12 #include "Config.h"
13 #include "lld/Common/LLVM.h"
14 #include "llvm/ADT/ArrayRef.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/ADT/StringSet.h"
18 #include "llvm/BinaryFormat/Magic.h"
19 #include "llvm/Object/Archive.h"
20 #include "llvm/Object/COFF.h"
21 #include "llvm/Support/StringSaver.h"
22 #include <memory>
23 #include <set>
24 #include <vector>
25 
// Forward declarations of LLVM types that this header only names by
// pointer, reference or as a template argument.
namespace llvm {
struct DILineInfo;

namespace pdb {
class DbiModuleDescriptorBuilder;
class NativeSession;
} // namespace pdb

namespace lto {
class InputFile;
} // namespace lto
} // namespace llvm
36 
37 namespace lld {
38 class DWARFCache;
39 
40 namespace coff {
41 class COFFLinkerContext;
42 
43 const COFFSyncStream &operator<<(const COFFSyncStream &, const InputFile *);
44 
45 std::vector<MemoryBufferRef> getArchiveMembers(COFFLinkerContext &,
46                                                llvm::object::Archive *file);
47 
48 using llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN;
49 using llvm::COFF::MachineTypes;
50 using llvm::object::Archive;
51 using llvm::object::COFFObjectFile;
52 using llvm::object::COFFSymbolRef;
53 using llvm::object::coff_import_header;
54 using llvm::object::coff_section;
55 
// Forward declarations of lld::coff types referenced below only through
// pointers or references (kept in alphabetical order).
class Chunk;
class Defined;
class DefinedImportData;
class DefinedImportThunk;
class DefinedRegular;
class ImportThunkChunk;
class ImportThunkChunkARM64EC;
class SectionChunk;
class Symbol;
class SymbolTable;
class TpiSource;
class Undefined;
68 
69 // The root class of input files.
70 class InputFile {
71 public:
72   enum Kind {
73     ArchiveKind,
74     ObjectKind,
75     PDBKind,
76     ImportKind,
77     BitcodeKind,
78     DLLKind
79   };
80   Kind kind() const { return fileKind; }
81   virtual ~InputFile() {}
82 
83   // Returns the filename.
84   StringRef getName() const { return mb.getBufferIdentifier(); }
85 
86   // Reads a file (the constructor doesn't do that).
87   virtual void parse() = 0;
88 
89   // Returns the CPU type this file was compiled to.
90   virtual MachineTypes getMachineType() const {
91     return IMAGE_FILE_MACHINE_UNKNOWN;
92   }
93 
94   MemoryBufferRef mb;
95 
96   // An archive file name if this file is created from an archive.
97   StringRef parentName;
98 
99   // Returns .drectve section contents if exist.
100   StringRef getDirectives() { return directives; }
101 
102   SymbolTable &symtab;
103 
104 protected:
105   InputFile(SymbolTable &s, Kind k, MemoryBufferRef m, bool lazy = false)
106       : mb(m), symtab(s), fileKind(k), lazy(lazy) {}
107 
108   StringRef directives;
109 
110 private:
111   const Kind fileKind;
112 
113 public:
114   // True if this is a lazy ObjFile or BitcodeFile.
115   bool lazy = false;
116 };
117 
118 // .lib or .a file.
119 class ArchiveFile : public InputFile {
120 public:
121   explicit ArchiveFile(COFFLinkerContext &ctx, MemoryBufferRef m);
122   static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
123   void parse() override;
124 
125   // Enqueues an archive member load for the given symbol. If we've already
126   // enqueued a load for the same archive member, this function does nothing,
127   // which ensures that we don't load the same member more than once.
128   void addMember(const Archive::Symbol &sym);
129 
130 private:
131   std::unique_ptr<Archive> file;
132   llvm::DenseSet<uint64_t> seen;
133 };
134 
135 // .obj or .o file. This may be a member of an archive file.
136 class ObjFile : public InputFile {
137 public:
138   static ObjFile *create(COFFLinkerContext &ctx, MemoryBufferRef mb,
139                          bool lazy = false);
140   explicit ObjFile(SymbolTable &symtab, COFFObjectFile *coffObj, bool lazy);
141 
142   static bool classof(const InputFile *f) { return f->kind() == ObjectKind; }
143   void parse() override;
144   void parseLazy();
145   MachineTypes getMachineType() const override;
146   ArrayRef<Chunk *> getChunks() { return chunks; }
147   ArrayRef<SectionChunk *> getDebugChunks() { return debugChunks; }
148   ArrayRef<SectionChunk *> getSXDataChunks() { return sxDataChunks; }
149   ArrayRef<SectionChunk *> getGuardFidChunks() { return guardFidChunks; }
150   ArrayRef<SectionChunk *> getGuardIATChunks() { return guardIATChunks; }
151   ArrayRef<SectionChunk *> getGuardLJmpChunks() { return guardLJmpChunks; }
152   ArrayRef<SectionChunk *> getGuardEHContChunks() { return guardEHContChunks; }
153   ArrayRef<Symbol *> getSymbols() { return symbols; }
154 
155   MutableArrayRef<Symbol *> getMutableSymbols() { return symbols; }
156 
157   ArrayRef<uint8_t> getDebugSection(StringRef secName);
158 
159   // Returns a Symbol object for the symbolIndex'th symbol in the
160   // underlying object file.
161   Symbol *getSymbol(uint32_t symbolIndex) {
162     return symbols[symbolIndex];
163   }
164 
165   // Returns the underlying COFF file.
166   COFFObjectFile *getCOFFObj() { return coffObj.get(); }
167 
168   // Add a symbol for a range extension thunk. Return the new symbol table
169   // index. This index can be used to modify a relocation.
170   uint32_t addRangeThunkSymbol(Symbol *thunk) {
171     symbols.push_back(thunk);
172     return symbols.size() - 1;
173   }
174 
175   void includeResourceChunks();
176 
177   bool isResourceObjFile() const { return !resourceChunks.empty(); }
178 
179   // Flags in the absolute @feat.00 symbol if it is present. These usually
180   // indicate if an object was compiled with certain security features enabled
181   // like stack guard, safeseh, /guard:cf, or other things.
182   uint32_t feat00Flags = 0;
183 
184   // True if this object file is compatible with SEH.  COFF-specific and
185   // x86-only. COFF spec 5.10.1. The .sxdata section.
186   bool hasSafeSEH() { return feat00Flags & 0x1; }
187 
188   // True if this file was compiled with /guard:cf.
189   bool hasGuardCF() { return feat00Flags & 0x800; }
190 
191   // True if this file was compiled with /guard:ehcont.
192   bool hasGuardEHCont() { return feat00Flags & 0x4000; }
193 
194   // Pointer to the PDB module descriptor builder. Various debug info records
195   // will reference object files by "module index", which is here. Things like
196   // source files and section contributions are also recorded here. Will be null
197   // if we are not producing a PDB.
198   llvm::pdb::DbiModuleDescriptorBuilder *moduleDBI = nullptr;
199 
200   const coff_section *addrsigSec = nullptr;
201 
202   const coff_section *callgraphSec = nullptr;
203 
204   // When using Microsoft precompiled headers, this is the PCH's key.
205   // The same key is used by both the precompiled object, and objects using the
206   // precompiled object. Any difference indicates out-of-date objects.
207   std::optional<uint32_t> pchSignature;
208 
209   // Whether this file was compiled with /hotpatch.
210   bool hotPatchable = false;
211 
212   // Whether the object was already merged into the final PDB.
213   bool mergedIntoPDB = false;
214 
215   // If the OBJ has a .debug$T stream, this tells how it will be handled.
216   TpiSource *debugTypesObj = nullptr;
217 
218   // The .debug$P or .debug$T section data if present. Empty otherwise.
219   ArrayRef<uint8_t> debugTypes;
220 
221   std::optional<std::pair<StringRef, uint32_t>>
222   getVariableLocation(StringRef var);
223 
224   std::optional<llvm::DILineInfo> getDILineInfo(uint32_t offset,
225                                                 uint32_t sectionIndex);
226 
227 private:
228   const coff_section* getSection(uint32_t i);
229   const coff_section *getSection(COFFSymbolRef sym) {
230     return getSection(sym.getSectionNumber());
231   }
232 
233   void enqueuePdbFile(StringRef path, ObjFile *fromFile);
234 
235   void initializeChunks();
236   void initializeSymbols();
237   void initializeFlags();
238   void initializeDependencies();
239   void initializeECThunks();
240 
241   SectionChunk *
242   readSection(uint32_t sectionNumber,
243               const llvm::object::coff_aux_section_definition *def,
244               StringRef leaderName);
245 
246   void readAssociativeDefinition(
247       COFFSymbolRef coffSym,
248       const llvm::object::coff_aux_section_definition *def);
249 
250   void readAssociativeDefinition(
251       COFFSymbolRef coffSym,
252       const llvm::object::coff_aux_section_definition *def,
253       uint32_t parentSection);
254 
255   void recordPrevailingSymbolForMingw(
256       COFFSymbolRef coffSym,
257       llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap);
258 
259   void maybeAssociateSEHForMingw(
260       COFFSymbolRef sym, const llvm::object::coff_aux_section_definition *def,
261       const llvm::DenseMap<StringRef, uint32_t> &prevailingSectionMap);
262 
263   // Given a new symbol Sym with comdat selection Selection, if the new
264   // symbol is not (yet) Prevailing and the existing comdat leader set to
265   // Leader, emits a diagnostic if the new symbol and its selection doesn't
266   // match the existing symbol and its selection. If either old or new
267   // symbol have selection IMAGE_COMDAT_SELECT_LARGEST, Sym might replace
268   // the existing leader. In that case, Prevailing is set to true.
269   void
270   handleComdatSelection(COFFSymbolRef sym, llvm::COFF::COMDATType &selection,
271                         bool &prevailing, DefinedRegular *leader,
272                         const llvm::object::coff_aux_section_definition *def);
273 
274   std::optional<Symbol *>
275   createDefined(COFFSymbolRef sym,
276                 std::vector<const llvm::object::coff_aux_section_definition *>
277                     &comdatDefs,
278                 bool &prevailingComdat);
279   Symbol *createRegular(COFFSymbolRef sym);
280   Symbol *createUndefined(COFFSymbolRef sym, bool overrideLazy);
281 
282   std::unique_ptr<COFFObjectFile> coffObj;
283 
284   // List of all chunks defined by this file. This includes both section
285   // chunks and non-section chunks for common symbols.
286   std::vector<Chunk *> chunks;
287 
288   std::vector<SectionChunk *> resourceChunks;
289 
290   // CodeView debug info sections.
291   std::vector<SectionChunk *> debugChunks;
292 
293   // Chunks containing symbol table indices of exception handlers. Only used for
294   // 32-bit x86.
295   std::vector<SectionChunk *> sxDataChunks;
296 
297   // Chunks containing symbol table indices of address taken symbols, address
298   // taken IAT entries, longjmp and ehcont targets. These are not linked into
299   // the final binary when /guard:cf is set.
300   std::vector<SectionChunk *> guardFidChunks;
301   std::vector<SectionChunk *> guardIATChunks;
302   std::vector<SectionChunk *> guardLJmpChunks;
303   std::vector<SectionChunk *> guardEHContChunks;
304 
305   std::vector<SectionChunk *> hybmpChunks;
306 
307   // This vector contains a list of all symbols defined or referenced by this
308   // file. They are indexed such that you can get a Symbol by symbol
309   // index. Nonexistent indices (which are occupied by auxiliary
310   // symbols in the real symbol table) are filled with null pointers.
311   std::vector<Symbol *> symbols;
312 
313   // This vector contains the same chunks as Chunks, but they are
314   // indexed such that you can get a SectionChunk by section index.
315   // Nonexistent section indices are filled with null pointers.
316   // (Because section number is 1-based, the first slot is always a
317   // null pointer.) This vector is only valid during initialization.
318   std::vector<SectionChunk *> sparseChunks;
319 
320   DWARFCache *dwarf = nullptr;
321 };
322 
323 // This is a PDB type server dependency, that is not a input file per se, but
324 // needs to be treated like one. Such files are discovered from the debug type
325 // stream.
326 class PDBInputFile : public InputFile {
327 public:
328   explicit PDBInputFile(COFFLinkerContext &ctx, MemoryBufferRef m);
329   ~PDBInputFile();
330   static bool classof(const InputFile *f) { return f->kind() == PDBKind; }
331   void parse() override;
332 
333   static PDBInputFile *findFromRecordPath(const COFFLinkerContext &ctx,
334                                           StringRef path, ObjFile *fromFile);
335 
336   // Record possible errors while opening the PDB file
337   std::optional<std::string> loadErrorStr;
338 
339   // This is the actual interface to the PDB (if it was opened successfully)
340   std::unique_ptr<llvm::pdb::NativeSession> session;
341 
342   // If the PDB has a .debug$T stream, this tells how it will be handled.
343   TpiSource *debugTypesObj = nullptr;
344 };
345 
346 // This type represents import library members that contain DLL names
347 // and symbols exported from the DLLs. See Microsoft PE/COFF spec. 7
348 // for details about the format.
349 class ImportFile : public InputFile {
350 public:
351   explicit ImportFile(COFFLinkerContext &ctx, MemoryBufferRef m);
352 
353   static bool classof(const InputFile *f) { return f->kind() == ImportKind; }
354   MachineTypes getMachineType() const override { return getMachineType(mb); }
355   static MachineTypes getMachineType(MemoryBufferRef m);
356   bool isSameImport(const ImportFile *other) const;
357   bool isEC() const { return impECSym != nullptr; }
358 
359   DefinedImportData *impSym = nullptr;
360   Defined *thunkSym = nullptr;
361   ImportThunkChunkARM64EC *impchkThunk = nullptr;
362   ImportFile *hybridFile = nullptr;
363   std::string dllName;
364 
365 private:
366   void parse() override;
367   ImportThunkChunk *makeImportThunk();
368 
369 public:
370   StringRef externalName;
371   const coff_import_header *hdr;
372   Chunk *location = nullptr;
373 
374   // Auxiliary IAT symbols and chunks on ARM64EC.
375   DefinedImportData *impECSym = nullptr;
376   Chunk *auxLocation = nullptr;
377   Defined *auxThunkSym = nullptr;
378   DefinedImportData *auxImpCopySym = nullptr;
379   Chunk *auxCopyLocation = nullptr;
380 
381   // We want to eliminate dllimported symbols if no one actually refers to them.
382   // These "Live" bits are used to keep track of which import library members
383   // are actually in use.
384   //
385   // If the Live bit is turned off by MarkLive, Writer will ignore dllimported
386   // symbols provided by this import library member.
387   bool live;
388 };
389 
390 // Used for LTO.
391 class BitcodeFile : public InputFile {
392 public:
393   explicit BitcodeFile(SymbolTable &symtab, MemoryBufferRef mb,
394                        std::unique_ptr<llvm::lto::InputFile> &obj, bool lazy);
395   ~BitcodeFile();
396 
397   static BitcodeFile *create(COFFLinkerContext &ctx, MemoryBufferRef mb,
398                              StringRef archiveName, uint64_t offsetInArchive,
399                              bool lazy);
400   static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
401   ArrayRef<Symbol *> getSymbols() { return symbols; }
402   MachineTypes getMachineType() const override {
403     return getMachineType(obj.get());
404   }
405   static MachineTypes getMachineType(const llvm::lto::InputFile *obj);
406   void parseLazy();
407   std::unique_ptr<llvm::lto::InputFile> obj;
408 
409 private:
410   void parse() override;
411 
412   std::vector<Symbol *> symbols;
413 };
414 
415 // .dll file. MinGW only.
416 class DLLFile : public InputFile {
417 public:
418   explicit DLLFile(SymbolTable &symtab, MemoryBufferRef m)
419       : InputFile(symtab, DLLKind, m) {}
420   static bool classof(const InputFile *f) { return f->kind() == DLLKind; }
421   void parse() override;
422   MachineTypes getMachineType() const override;
423 
424   struct Symbol {
425     StringRef dllName;
426     StringRef symbolName;
427     llvm::COFF::ImportNameType nameType;
428     llvm::COFF::ImportType importType;
429   };
430 
431   void makeImport(Symbol *s);
432 
433 private:
434   std::unique_ptr<COFFObjectFile> coffObj;
435   llvm::StringSet<> seen;
436 };
437 
438 inline bool isBitcode(MemoryBufferRef mb) {
439   return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode;
440 }
441 
442 std::string replaceThinLTOSuffix(StringRef path, StringRef suffix,
443                                  StringRef repl);
444 } // namespace coff
445 
446 std::string toString(const coff::InputFile *file);
447 } // namespace lld
448 
449 #endif
450