xref: /llvm-project/lld/ELF/LinkerScript.h (revision 18078605046c50f01f31e826ea3591f99019de38)
1 //===- LinkerScript.h -------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_ELF_LINKER_SCRIPT_H
10 #define LLD_ELF_LINKER_SCRIPT_H
11 
#include "Config.h"
#include "InputSection.h"
#include "Writer.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Strings.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <utility>
28 namespace lld::elf {
29 
30 class Defined;
31 class InputFile;
32 class InputSection;
33 class InputSectionBase;
34 class OutputSection;
35 class SectionBase;
36 class ThunkSection;
37 struct OutputDesc;
38 struct SectionClass;
39 struct SectionClassDesc;
40 
41 // This represents an r-value in the linker script.
42 struct ExprValue {
43   ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val,
44             const Twine &loc)
45       : sec(sec), val(val), forceAbsolute(forceAbsolute), loc(loc.str()) {}
46 
47   ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "") {}
48 
49   bool isAbsolute() const { return forceAbsolute || sec == nullptr; }
50   uint64_t getValue() const;
51   uint64_t getSecAddr() const;
52   uint64_t getSectionOffset() const;
53 
54   // If a value is relative to a section, it has a non-null Sec.
55   SectionBase *sec;
56 
57   uint64_t val;
58   uint64_t alignment = 1;
59 
60   // The original st_type if the expression represents a symbol. Any operation
61   // resets type to STT_NOTYPE.
62   uint8_t type = llvm::ELF::STT_NOTYPE;
63 
64   // True if this expression is enclosed in ABSOLUTE().
65   // This flag affects the return value of getValue().
66   bool forceAbsolute;
67 
68   // Original source location. Used for error messages.
69   std::string loc;
70 };
71 
// This represents an expression in the linker script.
// ScriptParser::readExpr reads an expression and returns an Expr.
// Later, we evaluate the expression by calling the function, which takes no
// arguments and yields an ExprValue.
using Expr = std::function<ExprValue()>;
76 
// This enum is used to implement linker script SECTIONS command.
// https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS
// Each value is stored in SectionCommand::kind and tested by the classof()
// helpers of the command types.
enum SectionsCommandKind {
  AssignmentKind, // . = expr or <sym> = expr
  OutputSectionKind,
  InputSectionKind, // tag used by InputSectionDescription
  ByteKind,  // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr)
  ClassKind, // CLASS(class_name)
};
86 
// Common base of the SECTIONS sub-commands. `kind` holds the tag that the
// classof() helpers of the derived command types compare against.
struct SectionCommand {
  SectionCommand(int k) : kind{k} {}
  int kind;
};
91 
92 // This represents ". = <expr>" or "<symbol> = <expr>".
93 struct SymbolAssignment : SectionCommand {
94   SymbolAssignment(StringRef name, Expr e, unsigned symOrder, std::string loc)
95       : SectionCommand(AssignmentKind), name(name), expression(e),
96         symOrder(symOrder), location(loc) {}
97 
98   static bool classof(const SectionCommand *c) {
99     return c->kind == AssignmentKind;
100   }
101 
102   // The LHS of an expression. Name is either a symbol name or ".".
103   StringRef name;
104   Defined *sym = nullptr;
105 
106   // The RHS of an expression.
107   Expr expression;
108 
109   // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN.
110   bool provide = false;
111   bool hidden = false;
112 
113   // This assignment references DATA_SEGMENT_RELRO_END.
114   bool dataSegmentRelroEnd = false;
115 
116   unsigned symOrder;
117 
118   // Holds file name and line number for error reporting.
119   std::string location;
120 
121   // A string representation of this command. We use this for -Map.
122   std::string commandString;
123 
124   // Address of this assignment command.
125   uint64_t addr;
126 
127   // Size of this assignment command. This is usually 0, but if
128   // you move '.' this may be greater than 0.
129   uint64_t size;
130 };
131 
// Linker scripts allow additional constraints to be put on output sections.
// If an output section is marked as ONLY_IF_RO, the section is created
// only if its input sections are read-only. Likewise, an output section
// with ONLY_IF_RW is created only if all of its input sections are RW.
enum class ConstraintKind { NoConstraint, ReadOnly, ReadWrite };
137 
138 // This struct is used to represent the location and size of regions of
139 // target memory. Instances of the struct are created by parsing the
140 // MEMORY command.
141 struct MemoryRegion {
142   MemoryRegion(StringRef name, Expr origin, Expr length, uint32_t flags,
143                uint32_t invFlags, uint32_t negFlags, uint32_t negInvFlags)
144       : name(std::string(name)), origin(origin), length(length), flags(flags),
145         invFlags(invFlags), negFlags(negFlags), negInvFlags(negInvFlags) {}
146 
147   std::string name;
148   Expr origin;
149   Expr length;
150   // A section can be assigned to the region if any of these ELF section flags
151   // are set...
152   uint32_t flags;
153   // ... or any of these flags are not set.
154   // For example, the memory region attribute "r" maps to SHF_WRITE.
155   uint32_t invFlags;
156   // A section cannot be assigned to the region if any of these ELF section
157   // flags are set...
158   uint32_t negFlags;
159   // ... or any of these flags are not set.
160   // For example, the memory region attribute "!r" maps to SHF_WRITE.
161   uint32_t negInvFlags;
162   uint64_t curPos = 0;
163 
164   uint64_t getOrigin() const { return origin().getValue(); }
165   uint64_t getLength() const { return length().getValue(); }
166 
167   bool compatibleWith(uint32_t secFlags) const {
168     if ((secFlags & negFlags) || (~secFlags & negInvFlags))
169       return false;
170     return (secFlags & flags) || (~secFlags & invFlags);
171   }
172 };
173 
174 // This struct represents one section match pattern in SECTIONS() command.
175 // It can optionally have negative match pattern for EXCLUDED_FILE command.
176 // Also it may be surrounded with SORT() command, so contains sorting rules.
177 class SectionPattern {
178   StringMatcher excludedFilePat;
179 
180   // Cache of the most recent input argument and result of excludesFile().
181   mutable std::optional<std::pair<const InputFile *, bool>> excludesFileCache;
182 
183 public:
184   SectionPattern(StringMatcher &&pat1, StringMatcher &&pat2)
185       : excludedFilePat(pat1), sectionPat(pat2),
186         sortOuter(SortSectionPolicy::Default),
187         sortInner(SortSectionPolicy::Default) {}
188 
189   bool excludesFile(const InputFile &file) const;
190 
191   StringMatcher sectionPat;
192   SortSectionPolicy sortOuter;
193   SortSectionPolicy sortInner;
194 };
195 
196 class InputSectionDescription : public SectionCommand {
197   enum class MatchType { Trivial, WholeArchive, ArchivesExcluded } matchType;
198   SingleStringMatcher filePat;
199 
200   // Cache of the most recent input argument and result of matchesFile().
201   mutable std::optional<std::pair<const InputFile *, bool>> matchesFileCache;
202 
203 public:
204   InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0,
205                           uint64_t withoutFlags = 0, StringRef classRef = {})
206       : SectionCommand(InputSectionKind), matchType(MatchType::Trivial),
207         filePat(filePattern), classRef(classRef), withFlags(withFlags),
208         withoutFlags(withoutFlags) {
209     assert((filePattern.empty() || classRef.empty()) &&
210            "file pattern and class reference are mutually exclusive");
211 
212     // The matching syntax for whole archives and files outside of an archive
213     // can't be handled by SingleStringMatcher, and instead are handled
214     // manually within matchesFile()
215     if (!filePattern.empty()) {
216       if (filePattern.back() == ':') {
217         matchType = MatchType::WholeArchive;
218         filePat = filePattern.drop_back();
219       } else if (filePattern.front() == ':') {
220         matchType = MatchType::ArchivesExcluded;
221         filePat = filePattern.drop_front();
222       }
223     }
224   }
225 
226   static bool classof(const SectionCommand *c) {
227     return c->kind == InputSectionKind;
228   }
229 
230   bool matchesFile(const InputFile &file) const;
231 
232   // Input sections that matches at least one of SectionPatterns
233   // will be associated with this InputSectionDescription.
234   SmallVector<SectionPattern, 0> sectionPatterns;
235 
236   // If present, input section matching uses class membership instead of file
237   // and section patterns (mutually exclusive).
238   StringRef classRef;
239 
240   // Includes InputSections and MergeInputSections. Used temporarily during
241   // assignment of input sections to output sections.
242   SmallVector<InputSectionBase *, 0> sectionBases;
243 
244   // Used after the finalizeInputSections() pass. MergeInputSections have been
245   // merged into MergeSyntheticSections.
246   SmallVector<InputSection *, 0> sections;
247 
248   // Temporary record of synthetic ThunkSection instances and the pass that
249   // they were created in. This is used to insert newly created ThunkSections
250   // into Sections at the end of a createThunks() pass.
251   SmallVector<std::pair<ThunkSection *, uint32_t>, 0> thunkSections;
252 
253   // SectionPatterns can be filtered with the INPUT_SECTION_FLAGS command.
254   uint64_t withFlags;
255   uint64_t withoutFlags;
256 };
257 
258 // Represents BYTE(), SHORT(), LONG(), or QUAD().
259 struct ByteCommand : SectionCommand {
260   ByteCommand(Expr e, unsigned size, std::string commandString)
261       : SectionCommand(ByteKind), commandString(commandString), expression(e),
262         size(size) {}
263 
264   static bool classof(const SectionCommand *c) { return c->kind == ByteKind; }
265 
266   // Keeps string representing the command. Used for -Map" is perhaps better.
267   std::string commandString;
268 
269   Expr expression;
270 
271   // This is just an offset of this assignment command in the output section.
272   unsigned offset;
273 
274   // Size of this data command.
275   unsigned size;
276 };
277 
278 struct InsertCommand {
279   SmallVector<StringRef, 0> names;
280   bool isAfter;
281   StringRef where;
282 };
283 
// A NOCROSSREFS/NOCROSSREFS_TO command that prohibits references between
// certain output sections.
struct NoCrossRefCommand {
  SmallVector<StringRef, 0> outputSections;

  // When true, this describes a NOCROSSREFS_TO command that prohibits
  // references to the first output section from any of the other sections.
  bool toFirst = false;
};
293 
// One entry of the PHDRS command list kept in LinkerScript::phdrsCommands.
// NOTE(review): field meanings below are inferred from their names - confirm
// against the PHDRS parser.
struct PhdrsCommand {
  StringRef name;
  // Defaults to PT_NULL until a type is set.
  unsigned type = llvm::ELF::PT_NULL;
  // Presumably set by the FILEHDR and PHDRS keywords respectively.
  bool hasFilehdr = false;
  bool hasPhdrs = false;
  // Empty when no flags were given.
  std::optional<unsigned> flags;
  // Null when no LMA expression was given.
  Expr lmaExpr = nullptr;
};
302 
303 class LinkerScript final {
304   // Temporary state used in processSectionCommands() and assignAddresses()
305   // that must be reinitialized for each call to the above functions, and must
306   // not be used outside of the scope of a call to the above functions.
307   struct AddressState {
308     AddressState(const LinkerScript &);
309     OutputSection *outSec = nullptr;
310     MemoryRegion *memRegion = nullptr;
311     MemoryRegion *lmaRegion = nullptr;
312     uint64_t lmaOffset = 0;
313     uint64_t tbssAddr = 0;
314   };
315 
316   Ctx &ctx;
317   SmallVector<std::unique_ptr<OutputDesc>, 0> descPool;
318   llvm::DenseMap<llvm::CachedHashStringRef, OutputDesc *> nameToOutputSection;
319 
320   StringRef getOutputSectionName(const InputSectionBase *s) const;
321   void addSymbol(SymbolAssignment *cmd);
322   void declareSymbol(SymbolAssignment *cmd);
323   void assignSymbol(SymbolAssignment *cmd, bool inSec);
324   void setDot(Expr e, const Twine &loc, bool inSec);
325   void expandOutputSection(uint64_t size);
326   void expandMemoryRegions(uint64_t size);
327 
328   SmallVector<InputSectionBase *, 0>
329   computeInputSections(const InputSectionDescription *,
330                        ArrayRef<InputSectionBase *>, const SectionBase &outCmd);
331 
332   SmallVector<InputSectionBase *, 0> createInputSectionList(OutputSection &cmd);
333 
334   void discardSynthetic(OutputSection &);
335 
336   SmallVector<size_t, 0> getPhdrIndices(OutputSection *sec);
337 
338   std::pair<MemoryRegion *, MemoryRegion *>
339   findMemoryRegion(OutputSection *sec, MemoryRegion *hint);
340 
341   bool assignOffsets(OutputSection *sec);
342 
343   // This captures the local AddressState and makes it accessible
344   // deliberately. This is needed as there are some cases where we cannot just
345   // thread the current state through to a lambda function created by the
346   // script parser.
347   // This should remain a plain pointer as its lifetime is smaller than
348   // LinkerScript.
349   AddressState *state = nullptr;
350 
351   std::unique_ptr<OutputSection> aether;
352 
353   uint64_t dot = 0;
354 
355 public:
356   // OutputSection may be incomplete. Avoid inline ctor/dtor.
357   LinkerScript(Ctx &ctx);
358   ~LinkerScript();
359 
360   OutputDesc *createOutputSection(StringRef name, StringRef location);
361   OutputDesc *getOrCreateOutputSection(StringRef name);
362 
363   bool hasPhdrsCommands() { return !phdrsCommands.empty(); }
364   uint64_t getDot() { return dot; }
365   void discard(InputSectionBase &s);
366 
367   ExprValue getSymbolValue(StringRef name, const Twine &loc);
368 
369   void addOrphanSections();
370   void diagnoseOrphanHandling() const;
371   void diagnoseMissingSGSectionAddress() const;
372   void adjustOutputSections();
373   void adjustSectionsAfterSorting();
374 
375   SmallVector<std::unique_ptr<PhdrEntry>, 0> createPhdrs();
376   bool needsInterpSection();
377 
378   bool shouldKeep(InputSectionBase *s);
379   std::pair<const OutputSection *, const Defined *> assignAddresses();
380   bool spillSections();
381   void erasePotentialSpillSections();
382   void allocateHeaders(SmallVector<std::unique_ptr<PhdrEntry>, 0> &phdrs);
383   void processSectionCommands();
384   void processSymbolAssignments();
385   void declareSymbols();
386 
387   // Used to handle INSERT AFTER statements.
388   void processInsertCommands();
389 
390   // Describe memory region usage.
391   void printMemoryUsage(raw_ostream &os);
392 
393   // Record a pending error during an assignAddresses invocation.
394   // assignAddresses is executed more than once. Therefore, lld::error should be
395   // avoided to not report duplicate errors.
396   void recordError(const Twine &msg);
397 
398   // Check backward location counter assignment and memory region/LMA overflows.
399   void checkFinalScriptConditions() const;
400 
401   // Add symbols that are referenced in the linker script to the symbol table.
402   // Symbols referenced in a PROVIDE command are only added to the symbol table
403   // if the PROVIDE command actually provides the symbol.
404   // It also adds the symbols referenced by the used PROVIDE symbols to the
405   // linker script referenced symbols list.
406   void addScriptReferencedSymbolsToSymTable();
407 
408   // Returns true if the PROVIDE symbol should be added to the link.
409   // A PROVIDE symbol is added to the link only if it satisfies an
410   // undefined reference.
411   bool shouldAddProvideSym(StringRef symName);
412 
413   // SECTIONS command list.
414   SmallVector<SectionCommand *, 0> sectionCommands;
415 
416   // PHDRS command list.
417   SmallVector<PhdrsCommand, 0> phdrsCommands;
418 
419   bool hasSectionsCommand = false;
420   bool seenDataAlign = false;
421   bool seenRelroEnd = false;
422   bool errorOnMissingSection = false;
423   SmallVector<SmallString<0>, 0> recordedErrors;
424 
425   // List of section patterns specified with KEEP commands. They will
426   // be kept even if they are unused and --gc-sections is specified.
427   SmallVector<InputSectionDescription *, 0> keptSections;
428 
429   // A map from memory region name to a memory region descriptor.
430   llvm::MapVector<llvm::StringRef, MemoryRegion *> memoryRegions;
431 
432   // A list of symbols referenced by the script.
433   SmallVector<llvm::StringRef, 0> referencedSymbols;
434 
435   // Used to implement INSERT [AFTER|BEFORE]. Contains output sections that need
436   // to be reordered.
437   SmallVector<InsertCommand, 0> insertCommands;
438 
439   // OutputSections specified by OVERWRITE_SECTIONS.
440   SmallVector<OutputDesc *, 0> overwriteSections;
441 
442   // NOCROSSREFS(_TO) commands.
443   SmallVector<NoCrossRefCommand, 0> noCrossRefs;
444 
445   // Sections that will be warned/errored by --orphan-handling.
446   SmallVector<const InputSectionBase *, 0> orphanSections;
447 
448   // Stores the mapping: PROVIDE symbol -> symbols referred in the PROVIDE
449   // expression. For example, if the PROVIDE command is:
450   //
451   // PROVIDE(v = a + b + c);
452   //
453   // then provideMap should contain the mapping: 'v' -> ['a', 'b', 'c']
454   llvm::MapVector<StringRef, SmallVector<StringRef, 0>> provideMap;
455   // Store defined symbols that should ignore PROVIDE commands.
456   llvm::DenseSet<Symbol *> unusedProvideSyms;
457 
458   // List of potential spill locations (PotentialSpillSection) for an input
459   // section.
460   struct PotentialSpillList {
461     // Never nullptr.
462     PotentialSpillSection *head;
463     PotentialSpillSection *tail;
464   };
465   llvm::DenseMap<InputSectionBase *, PotentialSpillList> potentialSpillLists;
466 
467   // Named lists of input sections that can be collectively referenced in output
468   // section descriptions. Multiple references allow for sections to spill from
469   // one output section to another.
470   llvm::DenseMap<llvm::CachedHashStringRef, SectionClassDesc *> sectionClasses;
471 };
472 
473 } // end namespace lld::elf
474 
475 #endif // LLD_ELF_LINKER_SCRIPT_H
476