xref: /llvm-project/lld/MachO/InputSection.h (revision ab27253ad395881c0798ac5c8efc2f6fc2922399)
1 //===- InputSection.h -------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLD_MACHO_INPUT_SECTION_H
10 #define LLD_MACHO_INPUT_SECTION_H
11 
12 #include "Config.h"
13 #include "Relocations.h"
14 #include "Symbols.h"
15 
16 #include "lld/Common/LLVM.h"
17 #include "lld/Common/Memory.h"
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/BitVector.h"
20 #include "llvm/ADT/CachedHashString.h"
21 #include "llvm/ADT/TinyPtrVector.h"
22 #include "llvm/BinaryFormat/MachO.h"
23 
24 namespace lld {
25 namespace macho {
26 
27 class InputFile;
28 class OutputSection;
29 
30 class InputSection {
31 public:
32   enum Kind : uint8_t {
33     ConcatKind,
34     CStringLiteralKind,
35     WordLiteralKind,
36   };
37 
38   Kind kind() const { return sectionKind; }
39   virtual ~InputSection() = default;
40   virtual uint64_t getSize() const { return data.size(); }
41   virtual bool empty() const { return data.empty(); }
42   InputFile *getFile() const { return section.file; }
43   StringRef getName() const { return section.name; }
44   StringRef getSegName() const { return section.segname; }
45   uint32_t getFlags() const { return section.flags; }
46   uint64_t getFileSize() const;
47   // Translates \p off -- an offset relative to this InputSection -- into an
48   // offset from the beginning of its parent OutputSection.
49   virtual uint64_t getOffset(uint64_t off) const = 0;
50   // The offset from the beginning of the file.
51   uint64_t getVA(uint64_t off) const;
52   // Return a user-friendly string for use in diagnostics.
53   // Format: /path/to/object.o:(symbol _func+0x123)
54   std::string getLocation(uint64_t off) const;
55   // Return the source line corresponding to an address, or the empty string.
56   // Format: Source.cpp:123 (/path/to/Source.cpp:123)
57   std::string getSourceLocation(uint64_t off) const;
58   // Return the relocation at \p off, if it exists. This does a linear search.
59   const Reloc *getRelocAt(uint32_t off) const;
60   // Whether the data at \p off in this InputSection is live.
61   virtual bool isLive(uint64_t off) const = 0;
62   virtual void markLive(uint64_t off) = 0;
63   virtual InputSection *canonical() { return this; }
64   virtual const InputSection *canonical() const { return this; }
65 
66 protected:
67   InputSection(Kind kind, const Section &section, ArrayRef<uint8_t> data,
68                uint32_t align)
69       : sectionKind(kind), keepUnique(false), hasAltEntry(false), align(align),
70         data(data), section(section) {}
71 
72   InputSection(const InputSection &rhs)
73       : sectionKind(rhs.sectionKind), keepUnique(false), hasAltEntry(false),
74         align(rhs.align), data(rhs.data), section(rhs.section) {}
75 
76   Kind sectionKind;
77 
78 public:
79   // is address assigned?
80   bool isFinal = false;
81   // keep the address of the symbol(s) in this section unique in the final
82   // binary ?
83   bool keepUnique : 1;
84   // Does this section have symbols at offsets other than zero? (NOTE: only
85   // applies to ConcatInputSections.)
86   bool hasAltEntry : 1;
87   uint32_t align = 1;
88 
89   OutputSection *parent = nullptr;
90   ArrayRef<uint8_t> data;
91   std::vector<Reloc> relocs;
92   // The symbols that belong to this InputSection, sorted by value. With
93   // .subsections_via_symbols, there is typically only one element here.
94   llvm::TinyPtrVector<Defined *> symbols;
95 
96   const Section &section;
97 
98 protected:
99   const Defined *getContainingSymbol(uint64_t off) const;
100 };
101 
102 // ConcatInputSections are combined into (Concat)OutputSections through simple
103 // concatenation, in contrast with literal sections which may have their
104 // contents merged before output.
105 class ConcatInputSection final : public InputSection {
106 public:
107   ConcatInputSection(const Section &section, ArrayRef<uint8_t> data,
108                      uint32_t align = 1)
109       : InputSection(ConcatKind, section, data, align) {}
110 
111   uint64_t getOffset(uint64_t off) const override { return outSecOff + off; }
112   uint64_t getVA() const { return InputSection::getVA(0); }
113   // ConcatInputSections are entirely live or dead, so the offset is irrelevant.
114   bool isLive(uint64_t off) const override { return live; }
115   void markLive(uint64_t off) override { live = true; }
116   bool isCoalescedWeak() const { return wasCoalesced && symbols.empty(); }
117   bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); }
118   void writeTo(uint8_t *buf);
119 
120   void foldIdentical(ConcatInputSection *redundant,
121                      Symbol::ICFFoldKind foldKind = Symbol::ICFFoldKind::Body);
122   ConcatInputSection *canonical() override {
123     return replacement ? replacement : this;
124   }
125   const InputSection *canonical() const override {
126     return replacement ? replacement : this;
127   }
128 
129   static bool classof(const InputSection *isec) {
130     return isec->kind() == ConcatKind;
131   }
132 
133   // Points to the surviving section after this one is folded by ICF
134   ConcatInputSection *replacement = nullptr;
135   // Equivalence-class ID for ICF
136   uint32_t icfEqClass[2] = {0, 0};
137 
138   // With subsections_via_symbols, most symbols have their own InputSection,
139   // and for weak symbols (e.g. from inline functions), only the
140   // InputSection from one translation unit will make it to the output,
141   // while all copies in other translation units are coalesced into the
142   // first and not copied to the output.
143   bool wasCoalesced = false;
144   bool live = !config->deadStrip;
145   bool hasCallSites = false;
146   // This variable has two usages. Initially, it represents the input order.
147   // After assignAddresses is called, it represents the offset from the
148   // beginning of the output section this section was assigned to.
149   uint64_t outSecOff = 0;
150 };
151 
// Initialize a fake InputSection that does not belong to any InputFile.
// The created ConcatInputSection will always have 'live=true'.
// \p segName and \p sectName give the segment and section names. When not
// supplied, \p flags is 0, \p data is empty and \p align is 1.
ConcatInputSection *makeSyntheticInputSection(StringRef segName,
                                              StringRef sectName,
                                              uint32_t flags = 0,
                                              ArrayRef<uint8_t> data = {},
                                              uint32_t align = 1);
159 
160 // Helper functions to make it easy to sprinkle asserts.
161 
162 inline bool shouldOmitFromOutput(InputSection *isec) {
163   return isa<ConcatInputSection>(isec) &&
164          cast<ConcatInputSection>(isec)->shouldOmitFromOutput();
165 }
166 
167 inline bool isCoalescedWeak(InputSection *isec) {
168   return isa<ConcatInputSection>(isec) &&
169          cast<ConcatInputSection>(isec)->isCoalescedWeak();
170 }
171 
172 // We allocate a lot of these and binary search on them, so they should be as
173 // compact as possible. Hence the use of 31 rather than 64 bits for the hash.
174 struct StringPiece {
175   // Offset from the start of the containing input section.
176   uint32_t inSecOff;
177   uint32_t live : 1;
178   // Only set if deduplicating literals
179   uint32_t hash : 31;
180   // Offset from the start of the containing output section.
181   uint64_t outSecOff = 0;
182 
183   StringPiece(uint64_t off, uint32_t hash)
184       : inSecOff(off), live(!config->deadStrip), hash(hash) {}
185 };
186 
187 static_assert(sizeof(StringPiece) == 16, "StringPiece is too big!");
188 
189 // CStringInputSections are composed of multiple null-terminated string
190 // literals, which we represent using StringPieces. These literals can be
191 // deduplicated and tail-merged, so translating offsets between the input and
192 // outputs sections is more complicated.
193 //
194 // NOTE: One significant difference between LLD and ld64 is that we merge all
195 // cstring literals, even those referenced directly by non-private symbols.
196 // ld64 is more conservative and does not do that. This was mostly done for
197 // implementation simplicity; if we find programs that need the more
198 // conservative behavior we can certainly implement that.
199 class CStringInputSection final : public InputSection {
200 public:
201   CStringInputSection(const Section &section, ArrayRef<uint8_t> data,
202                       uint32_t align, bool dedupLiterals)
203       : InputSection(CStringLiteralKind, section, data, align),
204         deduplicateLiterals(dedupLiterals) {}
205 
206   uint64_t getOffset(uint64_t off) const override;
207   bool isLive(uint64_t off) const override { return getStringPiece(off).live; }
208   void markLive(uint64_t off) override { getStringPiece(off).live = true; }
209   // Find the StringPiece that contains this offset.
210   StringPiece &getStringPiece(uint64_t off);
211   const StringPiece &getStringPiece(uint64_t off) const;
212   // Split at each null byte.
213   void splitIntoPieces();
214 
215   LLVM_ATTRIBUTE_ALWAYS_INLINE
216   StringRef getStringRef(size_t i) const {
217     size_t begin = pieces[i].inSecOff;
218     // The endpoint should be *at* the null terminator, not after. This matches
219     // the behavior of StringRef(const char *Str).
220     size_t end =
221         ((pieces.size() - 1 == i) ? data.size() : pieces[i + 1].inSecOff) - 1;
222     return toStringRef(data.slice(begin, end - begin));
223   }
224 
225   StringRef getStringRefAtOffset(uint64_t off) const {
226     return getStringRef(getStringPieceIndex(off));
227   }
228 
229   // Returns i'th piece as a CachedHashStringRef. This function is very hot when
230   // string merging is enabled, so we want to inline.
231   LLVM_ATTRIBUTE_ALWAYS_INLINE
232   llvm::CachedHashStringRef getCachedHashStringRef(size_t i) const {
233     assert(deduplicateLiterals);
234     return {getStringRef(i), pieces[i].hash};
235   }
236 
237   static bool classof(const InputSection *isec) {
238     return isec->kind() == CStringLiteralKind;
239   }
240 
241   bool deduplicateLiterals = false;
242   std::vector<StringPiece> pieces;
243 
244 private:
245   size_t getStringPieceIndex(uint64_t off) const;
246 };
247 
248 class WordLiteralInputSection final : public InputSection {
249 public:
250   WordLiteralInputSection(const Section &section, ArrayRef<uint8_t> data,
251                           uint32_t align);
252   uint64_t getOffset(uint64_t off) const override;
253   bool isLive(uint64_t off) const override {
254     return live[off >> power2LiteralSize];
255   }
256   void markLive(uint64_t off) override {
257     live[off >> power2LiteralSize] = true;
258   }
259 
260   static bool classof(const InputSection *isec) {
261     return isec->kind() == WordLiteralKind;
262   }
263 
264 private:
265   unsigned power2LiteralSize;
266   // The liveness of data[off] is tracked by live[off >> power2LiteralSize].
267   llvm::BitVector live;
268 };
269 
270 inline uint8_t sectionType(uint32_t flags) {
271   return flags & llvm::MachO::SECTION_TYPE;
272 }
273 
274 inline bool isZeroFill(uint32_t flags) {
275   return llvm::MachO::isVirtualSection(sectionType(flags));
276 }
277 
278 inline bool isThreadLocalVariables(uint32_t flags) {
279   return sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_VARIABLES;
280 }
281 
282 // These sections contain the data for initializing thread-local variables.
283 inline bool isThreadLocalData(uint32_t flags) {
284   return sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_REGULAR ||
285          sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_ZEROFILL;
286 }
287 
288 inline bool isDebugSection(uint32_t flags) {
289   return (flags & llvm::MachO::SECTION_ATTRIBUTES_USR) ==
290          llvm::MachO::S_ATTR_DEBUG;
291 }
292 
293 inline bool isWordLiteralSection(uint32_t flags) {
294   return sectionType(flags) == llvm::MachO::S_4BYTE_LITERALS ||
295          sectionType(flags) == llvm::MachO::S_8BYTE_LITERALS ||
296          sectionType(flags) == llvm::MachO::S_16BYTE_LITERALS;
297 }
298 
// Section classification predicates, defined out of line. Going by their
// names, each reports whether the given section is of the named kind; see the
// definitions for the exact criteria.
bool isCodeSection(const InputSection *);
bool isCfStringSection(const InputSection *);
bool isClassRefsSection(const InputSection *);
bool isSelRefsSection(const InputSection *);
bool isEhFrameSection(const InputSection *);
bool isGccExceptTabSection(const InputSection *);
305 
// Global list of ConcatInputSection pointers; the definition lives in another
// translation unit.
extern std::vector<ConcatInputSection *> inputSections;
// This is used as a counter for specifying input order for input sections
308 extern int inputSectionsOrder;
309 
// Section-name string constants. Each is a null-terminated character array
// usable in constant expressions. Trailing underscores (export_, const_)
// avoid clashes with C++ keywords.
namespace section_names {

constexpr char authGot[] = "__auth_got";
constexpr char authPtr[] = "__auth_ptr";
constexpr char binding[] = "__binding";
constexpr char bitcodeBundle[] = "__bundle";
constexpr char cString[] = "__cstring";
constexpr char cfString[] = "__cfstring";
constexpr char cgProfile[] = "__cg_profile";
constexpr char chainFixups[] = "__chainfixups";
constexpr char codeSignature[] = "__code_signature";
constexpr char common[] = "__common";
constexpr char compactUnwind[] = "__compact_unwind";
constexpr char data[] = "__data";
constexpr char debugAbbrev[] = "__debug_abbrev";
constexpr char debugInfo[] = "__debug_info";
constexpr char debugLine[] = "__debug_line";
constexpr char debugStr[] = "__debug_str";
constexpr char debugStrOffs[] = "__debug_str_offs";
constexpr char ehFrame[] = "__eh_frame";
constexpr char gccExceptTab[] = "__gcc_except_tab";
constexpr char export_[] = "__export";
constexpr char dataInCode[] = "__data_in_code";
constexpr char functionStarts[] = "__func_starts";
constexpr char got[] = "__got";
constexpr char header[] = "__mach_header";
constexpr char indirectSymbolTable[] = "__ind_sym_tab";
constexpr char initOffsets[] = "__init_offsets";
constexpr char const_[] = "__const";
constexpr char lazySymbolPtr[] = "__la_symbol_ptr";
constexpr char lazyBinding[] = "__lazy_binding";
constexpr char literals[] = "__literals";
constexpr char functionMap[] = "__llvm_merge";
constexpr char moduleInitFunc[] = "__mod_init_func";
constexpr char moduleTermFunc[] = "__mod_term_func";
constexpr char nonLazySymbolPtr[] = "__nl_symbol_ptr";
constexpr char objcCatList[] = "__objc_catlist";
constexpr char objcClassList[] = "__objc_classlist";
constexpr char objcMethList[] = "__objc_methlist";
constexpr char objcClassRefs[] = "__objc_classrefs";
constexpr char objcConst[] = "__objc_const";
constexpr char objCImageInfo[] = "__objc_imageinfo";
constexpr char objcStubs[] = "__objc_stubs";
constexpr char objcSelrefs[] = "__objc_selrefs";
constexpr char objcMethname[] = "__objc_methname";
constexpr char objcNonLazyCatList[] = "__objc_nlcatlist";
constexpr char objcNonLazyClassList[] = "__objc_nlclslist";
constexpr char objcProtoList[] = "__objc_protolist";
constexpr char outlinedHashTree[] = "__llvm_outline";
constexpr char pageZero[] = "__pagezero";
constexpr char pointers[] = "__pointers";
constexpr char rebase[] = "__rebase";
constexpr char staticInit[] = "__StaticInit";
constexpr char stringTable[] = "__string_table";
constexpr char stubHelper[] = "__stub_helper";
constexpr char stubs[] = "__stubs";
constexpr char swift[] = "__swift";
constexpr char symbolTable[] = "__symbol_table";
constexpr char textCoalNt[] = "__textcoal_nt";
constexpr char text[] = "__text";
constexpr char threadPtrs[] = "__thread_ptrs";
constexpr char threadVars[] = "__thread_vars";
constexpr char unwindInfo[] = "__unwind_info";
constexpr char weakBinding[] = "__weak_binding";
constexpr char zeroFill[] = "__zerofill";
constexpr char addrSig[] = "__llvm_addrsig";

} // namespace section_names
378 
// Registers \p inputSection with the linker; defined out of line.
// NOTE(review): presumably this is what populates the global input-section
// bookkeeping declared in this header -- confirm against the definition.
void addInputSection(InputSection *inputSection);
380 } // namespace macho
381 
// Returns a string describing the given input section; defined out of line.
// NOTE(review): presumably intended for diagnostics -- confirm the exact
// format against the definition.
std::string toString(const macho::InputSection *);
383 
384 } // namespace lld
385 
386 #endif
387