xref: /llvm-project/lld/ELF/MarkLive.cpp (revision 3733ed6f1c6b0eef1e13e175ac81ad309fc0b080)
1 //===- MarkLive.cpp -------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements --gc-sections, which is a feature to remove unused
10 // sections from output. Unused sections are sections that are not reachable
11 // from known GC-root symbols or sections. Naturally the feature is
12 // implemented as a mark-sweep garbage collector.
13 //
14 // Here's how it works. Each InputSectionBase has a "Live" bit. The bit is off
15 // by default. Starting with GC-root symbols or sections, markLive function
16 // defined in this file visits all reachable sections to set their Live
17 // bits. Writer will then ignore sections whose Live bits are off, so that
18 // such sections are not included into output.
19 //
20 //===----------------------------------------------------------------------===//
21 
22 #include "MarkLive.h"
23 #include "InputFiles.h"
24 #include "InputSection.h"
25 #include "LinkerScript.h"
26 #include "SymbolTable.h"
27 #include "Symbols.h"
28 #include "SyntheticSections.h"
29 #include "Target.h"
30 #include "lld/Common/CommonLinkerContext.h"
31 #include "lld/Common/Strings.h"
32 #include "llvm/ADT/STLExtras.h"
33 #include "llvm/Object/ELF.h"
34 #include "llvm/Support/TimeProfiler.h"
35 #include <vector>
36 
37 using namespace llvm;
38 using namespace llvm::ELF;
39 using namespace llvm::object;
40 using namespace llvm::support::endian;
41 using namespace lld;
42 using namespace lld::elf;
43 
44 namespace {
45 template <class ELFT> class MarkLive {
46 public:
47   MarkLive(Ctx &ctx, unsigned partition) : ctx(ctx), partition(partition) {}
48 
49   void run();
50   void moveToMain();
51 
52 private:
53   void enqueue(InputSectionBase *sec, uint64_t offset);
54   void markSymbol(Symbol *sym);
55   void mark();
56 
57   template <class RelTy>
58   void resolveReloc(InputSectionBase &sec, RelTy &rel, bool fromFDE);
59 
60   template <class RelTy>
61   void scanEhFrameSection(EhInputSection &eh, ArrayRef<RelTy> rels);
62 
63   Ctx &ctx;
64   // The index of the partition that we are currently processing.
65   unsigned partition;
66 
67   // A list of sections to visit.
68   SmallVector<InputSection *, 0> queue;
69 
70   // There are normally few input sections whose names are valid C
71   // identifiers, so we just store a SmallVector instead of a multimap.
72   DenseMap<StringRef, SmallVector<InputSectionBase *, 0>> cNamedSections;
73 };
74 } // namespace
75 
76 template <class ELFT>
77 static uint64_t getAddend(Ctx &ctx, InputSectionBase &sec,
78                           const typename ELFT::Rel &rel) {
79   return ctx.target->getImplicitAddend(sec.content().begin() + rel.r_offset,
80                                        rel.getType(ctx.arg.isMips64EL));
81 }
82 
83 template <class ELFT>
84 static uint64_t getAddend(Ctx &, InputSectionBase &sec,
85                           const typename ELFT::Rela &rel) {
86   return rel.r_addend;
87 }
88 
89 // Currently, we assume all input CREL relocations have an explicit addend.
90 template <class ELFT>
91 static uint64_t getAddend(Ctx &, InputSectionBase &sec,
92                           const typename ELFT::Crel &rel) {
93   return rel.r_addend;
94 }
95 
96 template <class ELFT>
97 template <class RelTy>
98 void MarkLive<ELFT>::resolveReloc(InputSectionBase &sec, RelTy &rel,
99                                   bool fromFDE) {
100   // If a symbol is referenced in a live section, it is used.
101   Symbol &sym = sec.file->getRelocTargetSym(rel);
102   sym.used = true;
103 
104   if (auto *d = dyn_cast<Defined>(&sym)) {
105     auto *relSec = dyn_cast_or_null<InputSectionBase>(d->section);
106     if (!relSec)
107       return;
108 
109     uint64_t offset = d->value;
110     if (d->isSection())
111       offset += getAddend<ELFT>(ctx, sec, rel);
112 
113     // fromFDE being true means this is referenced by a FDE in a .eh_frame
114     // piece. The relocation points to the described function or to a LSDA. We
115     // only need to keep the LSDA live, so ignore anything that points to
116     // executable sections. If the LSDA is in a section group or has the
117     // SHF_LINK_ORDER flag, we ignore the relocation as well because (a) if the
118     // associated text section is live, the LSDA will be retained due to section
119     // group/SHF_LINK_ORDER rules (b) if the associated text section should be
120     // discarded, marking the LSDA will unnecessarily retain the text section.
121     if (!(fromFDE && ((relSec->flags & (SHF_EXECINSTR | SHF_LINK_ORDER)) ||
122                       relSec->nextInSectionGroup)))
123       enqueue(relSec, offset);
124     return;
125   }
126 
127   if (auto *ss = dyn_cast<SharedSymbol>(&sym))
128     if (!ss->isWeak())
129       cast<SharedFile>(ss->file)->isNeeded = true;
130 
131   for (InputSectionBase *sec : cNamedSections.lookup(sym.getName()))
132     enqueue(sec, 0);
133 }
134 
135 // The .eh_frame section is an unfortunate special case.
136 // The section is divided in CIEs and FDEs and the relocations it can have are
137 // * CIEs can refer to a personality function.
138 // * FDEs can refer to a LSDA
139 // * FDEs refer to the function they contain information about
140 // The last kind of relocation cannot keep the referred section alive, or they
141 // would keep everything alive in a common object file. In fact, each FDE is
142 // alive if the section it refers to is alive.
143 // To keep things simple, in here we just ignore the last relocation kind. The
144 // other two keep the referred section alive.
145 //
146 // A possible improvement would be to fully process .eh_frame in the middle of
147 // the gc pass. With that we would be able to also gc some sections holding
148 // LSDAs and personality functions if we found that they were unused.
149 template <class ELFT>
150 template <class RelTy>
151 void MarkLive<ELFT>::scanEhFrameSection(EhInputSection &eh,
152                                         ArrayRef<RelTy> rels) {
153   for (const EhSectionPiece &cie : eh.cies)
154     if (cie.firstRelocation != unsigned(-1))
155       resolveReloc(eh, rels[cie.firstRelocation], false);
156   for (const EhSectionPiece &fde : eh.fdes) {
157     size_t firstRelI = fde.firstRelocation;
158     if (firstRelI == (unsigned)-1)
159       continue;
160     uint64_t pieceEnd = fde.inputOff + fde.size;
161     for (size_t j = firstRelI, end2 = rels.size();
162          j < end2 && rels[j].r_offset < pieceEnd; ++j)
163       resolveReloc(eh, rels[j], true);
164   }
165 }
166 
167 // Some sections are used directly by the loader, so they should never be
168 // garbage-collected. This function returns true if a given section is such
169 // section.
170 static bool isReserved(InputSectionBase *sec) {
171   switch (sec->type) {
172   case SHT_FINI_ARRAY:
173   case SHT_INIT_ARRAY:
174   case SHT_PREINIT_ARRAY:
175     return true;
176   case SHT_NOTE:
177     // SHT_NOTE sections in a group are subject to garbage collection.
178     return !sec->nextInSectionGroup;
179   default:
180     // Support SHT_PROGBITS .init_array (https://golang.org/issue/50295) and
181     // .init_array.N (https://github.com/rust-lang/rust/issues/92181) for a
182     // while.
183     StringRef s = sec->name;
184     return s == ".init" || s == ".fini" || s.starts_with(".init_array") ||
185            s == ".jcr" || s.starts_with(".ctors") || s.starts_with(".dtors");
186   }
187 }
188 
189 template <class ELFT>
190 void MarkLive<ELFT>::enqueue(InputSectionBase *sec, uint64_t offset) {
191   // Usually, a whole section is marked as live or dead, but in mergeable
192   // (splittable) sections, each piece of data has independent liveness bit.
193   // So we explicitly tell it which offset is in use.
194   if (auto *ms = dyn_cast<MergeInputSection>(sec))
195     ms->getSectionPiece(offset).live = true;
196 
197   // Set Sec->Partition to the meet (i.e. the "minimum") of Partition and
198   // Sec->Partition in the following lattice: 1 < other < 0. If Sec->Partition
199   // doesn't change, we don't need to do anything.
200   if (sec->partition == 1 || sec->partition == partition)
201     return;
202   sec->partition = sec->partition ? 1 : partition;
203 
204   // Add input section to the queue.
205   if (InputSection *s = dyn_cast<InputSection>(sec))
206     queue.push_back(s);
207 }
208 
209 template <class ELFT> void MarkLive<ELFT>::markSymbol(Symbol *sym) {
210   if (auto *d = dyn_cast_or_null<Defined>(sym))
211     if (auto *isec = dyn_cast_or_null<InputSectionBase>(d->section))
212       enqueue(isec, d->value);
213 }
214 
215 // This is the main function of the garbage collector.
216 // Starting from GC-root sections, this function visits all reachable
217 // sections to set their "Live" bits.
218 template <class ELFT> void MarkLive<ELFT>::run() {
219   // Add GC root symbols.
220 
221   // Preserve externally-visible symbols if the symbols defined by this
222   // file can interpose other ELF file's symbols at runtime.
223   for (Symbol *sym : ctx.symtab->getSymbols())
224     if (sym->isExported && sym->partition == partition)
225       markSymbol(sym);
226 
227   // If this isn't the main partition, that's all that we need to preserve.
228   if (partition != 1) {
229     mark();
230     return;
231   }
232 
233   markSymbol(ctx.symtab->find(ctx.arg.entry));
234   markSymbol(ctx.symtab->find(ctx.arg.init));
235   markSymbol(ctx.symtab->find(ctx.arg.fini));
236   for (StringRef s : ctx.arg.undefined)
237     markSymbol(ctx.symtab->find(s));
238   for (StringRef s : ctx.script->referencedSymbols)
239     markSymbol(ctx.symtab->find(s));
240   for (auto [symName, _] : ctx.symtab->cmseSymMap) {
241     markSymbol(ctx.symtab->cmseSymMap[symName].sym);
242     markSymbol(ctx.symtab->cmseSymMap[symName].acleSeSym);
243   }
244 
245   // Mark .eh_frame sections as live because there are usually no relocations
246   // that point to .eh_frames. Otherwise, the garbage collector would drop
247   // all of them. We also want to preserve personality routines and LSDA
248   // referenced by .eh_frame sections, so we scan them for that here.
249   for (EhInputSection *eh : ctx.ehInputSections) {
250     const RelsOrRelas<ELFT> rels =
251         eh->template relsOrRelas<ELFT>(/*supportsCrel=*/false);
252     if (rels.areRelocsRel())
253       scanEhFrameSection(*eh, rels.rels);
254     else if (rels.relas.size())
255       scanEhFrameSection(*eh, rels.relas);
256   }
257   for (InputSectionBase *sec : ctx.inputSections) {
258     if (sec->flags & SHF_GNU_RETAIN) {
259       enqueue(sec, 0);
260       continue;
261     }
262     if (sec->flags & SHF_LINK_ORDER)
263       continue;
264 
265     // Usually, non-SHF_ALLOC sections are not removed even if they are
266     // unreachable through relocations because reachability is not a good signal
267     // whether they are garbage or not (e.g. there is usually no section
268     // referring to a .comment section, but we want to keep it.) When a
269     // non-SHF_ALLOC section is retained, we also retain sections dependent on
270     // it.
271     //
272     // Note on SHF_LINK_ORDER: Such sections contain metadata and they
273     // have a reverse dependency on the InputSection they are linked with.
274     // We are able to garbage collect them.
275     //
276     // Note on SHF_REL{,A}: Such sections reach here only when -r
277     // or --emit-reloc were given. And they are subject of garbage
278     // collection because, if we remove a text section, we also
279     // remove its relocation section.
280     //
281     // Note on nextInSectionGroup: The ELF spec says that group sections are
282     // included or omitted as a unit. We take the interpretation that:
283     //
284     // - Group members (nextInSectionGroup != nullptr) are subject to garbage
285     //   collection.
286     // - Groups members are retained or discarded as a unit.
287     if (!(sec->flags & SHF_ALLOC)) {
288       if (!isStaticRelSecType(sec->type) && !sec->nextInSectionGroup) {
289         sec->markLive();
290         for (InputSection *isec : sec->dependentSections)
291           isec->markLive();
292       }
293     }
294 
295     // Preserve special sections and those which are specified in linker
296     // script KEEP command.
297     if (isReserved(sec) || ctx.script->shouldKeep(sec)) {
298       enqueue(sec, 0);
299     } else if ((!ctx.arg.zStartStopGC || sec->name.starts_with("__libc_")) &&
300                isValidCIdentifier(sec->name)) {
301       // As a workaround for glibc libc.a before 2.34
302       // (https://sourceware.org/PR27492), retain __libc_atexit and similar
303       // sections regardless of zStartStopGC.
304       cNamedSections[ctx.saver.save("__start_" + sec->name)].push_back(sec);
305       cNamedSections[ctx.saver.save("__stop_" + sec->name)].push_back(sec);
306     }
307   }
308 
309   mark();
310 }
311 
312 template <class ELFT> void MarkLive<ELFT>::mark() {
313   // Mark all reachable sections.
314   while (!queue.empty()) {
315     InputSectionBase &sec = *queue.pop_back_val();
316 
317     const RelsOrRelas<ELFT> rels = sec.template relsOrRelas<ELFT>();
318     for (const typename ELFT::Rel &rel : rels.rels)
319       resolveReloc(sec, rel, false);
320     for (const typename ELFT::Rela &rel : rels.relas)
321       resolveReloc(sec, rel, false);
322     for (const typename ELFT::Crel &rel : rels.crels)
323       resolveReloc(sec, rel, false);
324 
325     for (InputSectionBase *isec : sec.dependentSections)
326       enqueue(isec, 0);
327 
328     // Mark the next group member.
329     if (sec.nextInSectionGroup)
330       enqueue(sec.nextInSectionGroup, 0);
331   }
332 }
333 
334 // Move the sections for some symbols to the main partition, specifically ifuncs
335 // (because they can result in an IRELATIVE being added to the main partition's
336 // GOT, which means that the ifunc must be available when the main partition is
337 // loaded) and TLS symbols (because we only know how to correctly process TLS
338 // relocations for the main partition).
339 //
340 // We also need to move sections whose names are C identifiers that are referred
341 // to from __start_/__stop_ symbols because there will only be one set of
342 // symbols for the whole program.
343 template <class ELFT> void MarkLive<ELFT>::moveToMain() {
344   for (ELFFileBase *file : ctx.objectFiles)
345     for (Symbol *s : file->getSymbols())
346       if (auto *d = dyn_cast<Defined>(s))
347         if ((d->type == STT_GNU_IFUNC || d->type == STT_TLS) && d->section &&
348             d->section->isLive())
349           markSymbol(s);
350 
351   for (InputSectionBase *sec : ctx.inputSections) {
352     if (!sec->isLive() || !isValidCIdentifier(sec->name))
353       continue;
354     if (ctx.symtab->find(("__start_" + sec->name).str()) ||
355         ctx.symtab->find(("__stop_" + sec->name).str()))
356       enqueue(sec, 0);
357   }
358 
359   mark();
360 }
361 
362 // Before calling this function, Live bits are off for all
363 // input sections. This function make some or all of them on
364 // so that they are emitted to the output file.
365 template <class ELFT> void elf::markLive(Ctx &ctx) {
366   llvm::TimeTraceScope timeScope("markLive");
367   // If --gc-sections is not given, retain all input sections.
368   if (!ctx.arg.gcSections) {
369     // If a DSO defines a symbol referenced in a regular object, it is needed.
370     for (Symbol *sym : ctx.symtab->getSymbols())
371       if (auto *s = dyn_cast<SharedSymbol>(sym))
372         if (s->isUsedInRegularObj && !s->isWeak())
373           cast<SharedFile>(s->file)->isNeeded = true;
374     return;
375   }
376 
377   for (InputSectionBase *sec : ctx.inputSections)
378     sec->markDead();
379 
380   // Follow the graph to mark all live sections.
381   for (unsigned i = 1, e = ctx.partitions.size(); i <= e; ++i)
382     MarkLive<ELFT>(ctx, i).run();
383 
384   // If we have multiple partitions, some sections need to live in the main
385   // partition even if they were allocated to a loadable partition. Move them
386   // there now.
387   if (ctx.partitions.size() != 1)
388     MarkLive<ELFT>(ctx, 1).moveToMain();
389 
390   // Report garbage-collected sections.
391   if (ctx.arg.printGcSections)
392     for (InputSectionBase *sec : ctx.inputSections)
393       if (!sec->isLive())
394         Msg(ctx) << "removing unused section " << sec;
395 }
396 
397 template void elf::markLive<ELF32LE>(Ctx &);
398 template void elf::markLive<ELF32BE>(Ctx &);
399 template void elf::markLive<ELF64LE>(Ctx &);
400 template void elf::markLive<ELF64BE>(Ctx &);
401