xref: /llvm-project/lld/wasm/MarkLive.cpp (revision 3792b36234b6c87d728f0a905543e284bf961460)
1 //===- MarkLive.cpp -------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements --gc-sections, which is a feature to remove unused
10 // chunks from the output. Unused chunks are those that are not reachable from
11 // known root symbols or chunks. This feature is implemented as a mark-sweep
12 // garbage collector.
13 //
14 // Here's how it works. Each InputChunk has a "Live" bit. The bit is off by
15 // default. Starting with the GC-roots, visit all reachable chunks and set their
16 // Live bits. The Writer will then ignore chunks whose Live bits are off, so
17 // that such chunks do not appear in the output.
18 //
19 //===----------------------------------------------------------------------===//
20 
21 #include "MarkLive.h"
22 #include "Config.h"
23 #include "InputChunks.h"
24 #include "InputElement.h"
25 #include "SymbolTable.h"
26 #include "Symbols.h"
27 
28 #define DEBUG_TYPE "lld"
29 
30 using namespace llvm;
31 using namespace llvm::wasm;
32 
33 namespace lld::wasm {
34 
35 namespace {
36 
37 class MarkLive {
38 public:
39   void run();
40 
41 private:
42   void enqueue(Symbol *sym);
43   void enqueue(InputChunk *chunk);
44   void enqueueInitFunctions(const ObjFile *sym);
45   void enqueueRetainedSegments(const ObjFile *file);
46   void mark();
47   bool isCallCtorsLive();
48 
49   // A list of chunks to visit.
50   SmallVector<InputChunk *, 256> queue;
51 };
52 
53 } // namespace
54 
55 void MarkLive::enqueue(Symbol *sym) {
56   if (!sym || sym->isLive())
57     return;
58   LLVM_DEBUG(dbgs() << "markLive: " << sym->getName() << "\n");
59 
60   InputFile *file = sym->getFile();
61   bool markImplicitDeps = file && !file->isLive() && sym->isDefined();
62 
63   sym->markLive();
64 
65   if (markImplicitDeps) {
66     if (auto obj = dyn_cast<ObjFile>(file)) {
67       // Mark as live the ctor functions in the object that defines this symbol.
68       // The ctor functions are all referenced by the synthetic callCtors
69       // function. However, this function does not contain relocations so we
70       // have to manually mark the ctors as live.
71       enqueueInitFunctions(obj);
72       // Mark retained segments in the object that defines this symbol live.
73       enqueueRetainedSegments(obj);
74     }
75   }
76 
77   if (InputChunk *chunk = sym->getChunk())
78     queue.push_back(chunk);
79 }
80 
81 void MarkLive::enqueue(InputChunk *chunk) {
82   LLVM_DEBUG(dbgs() << "markLive: " << toString(chunk) << "\n");
83   chunk->live = true;
84   queue.push_back(chunk);
85 }
86 
87 // The ctor functions are all referenced by the synthetic callCtors
88 // function.  However, this function does not contain relocations so we
89 // have to manually mark the ctors as live.
90 void MarkLive::enqueueInitFunctions(const ObjFile *obj) {
91   const WasmLinkingData &l = obj->getWasmObj()->linkingData();
92   for (const WasmInitFunc &f : l.InitFunctions) {
93     auto *initSym = obj->getFunctionSymbol(f.Symbol);
94     if (!initSym->isDiscarded())
95       enqueue(initSym);
96   }
97 }
98 
99 // Mark segments flagged by segment-level no-strip. Segment-level no-strip is
100 // usually used to retain segments without having symbol table entry.
101 void MarkLive::enqueueRetainedSegments(const ObjFile *file) {
102   for (InputChunk *chunk : file->segments)
103     if (chunk->isRetained())
104       enqueue(chunk);
105 }
106 
107 void MarkLive::run() {
108   // Add GC root symbols.
109   if (!ctx.arg.entry.empty())
110     enqueue(symtab->find(ctx.arg.entry));
111 
112   // We need to preserve any no-strip or exported symbol
113   for (Symbol *sym : symtab->symbols())
114     if (sym->isNoStrip() || sym->isExported())
115       enqueue(sym);
116 
117   if (WasmSym::callDtors)
118     enqueue(WasmSym::callDtors);
119 
120   for (const ObjFile *obj : ctx.objectFiles)
121     if (obj->isLive()) {
122       // Enqueue constructors in objects explicitly live from the command-line.
123       enqueueInitFunctions(obj);
124       // Enqueue retained segments in objects explicitly live from the
125       // command-line.
126       enqueueRetainedSegments(obj);
127     }
128 
129   mark();
130 
131   // If we have any non-discarded init functions, mark `__wasm_call_ctors` as
132   // live so that we assign it an index and call it.
133   if (isCallCtorsLive())
134     WasmSym::callCtors->markLive();
135 }
136 
137 void MarkLive::mark() {
138   // Follow relocations to mark all reachable chunks.
139   while (!queue.empty()) {
140     InputChunk *c = queue.pop_back_val();
141 
142     for (const WasmRelocation reloc : c->getRelocations()) {
143       if (reloc.Type == R_WASM_TYPE_INDEX_LEB)
144         continue;
145       Symbol *sym = c->file->getSymbol(reloc.Index);
146 
147       // If the function has been assigned the special index zero in the table,
148       // the relocation doesn't pull in the function body, since the function
149       // won't actually go in the table (the runtime will trap attempts to call
150       // that index, since we don't use it).  A function with a table index of
151       // zero is only reachable via "call", not via "call_indirect".  The stub
152       // functions used for weak-undefined symbols have this behaviour (compare
153       // equal to null pointer, only reachable via direct call).
154       if (reloc.Type == R_WASM_TABLE_INDEX_SLEB ||
155           reloc.Type == R_WASM_TABLE_INDEX_SLEB64 ||
156           reloc.Type == R_WASM_TABLE_INDEX_I32 ||
157           reloc.Type == R_WASM_TABLE_INDEX_I64) {
158         auto *funcSym = cast<FunctionSymbol>(sym);
159         if (funcSym->isStub)
160           continue;
161       }
162 
163       enqueue(sym);
164     }
165   }
166 }
167 
168 void markLive() {
169   if (!ctx.arg.gcSections)
170     return;
171 
172   LLVM_DEBUG(dbgs() << "markLive\n");
173 
174   MarkLive marker;
175   marker.run();
176 
177   // Report garbage-collected sections.
178   if (ctx.arg.printGcSections) {
179     for (const ObjFile *obj : ctx.objectFiles) {
180       for (InputChunk *c : obj->functions)
181         if (!c->live)
182           message("removing unused section " + toString(c));
183       for (InputChunk *c : obj->segments)
184         if (!c->live)
185           message("removing unused section " + toString(c));
186       for (InputGlobal *g : obj->globals)
187         if (!g->live)
188           message("removing unused section " + toString(g));
189       for (InputTag *t : obj->tags)
190         if (!t->live)
191           message("removing unused section " + toString(t));
192       for (InputTable *t : obj->tables)
193         if (!t->live)
194           message("removing unused section " + toString(t));
195     }
196     for (InputChunk *c : ctx.syntheticFunctions)
197       if (!c->live)
198         message("removing unused section " + toString(c));
199     for (InputGlobal *g : ctx.syntheticGlobals)
200       if (!g->live)
201         message("removing unused section " + toString(g));
202     for (InputTable *t : ctx.syntheticTables)
203       if (!t->live)
204         message("removing unused section " + toString(t));
205   }
206 }
207 
208 bool MarkLive::isCallCtorsLive() {
209   // In a reloctable link, we don't call `__wasm_call_ctors`.
210   if (ctx.arg.relocatable)
211     return false;
212 
213   // In Emscripten-style PIC, we call `__wasm_call_ctors` which calls
214   // `__wasm_apply_data_relocs`.
215   if (ctx.isPic)
216     return true;
217 
218   // If there are any init functions, mark `__wasm_call_ctors` live so that
219   // it can call them.
220   for (const ObjFile *file : ctx.objectFiles) {
221     const WasmLinkingData &l = file->getWasmObj()->linkingData();
222     for (const WasmInitFunc &f : l.InitFunctions) {
223       auto *sym = file->getFunctionSymbol(f.Symbol);
224       if (!sym->isDiscarded() && sym->isLive())
225         return true;
226     }
227   }
228 
229   return false;
230 }
231 
232 } // namespace lld::wasm
233