1 //===- MarkLive.cpp -------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements --gc-sections, which is a feature to remove unused 10 // chunks from the output. Unused chunks are those that are not reachable from 11 // known root symbols or chunks. This feature is implemented as a mark-sweep 12 // garbage collector. 13 // 14 // Here's how it works. Each InputChunk has a "Live" bit. The bit is off by 15 // default. Starting with the GC-roots, visit all reachable chunks and set their 16 // Live bits. The Writer will then ignore chunks whose Live bits are off, so 17 // that such chunk are not appear in the output. 18 // 19 //===----------------------------------------------------------------------===// 20 21 #include "MarkLive.h" 22 #include "Config.h" 23 #include "InputChunks.h" 24 #include "InputElement.h" 25 #include "SymbolTable.h" 26 #include "Symbols.h" 27 28 #define DEBUG_TYPE "lld" 29 30 using namespace llvm; 31 using namespace llvm::wasm; 32 33 namespace lld::wasm { 34 35 namespace { 36 37 class MarkLive { 38 public: 39 void run(); 40 41 private: 42 void enqueue(Symbol *sym); 43 void enqueue(InputChunk *chunk); 44 void enqueueInitFunctions(const ObjFile *sym); 45 void enqueueRetainedSegments(const ObjFile *file); 46 void mark(); 47 bool isCallCtorsLive(); 48 49 // A list of chunks to visit. 50 SmallVector<InputChunk *, 256> queue; 51 }; 52 53 } // namespace 54 55 void MarkLive::enqueue(Symbol *sym) { 56 if (!sym || sym->isLive()) 57 return; 58 LLVM_DEBUG(dbgs() << "markLive: " << sym->getName() << "\n"); 59 60 InputFile *file = sym->getFile(); 61 bool markImplicitDeps = file && !file->isLive() && sym->isDefined(); 62 63 sym->markLive(); 64 65 if (markImplicitDeps) { 66 if (auto obj = dyn_cast<ObjFile>(file)) { 67 // Mark as live the ctor functions in the object that defines this symbol. 68 // The ctor functions are all referenced by the synthetic callCtors 69 // function. However, this function does not contain relocations so we 70 // have to manually mark the ctors as live. 71 enqueueInitFunctions(obj); 72 // Mark retained segments in the object that defines this symbol live. 73 enqueueRetainedSegments(obj); 74 } 75 } 76 77 if (InputChunk *chunk = sym->getChunk()) 78 queue.push_back(chunk); 79 } 80 81 void MarkLive::enqueue(InputChunk *chunk) { 82 LLVM_DEBUG(dbgs() << "markLive: " << toString(chunk) << "\n"); 83 chunk->live = true; 84 queue.push_back(chunk); 85 } 86 87 // The ctor functions are all referenced by the synthetic callCtors 88 // function. However, this function does not contain relocations so we 89 // have to manually mark the ctors as live. 90 void MarkLive::enqueueInitFunctions(const ObjFile *obj) { 91 const WasmLinkingData &l = obj->getWasmObj()->linkingData(); 92 for (const WasmInitFunc &f : l.InitFunctions) { 93 auto *initSym = obj->getFunctionSymbol(f.Symbol); 94 if (!initSym->isDiscarded()) 95 enqueue(initSym); 96 } 97 } 98 99 // Mark segments flagged by segment-level no-strip. Segment-level no-strip is 100 // usually used to retain segments without having symbol table entry. 101 void MarkLive::enqueueRetainedSegments(const ObjFile *file) { 102 for (InputChunk *chunk : file->segments) 103 if (chunk->isRetained()) 104 enqueue(chunk); 105 } 106 107 void MarkLive::run() { 108 // Add GC root symbols. 109 if (!ctx.arg.entry.empty()) 110 enqueue(symtab->find(ctx.arg.entry)); 111 112 // We need to preserve any no-strip or exported symbol 113 for (Symbol *sym : symtab->symbols()) 114 if (sym->isNoStrip() || sym->isExported()) 115 enqueue(sym); 116 117 if (WasmSym::callDtors) 118 enqueue(WasmSym::callDtors); 119 120 for (const ObjFile *obj : ctx.objectFiles) 121 if (obj->isLive()) { 122 // Enqueue constructors in objects explicitly live from the command-line. 123 enqueueInitFunctions(obj); 124 // Enqueue retained segments in objects explicitly live from the 125 // command-line. 126 enqueueRetainedSegments(obj); 127 } 128 129 mark(); 130 131 // If we have any non-discarded init functions, mark `__wasm_call_ctors` as 132 // live so that we assign it an index and call it. 133 if (isCallCtorsLive()) 134 WasmSym::callCtors->markLive(); 135 } 136 137 void MarkLive::mark() { 138 // Follow relocations to mark all reachable chunks. 139 while (!queue.empty()) { 140 InputChunk *c = queue.pop_back_val(); 141 142 for (const WasmRelocation reloc : c->getRelocations()) { 143 if (reloc.Type == R_WASM_TYPE_INDEX_LEB) 144 continue; 145 Symbol *sym = c->file->getSymbol(reloc.Index); 146 147 // If the function has been assigned the special index zero in the table, 148 // the relocation doesn't pull in the function body, since the function 149 // won't actually go in the table (the runtime will trap attempts to call 150 // that index, since we don't use it). A function with a table index of 151 // zero is only reachable via "call", not via "call_indirect". The stub 152 // functions used for weak-undefined symbols have this behaviour (compare 153 // equal to null pointer, only reachable via direct call). 154 if (reloc.Type == R_WASM_TABLE_INDEX_SLEB || 155 reloc.Type == R_WASM_TABLE_INDEX_SLEB64 || 156 reloc.Type == R_WASM_TABLE_INDEX_I32 || 157 reloc.Type == R_WASM_TABLE_INDEX_I64) { 158 auto *funcSym = cast<FunctionSymbol>(sym); 159 if (funcSym->isStub) 160 continue; 161 } 162 163 enqueue(sym); 164 } 165 } 166 } 167 168 void markLive() { 169 if (!ctx.arg.gcSections) 170 return; 171 172 LLVM_DEBUG(dbgs() << "markLive\n"); 173 174 MarkLive marker; 175 marker.run(); 176 177 // Report garbage-collected sections. 178 if (ctx.arg.printGcSections) { 179 for (const ObjFile *obj : ctx.objectFiles) { 180 for (InputChunk *c : obj->functions) 181 if (!c->live) 182 message("removing unused section " + toString(c)); 183 for (InputChunk *c : obj->segments) 184 if (!c->live) 185 message("removing unused section " + toString(c)); 186 for (InputGlobal *g : obj->globals) 187 if (!g->live) 188 message("removing unused section " + toString(g)); 189 for (InputTag *t : obj->tags) 190 if (!t->live) 191 message("removing unused section " + toString(t)); 192 for (InputTable *t : obj->tables) 193 if (!t->live) 194 message("removing unused section " + toString(t)); 195 } 196 for (InputChunk *c : ctx.syntheticFunctions) 197 if (!c->live) 198 message("removing unused section " + toString(c)); 199 for (InputGlobal *g : ctx.syntheticGlobals) 200 if (!g->live) 201 message("removing unused section " + toString(g)); 202 for (InputTable *t : ctx.syntheticTables) 203 if (!t->live) 204 message("removing unused section " + toString(t)); 205 } 206 } 207 208 bool MarkLive::isCallCtorsLive() { 209 // In a reloctable link, we don't call `__wasm_call_ctors`. 210 if (ctx.arg.relocatable) 211 return false; 212 213 // In Emscripten-style PIC, we call `__wasm_call_ctors` which calls 214 // `__wasm_apply_data_relocs`. 215 if (ctx.isPic) 216 return true; 217 218 // If there are any init functions, mark `__wasm_call_ctors` live so that 219 // it can call them. 220 for (const ObjFile *file : ctx.objectFiles) { 221 const WasmLinkingData &l = file->getWasmObj()->linkingData(); 222 for (const WasmInitFunc &f : l.InitFunctions) { 223 auto *sym = file->getFunctionSymbol(f.Symbol); 224 if (!sym->isDiscarded() && sym->isLive()) 225 return true; 226 } 227 } 228 229 return false; 230 } 231 232 } // namespace lld::wasm 233