1 //===- llvm-extract.cpp - LLVM function extraction utility ----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This utility changes the input module to only contain a single function, 11 // which is primarily used for debugging transformations. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/ADT/SetVector.h" 16 #include "llvm/ADT/SmallPtrSet.h" 17 #include "llvm/Bitcode/BitcodeWriterPass.h" 18 #include "llvm/IR/DataLayout.h" 19 #include "llvm/IR/IRPrintingPasses.h" 20 #include "llvm/IR/Instructions.h" 21 #include "llvm/IR/LLVMContext.h" 22 #include "llvm/IR/LegacyPassManager.h" 23 #include "llvm/IR/Module.h" 24 #include "llvm/IRReader/IRReader.h" 25 #include "llvm/Support/CommandLine.h" 26 #include "llvm/Support/Error.h" 27 #include "llvm/Support/FileSystem.h" 28 #include "llvm/Support/ManagedStatic.h" 29 #include "llvm/Support/PrettyStackTrace.h" 30 #include "llvm/Support/Regex.h" 31 #include "llvm/Support/Signals.h" 32 #include "llvm/Support/SourceMgr.h" 33 #include "llvm/Support/SystemUtils.h" 34 #include "llvm/Support/ToolOutputFile.h" 35 #include "llvm/Transforms/IPO.h" 36 #include <memory> 37 using namespace llvm; 38 39 // InputFilename - The filename to read from. 40 static cl::opt<std::string> 41 InputFilename(cl::Positional, cl::desc("<input bitcode file>"), 42 cl::init("-"), cl::value_desc("filename")); 43 44 static cl::opt<std::string> 45 OutputFilename("o", cl::desc("Specify output filename"), 46 cl::value_desc("filename"), cl::init("-")); 47 48 static cl::opt<bool> 49 Force("f", cl::desc("Enable binary output on terminals")); 50 51 static cl::opt<bool> 52 DeleteFn("delete", cl::desc("Delete specified Globals from Module")); 53 54 static cl::opt<bool> 55 Recursive("recursive", 56 cl::desc("Recursively extract all called functions")); 57 58 // ExtractFuncs - The functions to extract from the module. 59 static cl::list<std::string> 60 ExtractFuncs("func", cl::desc("Specify function to extract"), 61 cl::ZeroOrMore, cl::value_desc("function")); 62 63 // ExtractRegExpFuncs - The functions, matched via regular expression, to 64 // extract from the module. 65 static cl::list<std::string> 66 ExtractRegExpFuncs("rfunc", cl::desc("Specify function(s) to extract using a " 67 "regular expression"), 68 cl::ZeroOrMore, cl::value_desc("rfunction")); 69 70 // ExtractAlias - The alias to extract from the module. 71 static cl::list<std::string> 72 ExtractAliases("alias", cl::desc("Specify alias to extract"), 73 cl::ZeroOrMore, cl::value_desc("alias")); 74 75 76 // ExtractRegExpAliases - The aliases, matched via regular expression, to 77 // extract from the module. 78 static cl::list<std::string> 79 ExtractRegExpAliases("ralias", cl::desc("Specify alias(es) to extract using a " 80 "regular expression"), 81 cl::ZeroOrMore, cl::value_desc("ralias")); 82 83 // ExtractGlobals - The globals to extract from the module. 84 static cl::list<std::string> 85 ExtractGlobals("glob", cl::desc("Specify global to extract"), 86 cl::ZeroOrMore, cl::value_desc("global")); 87 88 // ExtractRegExpGlobals - The globals, matched via regular expression, to 89 // extract from the module... 90 static cl::list<std::string> 91 ExtractRegExpGlobals("rglob", cl::desc("Specify global(s) to extract using a " 92 "regular expression"), 93 cl::ZeroOrMore, cl::value_desc("rglobal")); 94 95 static cl::opt<bool> 96 OutputAssembly("S", 97 cl::desc("Write output as LLVM assembly"), cl::Hidden); 98 99 static cl::opt<bool> PreserveBitcodeUseListOrder( 100 "preserve-bc-uselistorder", 101 cl::desc("Preserve use-list order when writing LLVM bitcode."), 102 cl::init(true), cl::Hidden); 103 104 static cl::opt<bool> PreserveAssemblyUseListOrder( 105 "preserve-ll-uselistorder", 106 cl::desc("Preserve use-list order when writing LLVM assembly."), 107 cl::init(false), cl::Hidden); 108 109 int main(int argc, char **argv) { 110 // Print a stack trace if we signal out. 111 sys::PrintStackTraceOnErrorSignal(argv[0]); 112 PrettyStackTraceProgram X(argc, argv); 113 114 LLVMContext Context; 115 llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. 116 cl::ParseCommandLineOptions(argc, argv, "llvm extractor\n"); 117 118 // Use lazy loading, since we only care about selected global values. 119 SMDiagnostic Err; 120 std::unique_ptr<Module> M = getLazyIRFileModule(InputFilename, Err, Context); 121 122 if (!M.get()) { 123 Err.print(argv[0], errs()); 124 return 1; 125 } 126 127 // Use SetVector to avoid duplicates. 128 SetVector<GlobalValue *> GVs; 129 130 // Figure out which aliases we should extract. 131 for (size_t i = 0, e = ExtractAliases.size(); i != e; ++i) { 132 GlobalAlias *GA = M->getNamedAlias(ExtractAliases[i]); 133 if (!GA) { 134 errs() << argv[0] << ": program doesn't contain alias named '" 135 << ExtractAliases[i] << "'!\n"; 136 return 1; 137 } 138 GVs.insert(GA); 139 } 140 141 // Extract aliases via regular expression matching. 142 for (size_t i = 0, e = ExtractRegExpAliases.size(); i != e; ++i) { 143 std::string Error; 144 Regex RegEx(ExtractRegExpAliases[i]); 145 if (!RegEx.isValid(Error)) { 146 errs() << argv[0] << ": '" << ExtractRegExpAliases[i] << "' " 147 "invalid regex: " << Error; 148 } 149 bool match = false; 150 for (Module::alias_iterator GA = M->alias_begin(), E = M->alias_end(); 151 GA != E; GA++) { 152 if (RegEx.match(GA->getName())) { 153 GVs.insert(&*GA); 154 match = true; 155 } 156 } 157 if (!match) { 158 errs() << argv[0] << ": program doesn't contain global named '" 159 << ExtractRegExpAliases[i] << "'!\n"; 160 return 1; 161 } 162 } 163 164 // Figure out which globals we should extract. 165 for (size_t i = 0, e = ExtractGlobals.size(); i != e; ++i) { 166 GlobalValue *GV = M->getNamedGlobal(ExtractGlobals[i]); 167 if (!GV) { 168 errs() << argv[0] << ": program doesn't contain global named '" 169 << ExtractGlobals[i] << "'!\n"; 170 return 1; 171 } 172 GVs.insert(GV); 173 } 174 175 // Extract globals via regular expression matching. 176 for (size_t i = 0, e = ExtractRegExpGlobals.size(); i != e; ++i) { 177 std::string Error; 178 Regex RegEx(ExtractRegExpGlobals[i]); 179 if (!RegEx.isValid(Error)) { 180 errs() << argv[0] << ": '" << ExtractRegExpGlobals[i] << "' " 181 "invalid regex: " << Error; 182 } 183 bool match = false; 184 for (auto &GV : M->globals()) { 185 if (RegEx.match(GV.getName())) { 186 GVs.insert(&GV); 187 match = true; 188 } 189 } 190 if (!match) { 191 errs() << argv[0] << ": program doesn't contain global named '" 192 << ExtractRegExpGlobals[i] << "'!\n"; 193 return 1; 194 } 195 } 196 197 // Figure out which functions we should extract. 198 for (size_t i = 0, e = ExtractFuncs.size(); i != e; ++i) { 199 GlobalValue *GV = M->getFunction(ExtractFuncs[i]); 200 if (!GV) { 201 errs() << argv[0] << ": program doesn't contain function named '" 202 << ExtractFuncs[i] << "'!\n"; 203 return 1; 204 } 205 GVs.insert(GV); 206 } 207 // Extract functions via regular expression matching. 208 for (size_t i = 0, e = ExtractRegExpFuncs.size(); i != e; ++i) { 209 std::string Error; 210 StringRef RegExStr = ExtractRegExpFuncs[i]; 211 Regex RegEx(RegExStr); 212 if (!RegEx.isValid(Error)) { 213 errs() << argv[0] << ": '" << ExtractRegExpFuncs[i] << "' " 214 "invalid regex: " << Error; 215 } 216 bool match = false; 217 for (Module::iterator F = M->begin(), E = M->end(); F != E; 218 F++) { 219 if (RegEx.match(F->getName())) { 220 GVs.insert(&*F); 221 match = true; 222 } 223 } 224 if (!match) { 225 errs() << argv[0] << ": program doesn't contain global named '" 226 << ExtractRegExpFuncs[i] << "'!\n"; 227 return 1; 228 } 229 } 230 231 // Use *argv instead of argv[0] to work around a wrong GCC warning. 232 ExitOnError ExitOnErr(std::string(*argv) + ": error reading input: "); 233 234 if (Recursive) { 235 std::vector<llvm::Function *> Workqueue; 236 for (GlobalValue *GV : GVs) { 237 if (auto *F = dyn_cast<Function>(GV)) { 238 Workqueue.push_back(F); 239 } 240 } 241 while (!Workqueue.empty()) { 242 Function *F = &*Workqueue.back(); 243 Workqueue.pop_back(); 244 ExitOnErr(F->materialize()); 245 for (auto &BB : *F) { 246 for (auto &I : BB) { 247 auto *CI = dyn_cast<CallInst>(&I); 248 if (!CI) 249 continue; 250 Function *CF = CI->getCalledFunction(); 251 if (!CF) 252 continue; 253 if (CF->isDeclaration() || GVs.count(CF)) 254 continue; 255 GVs.insert(CF); 256 Workqueue.push_back(CF); 257 } 258 } 259 } 260 } 261 262 auto Materialize = [&](GlobalValue &GV) { ExitOnErr(GV.materialize()); }; 263 264 // Materialize requisite global values. 265 if (!DeleteFn) { 266 for (size_t i = 0, e = GVs.size(); i != e; ++i) 267 Materialize(*GVs[i]); 268 } else { 269 // Deleting. Materialize every GV that's *not* in GVs. 270 SmallPtrSet<GlobalValue *, 8> GVSet(GVs.begin(), GVs.end()); 271 for (auto &F : *M) { 272 if (!GVSet.count(&F)) 273 Materialize(F); 274 } 275 } 276 277 { 278 std::vector<GlobalValue *> Gvs(GVs.begin(), GVs.end()); 279 legacy::PassManager Extract; 280 Extract.add(createGVExtractionPass(Gvs, DeleteFn)); 281 Extract.run(*M); 282 283 // Now that we have all the GVs we want, mark the module as fully 284 // materialized. 285 // FIXME: should the GVExtractionPass handle this? 286 ExitOnErr(M->materializeAll()); 287 } 288 289 // In addition to deleting all other functions, we also want to spiff it 290 // up a little bit. Do this now. 291 legacy::PassManager Passes; 292 293 if (!DeleteFn) 294 Passes.add(createGlobalDCEPass()); // Delete unreachable globals 295 Passes.add(createStripDeadDebugInfoPass()); // Remove dead debug info 296 Passes.add(createStripDeadPrototypesPass()); // Remove dead func decls 297 298 std::error_code EC; 299 tool_output_file Out(OutputFilename, EC, sys::fs::F_None); 300 if (EC) { 301 errs() << EC.message() << '\n'; 302 return 1; 303 } 304 305 if (OutputAssembly) 306 Passes.add( 307 createPrintModulePass(Out.os(), "", PreserveAssemblyUseListOrder)); 308 else if (Force || !CheckBitcodeOutputToConsole(Out.os(), true)) 309 Passes.add(createBitcodeWriterPass(Out.os(), PreserveBitcodeUseListOrder)); 310 311 Passes.run(*M.get()); 312 313 // Declare success. 314 Out.keep(); 315 316 return 0; 317 } 318