1 //===- llvm-extract.cpp - LLVM function extraction utility ----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This utility changes the input module to only contain a single function, 11 // which is primarily used for debugging transformations. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/ADT/SetVector.h" 16 #include "llvm/ADT/SmallPtrSet.h" 17 #include "llvm/Bitcode/BitcodeWriterPass.h" 18 #include "llvm/IR/DataLayout.h" 19 #include "llvm/IR/IRPrintingPasses.h" 20 #include "llvm/IR/Instructions.h" 21 #include "llvm/IR/LLVMContext.h" 22 #include "llvm/IR/LegacyPassManager.h" 23 #include "llvm/IR/Module.h" 24 #include "llvm/IRReader/IRReader.h" 25 #include "llvm/Support/CommandLine.h" 26 #include "llvm/Support/Error.h" 27 #include "llvm/Support/FileSystem.h" 28 #include "llvm/Support/ManagedStatic.h" 29 #include "llvm/Support/PrettyStackTrace.h" 30 #include "llvm/Support/Regex.h" 31 #include "llvm/Support/Signals.h" 32 #include "llvm/Support/SourceMgr.h" 33 #include "llvm/Support/SystemUtils.h" 34 #include "llvm/Support/ToolOutputFile.h" 35 #include "llvm/Transforms/IPO.h" 36 #include <memory> 37 using namespace llvm; 38 39 // InputFilename - The filename to read from. 40 static cl::opt<std::string> 41 InputFilename(cl::Positional, cl::desc("<input bitcode file>"), 42 cl::init("-"), cl::value_desc("filename")); 43 44 static cl::opt<std::string> 45 OutputFilename("o", cl::desc("Specify output filename"), 46 cl::value_desc("filename"), cl::init("-")); 47 48 static cl::opt<bool> 49 Force("f", cl::desc("Enable binary output on terminals")); 50 51 static cl::opt<bool> 52 DeleteFn("delete", cl::desc("Delete specified Globals from Module")); 53 54 static cl::opt<bool> 55 Recursive("recursive", 56 cl::desc("Recursively extract all called functions")); 57 58 // ExtractFuncs - The functions to extract from the module. 59 static cl::list<std::string> 60 ExtractFuncs("func", cl::desc("Specify function to extract"), 61 cl::ZeroOrMore, cl::value_desc("function")); 62 63 // ExtractRegExpFuncs - The functions, matched via regular expression, to 64 // extract from the module. 65 static cl::list<std::string> 66 ExtractRegExpFuncs("rfunc", cl::desc("Specify function(s) to extract using a " 67 "regular expression"), 68 cl::ZeroOrMore, cl::value_desc("rfunction")); 69 70 // ExtractBlocks - The blocks to extract from the module. 71 static cl::list<std::string> 72 ExtractBlocks("bb", 73 cl::desc("Specify <function, basic block> pairs to extract"), 74 cl::ZeroOrMore, cl::value_desc("function:bb")); 75 76 // ExtractAlias - The alias to extract from the module. 77 static cl::list<std::string> 78 ExtractAliases("alias", cl::desc("Specify alias to extract"), 79 cl::ZeroOrMore, cl::value_desc("alias")); 80 81 82 // ExtractRegExpAliases - The aliases, matched via regular expression, to 83 // extract from the module. 84 static cl::list<std::string> 85 ExtractRegExpAliases("ralias", cl::desc("Specify alias(es) to extract using a " 86 "regular expression"), 87 cl::ZeroOrMore, cl::value_desc("ralias")); 88 89 // ExtractGlobals - The globals to extract from the module. 90 static cl::list<std::string> 91 ExtractGlobals("glob", cl::desc("Specify global to extract"), 92 cl::ZeroOrMore, cl::value_desc("global")); 93 94 // ExtractRegExpGlobals - The globals, matched via regular expression, to 95 // extract from the module... 96 static cl::list<std::string> 97 ExtractRegExpGlobals("rglob", cl::desc("Specify global(s) to extract using a " 98 "regular expression"), 99 cl::ZeroOrMore, cl::value_desc("rglobal")); 100 101 static cl::opt<bool> 102 OutputAssembly("S", 103 cl::desc("Write output as LLVM assembly"), cl::Hidden); 104 105 static cl::opt<bool> PreserveBitcodeUseListOrder( 106 "preserve-bc-uselistorder", 107 cl::desc("Preserve use-list order when writing LLVM bitcode."), 108 cl::init(true), cl::Hidden); 109 110 static cl::opt<bool> PreserveAssemblyUseListOrder( 111 "preserve-ll-uselistorder", 112 cl::desc("Preserve use-list order when writing LLVM assembly."), 113 cl::init(false), cl::Hidden); 114 115 int main(int argc, char **argv) { 116 // Print a stack trace if we signal out. 117 sys::PrintStackTraceOnErrorSignal(argv[0]); 118 PrettyStackTraceProgram X(argc, argv); 119 120 LLVMContext Context; 121 llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. 122 cl::ParseCommandLineOptions(argc, argv, "llvm extractor\n"); 123 124 // Use lazy loading, since we only care about selected global values. 125 SMDiagnostic Err; 126 std::unique_ptr<Module> M = getLazyIRFileModule(InputFilename, Err, Context); 127 128 if (!M.get()) { 129 Err.print(argv[0], errs()); 130 return 1; 131 } 132 133 // Use SetVector to avoid duplicates. 134 SetVector<GlobalValue *> GVs; 135 136 // Figure out which aliases we should extract. 137 for (size_t i = 0, e = ExtractAliases.size(); i != e; ++i) { 138 GlobalAlias *GA = M->getNamedAlias(ExtractAliases[i]); 139 if (!GA) { 140 errs() << argv[0] << ": program doesn't contain alias named '" 141 << ExtractAliases[i] << "'!\n"; 142 return 1; 143 } 144 GVs.insert(GA); 145 } 146 147 // Extract aliases via regular expression matching. 148 for (size_t i = 0, e = ExtractRegExpAliases.size(); i != e; ++i) { 149 std::string Error; 150 Regex RegEx(ExtractRegExpAliases[i]); 151 if (!RegEx.isValid(Error)) { 152 errs() << argv[0] << ": '" << ExtractRegExpAliases[i] << "' " 153 "invalid regex: " << Error; 154 } 155 bool match = false; 156 for (Module::alias_iterator GA = M->alias_begin(), E = M->alias_end(); 157 GA != E; GA++) { 158 if (RegEx.match(GA->getName())) { 159 GVs.insert(&*GA); 160 match = true; 161 } 162 } 163 if (!match) { 164 errs() << argv[0] << ": program doesn't contain global named '" 165 << ExtractRegExpAliases[i] << "'!\n"; 166 return 1; 167 } 168 } 169 170 // Figure out which globals we should extract. 171 for (size_t i = 0, e = ExtractGlobals.size(); i != e; ++i) { 172 GlobalValue *GV = M->getNamedGlobal(ExtractGlobals[i]); 173 if (!GV) { 174 errs() << argv[0] << ": program doesn't contain global named '" 175 << ExtractGlobals[i] << "'!\n"; 176 return 1; 177 } 178 GVs.insert(GV); 179 } 180 181 // Extract globals via regular expression matching. 182 for (size_t i = 0, e = ExtractRegExpGlobals.size(); i != e; ++i) { 183 std::string Error; 184 Regex RegEx(ExtractRegExpGlobals[i]); 185 if (!RegEx.isValid(Error)) { 186 errs() << argv[0] << ": '" << ExtractRegExpGlobals[i] << "' " 187 "invalid regex: " << Error; 188 } 189 bool match = false; 190 for (auto &GV : M->globals()) { 191 if (RegEx.match(GV.getName())) { 192 GVs.insert(&GV); 193 match = true; 194 } 195 } 196 if (!match) { 197 errs() << argv[0] << ": program doesn't contain global named '" 198 << ExtractRegExpGlobals[i] << "'!\n"; 199 return 1; 200 } 201 } 202 203 // Figure out which functions we should extract. 204 for (size_t i = 0, e = ExtractFuncs.size(); i != e; ++i) { 205 GlobalValue *GV = M->getFunction(ExtractFuncs[i]); 206 if (!GV) { 207 errs() << argv[0] << ": program doesn't contain function named '" 208 << ExtractFuncs[i] << "'!\n"; 209 return 1; 210 } 211 GVs.insert(GV); 212 } 213 // Extract functions via regular expression matching. 214 for (size_t i = 0, e = ExtractRegExpFuncs.size(); i != e; ++i) { 215 std::string Error; 216 StringRef RegExStr = ExtractRegExpFuncs[i]; 217 Regex RegEx(RegExStr); 218 if (!RegEx.isValid(Error)) { 219 errs() << argv[0] << ": '" << ExtractRegExpFuncs[i] << "' " 220 "invalid regex: " << Error; 221 } 222 bool match = false; 223 for (Module::iterator F = M->begin(), E = M->end(); F != E; 224 F++) { 225 if (RegEx.match(F->getName())) { 226 GVs.insert(&*F); 227 match = true; 228 } 229 } 230 if (!match) { 231 errs() << argv[0] << ": program doesn't contain global named '" 232 << ExtractRegExpFuncs[i] << "'!\n"; 233 return 1; 234 } 235 } 236 237 // Figure out which BasicBlocks we should extract. 238 SmallVector<BasicBlock *, 4> BBs; 239 for (StringRef StrPair : ExtractBlocks) { 240 auto BBInfo = StrPair.split(':'); 241 // Get the function. 242 Function *F = M->getFunction(BBInfo.first); 243 if (!F) { 244 errs() << argv[0] << ": program doesn't contain a function named '" 245 << BBInfo.first << "'!\n"; 246 return 1; 247 } 248 // Do not materialize this function. 249 GVs.insert(F); 250 // Get the basic block. 251 auto Res = llvm::find_if(*F, [&](const BasicBlock &BB) { 252 return BB.getName().equals(BBInfo.second); 253 }); 254 if (Res == F->end()) { 255 errs() << argv[0] << ": function " << F->getName() 256 << " doesn't contain a basic block named '" << BBInfo.second 257 << "'!\n"; 258 return 1; 259 } 260 BBs.push_back(&*Res); 261 } 262 263 // Use *argv instead of argv[0] to work around a wrong GCC warning. 264 ExitOnError ExitOnErr(std::string(*argv) + ": error reading input: "); 265 266 if (Recursive) { 267 std::vector<llvm::Function *> Workqueue; 268 for (GlobalValue *GV : GVs) { 269 if (auto *F = dyn_cast<Function>(GV)) { 270 Workqueue.push_back(F); 271 } 272 } 273 while (!Workqueue.empty()) { 274 Function *F = &*Workqueue.back(); 275 Workqueue.pop_back(); 276 ExitOnErr(F->materialize()); 277 for (auto &BB : *F) { 278 for (auto &I : BB) { 279 auto *CI = dyn_cast<CallInst>(&I); 280 if (!CI) 281 continue; 282 Function *CF = CI->getCalledFunction(); 283 if (!CF) 284 continue; 285 if (CF->isDeclaration() || GVs.count(CF)) 286 continue; 287 GVs.insert(CF); 288 Workqueue.push_back(CF); 289 } 290 } 291 } 292 } 293 294 auto Materialize = [&](GlobalValue &GV) { ExitOnErr(GV.materialize()); }; 295 296 // Materialize requisite global values. 297 if (!DeleteFn) { 298 for (size_t i = 0, e = GVs.size(); i != e; ++i) 299 Materialize(*GVs[i]); 300 } else { 301 // Deleting. Materialize every GV that's *not* in GVs. 302 SmallPtrSet<GlobalValue *, 8> GVSet(GVs.begin(), GVs.end()); 303 for (auto &F : *M) { 304 if (!GVSet.count(&F)) 305 Materialize(F); 306 } 307 } 308 309 { 310 std::vector<GlobalValue *> Gvs(GVs.begin(), GVs.end()); 311 legacy::PassManager Extract; 312 Extract.add(createGVExtractionPass(Gvs, DeleteFn)); 313 Extract.run(*M); 314 315 // Now that we have all the GVs we want, mark the module as fully 316 // materialized. 317 // FIXME: should the GVExtractionPass handle this? 318 ExitOnErr(M->materializeAll()); 319 } 320 321 // Extract the specified basic blocks from the module and erase the existing 322 // functions. 323 if (!ExtractBlocks.empty()) { 324 legacy::PassManager PM; 325 PM.add(createBlockExtractorPass(BBs, true)); 326 PM.run(*M); 327 } 328 329 // In addition to deleting all other functions, we also want to spiff it 330 // up a little bit. Do this now. 331 legacy::PassManager Passes; 332 333 if (!DeleteFn) 334 Passes.add(createGlobalDCEPass()); // Delete unreachable globals 335 Passes.add(createStripDeadDebugInfoPass()); // Remove dead debug info 336 Passes.add(createStripDeadPrototypesPass()); // Remove dead func decls 337 338 std::error_code EC; 339 ToolOutputFile Out(OutputFilename, EC, sys::fs::F_None); 340 if (EC) { 341 errs() << EC.message() << '\n'; 342 return 1; 343 } 344 345 if (OutputAssembly) 346 Passes.add( 347 createPrintModulePass(Out.os(), "", PreserveAssemblyUseListOrder)); 348 else if (Force || !CheckBitcodeOutputToConsole(Out.os(), true)) 349 Passes.add(createBitcodeWriterPass(Out.os(), PreserveBitcodeUseListOrder)); 350 351 Passes.run(*M.get()); 352 353 // Declare success. 354 Out.keep(); 355 356 return 0; 357 } 358