1 //===- llvm-extract.cpp - LLVM function extraction utility ----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This utility changes the input module to only contain a single function, 10 // which is primarily used for debugging transformations. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ADT/SetVector.h" 15 #include "llvm/ADT/SmallPtrSet.h" 16 #include "llvm/Bitcode/BitcodeWriterPass.h" 17 #include "llvm/IR/DataLayout.h" 18 #include "llvm/IR/IRPrintingPasses.h" 19 #include "llvm/IR/Instructions.h" 20 #include "llvm/IR/LLVMContext.h" 21 #include "llvm/IR/LegacyPassManager.h" 22 #include "llvm/IR/Module.h" 23 #include "llvm/IRReader/IRReader.h" 24 #include "llvm/Support/CommandLine.h" 25 #include "llvm/Support/Error.h" 26 #include "llvm/Support/FileSystem.h" 27 #include "llvm/Support/InitLLVM.h" 28 #include "llvm/Support/Regex.h" 29 #include "llvm/Support/SourceMgr.h" 30 #include "llvm/Support/SystemUtils.h" 31 #include "llvm/Support/ToolOutputFile.h" 32 #include "llvm/Transforms/IPO.h" 33 #include <memory> 34 using namespace llvm; 35 36 // InputFilename - The filename to read from. 37 static cl::opt<std::string> 38 InputFilename(cl::Positional, cl::desc("<input bitcode file>"), 39 cl::init("-"), cl::value_desc("filename")); 40 41 static cl::opt<std::string> 42 OutputFilename("o", cl::desc("Specify output filename"), 43 cl::value_desc("filename"), cl::init("-")); 44 45 static cl::opt<bool> 46 Force("f", cl::desc("Enable binary output on terminals")); 47 48 static cl::opt<bool> 49 DeleteFn("delete", cl::desc("Delete specified Globals from Module")); 50 51 static cl::opt<bool> 52 Recursive("recursive", 53 cl::desc("Recursively extract all called functions")); 54 55 // ExtractFuncs - The functions to extract from the module. 56 static cl::list<std::string> 57 ExtractFuncs("func", cl::desc("Specify function to extract"), 58 cl::ZeroOrMore, cl::value_desc("function")); 59 60 // ExtractRegExpFuncs - The functions, matched via regular expression, to 61 // extract from the module. 62 static cl::list<std::string> 63 ExtractRegExpFuncs("rfunc", cl::desc("Specify function(s) to extract using a " 64 "regular expression"), 65 cl::ZeroOrMore, cl::value_desc("rfunction")); 66 67 // ExtractBlocks - The blocks to extract from the module. 68 static cl::list<std::string> 69 ExtractBlocks("bb", 70 cl::desc("Specify <function, basic block> pairs to extract"), 71 cl::ZeroOrMore, cl::value_desc("function:bb")); 72 73 // ExtractAlias - The alias to extract from the module. 74 static cl::list<std::string> 75 ExtractAliases("alias", cl::desc("Specify alias to extract"), 76 cl::ZeroOrMore, cl::value_desc("alias")); 77 78 79 // ExtractRegExpAliases - The aliases, matched via regular expression, to 80 // extract from the module. 81 static cl::list<std::string> 82 ExtractRegExpAliases("ralias", cl::desc("Specify alias(es) to extract using a " 83 "regular expression"), 84 cl::ZeroOrMore, cl::value_desc("ralias")); 85 86 // ExtractGlobals - The globals to extract from the module. 87 static cl::list<std::string> 88 ExtractGlobals("glob", cl::desc("Specify global to extract"), 89 cl::ZeroOrMore, cl::value_desc("global")); 90 91 // ExtractRegExpGlobals - The globals, matched via regular expression, to 92 // extract from the module... 93 static cl::list<std::string> 94 ExtractRegExpGlobals("rglob", cl::desc("Specify global(s) to extract using a " 95 "regular expression"), 96 cl::ZeroOrMore, cl::value_desc("rglobal")); 97 98 static cl::opt<bool> 99 OutputAssembly("S", 100 cl::desc("Write output as LLVM assembly"), cl::Hidden); 101 102 static cl::opt<bool> PreserveBitcodeUseListOrder( 103 "preserve-bc-uselistorder", 104 cl::desc("Preserve use-list order when writing LLVM bitcode."), 105 cl::init(true), cl::Hidden); 106 107 static cl::opt<bool> PreserveAssemblyUseListOrder( 108 "preserve-ll-uselistorder", 109 cl::desc("Preserve use-list order when writing LLVM assembly."), 110 cl::init(false), cl::Hidden); 111 112 int main(int argc, char **argv) { 113 InitLLVM X(argc, argv); 114 115 LLVMContext Context; 116 cl::ParseCommandLineOptions(argc, argv, "llvm extractor\n"); 117 118 // Use lazy loading, since we only care about selected global values. 119 SMDiagnostic Err; 120 std::unique_ptr<Module> M = getLazyIRFileModule(InputFilename, Err, Context); 121 122 if (!M.get()) { 123 Err.print(argv[0], errs()); 124 return 1; 125 } 126 127 // Use SetVector to avoid duplicates. 128 SetVector<GlobalValue *> GVs; 129 130 // Figure out which aliases we should extract. 131 for (size_t i = 0, e = ExtractAliases.size(); i != e; ++i) { 132 GlobalAlias *GA = M->getNamedAlias(ExtractAliases[i]); 133 if (!GA) { 134 errs() << argv[0] << ": program doesn't contain alias named '" 135 << ExtractAliases[i] << "'!\n"; 136 return 1; 137 } 138 GVs.insert(GA); 139 } 140 141 // Extract aliases via regular expression matching. 142 for (size_t i = 0, e = ExtractRegExpAliases.size(); i != e; ++i) { 143 std::string Error; 144 Regex RegEx(ExtractRegExpAliases[i]); 145 if (!RegEx.isValid(Error)) { 146 errs() << argv[0] << ": '" << ExtractRegExpAliases[i] << "' " 147 "invalid regex: " << Error; 148 } 149 bool match = false; 150 for (Module::alias_iterator GA = M->alias_begin(), E = M->alias_end(); 151 GA != E; GA++) { 152 if (RegEx.match(GA->getName())) { 153 GVs.insert(&*GA); 154 match = true; 155 } 156 } 157 if (!match) { 158 errs() << argv[0] << ": program doesn't contain global named '" 159 << ExtractRegExpAliases[i] << "'!\n"; 160 return 1; 161 } 162 } 163 164 // Figure out which globals we should extract. 165 for (size_t i = 0, e = ExtractGlobals.size(); i != e; ++i) { 166 GlobalValue *GV = M->getNamedGlobal(ExtractGlobals[i]); 167 if (!GV) { 168 errs() << argv[0] << ": program doesn't contain global named '" 169 << ExtractGlobals[i] << "'!\n"; 170 return 1; 171 } 172 GVs.insert(GV); 173 } 174 175 // Extract globals via regular expression matching. 176 for (size_t i = 0, e = ExtractRegExpGlobals.size(); i != e; ++i) { 177 std::string Error; 178 Regex RegEx(ExtractRegExpGlobals[i]); 179 if (!RegEx.isValid(Error)) { 180 errs() << argv[0] << ": '" << ExtractRegExpGlobals[i] << "' " 181 "invalid regex: " << Error; 182 } 183 bool match = false; 184 for (auto &GV : M->globals()) { 185 if (RegEx.match(GV.getName())) { 186 GVs.insert(&GV); 187 match = true; 188 } 189 } 190 if (!match) { 191 errs() << argv[0] << ": program doesn't contain global named '" 192 << ExtractRegExpGlobals[i] << "'!\n"; 193 return 1; 194 } 195 } 196 197 // Figure out which functions we should extract. 198 for (size_t i = 0, e = ExtractFuncs.size(); i != e; ++i) { 199 GlobalValue *GV = M->getFunction(ExtractFuncs[i]); 200 if (!GV) { 201 errs() << argv[0] << ": program doesn't contain function named '" 202 << ExtractFuncs[i] << "'!\n"; 203 return 1; 204 } 205 GVs.insert(GV); 206 } 207 // Extract functions via regular expression matching. 208 for (size_t i = 0, e = ExtractRegExpFuncs.size(); i != e; ++i) { 209 std::string Error; 210 StringRef RegExStr = ExtractRegExpFuncs[i]; 211 Regex RegEx(RegExStr); 212 if (!RegEx.isValid(Error)) { 213 errs() << argv[0] << ": '" << ExtractRegExpFuncs[i] << "' " 214 "invalid regex: " << Error; 215 } 216 bool match = false; 217 for (Module::iterator F = M->begin(), E = M->end(); F != E; 218 F++) { 219 if (RegEx.match(F->getName())) { 220 GVs.insert(&*F); 221 match = true; 222 } 223 } 224 if (!match) { 225 errs() << argv[0] << ": program doesn't contain global named '" 226 << ExtractRegExpFuncs[i] << "'!\n"; 227 return 1; 228 } 229 } 230 231 // Figure out which BasicBlocks we should extract. 232 SmallVector<BasicBlock *, 4> BBs; 233 for (StringRef StrPair : ExtractBlocks) { 234 auto BBInfo = StrPair.split(':'); 235 // Get the function. 236 Function *F = M->getFunction(BBInfo.first); 237 if (!F) { 238 errs() << argv[0] << ": program doesn't contain a function named '" 239 << BBInfo.first << "'!\n"; 240 return 1; 241 } 242 // Do not materialize this function. 243 GVs.insert(F); 244 // Get the basic block. 245 auto Res = llvm::find_if(*F, [&](const BasicBlock &BB) { 246 return BB.getName().equals(BBInfo.second); 247 }); 248 if (Res == F->end()) { 249 errs() << argv[0] << ": function " << F->getName() 250 << " doesn't contain a basic block named '" << BBInfo.second 251 << "'!\n"; 252 return 1; 253 } 254 BBs.push_back(&*Res); 255 } 256 257 // Use *argv instead of argv[0] to work around a wrong GCC warning. 258 ExitOnError ExitOnErr(std::string(*argv) + ": error reading input: "); 259 260 if (Recursive) { 261 std::vector<llvm::Function *> Workqueue; 262 for (GlobalValue *GV : GVs) { 263 if (auto *F = dyn_cast<Function>(GV)) { 264 Workqueue.push_back(F); 265 } 266 } 267 while (!Workqueue.empty()) { 268 Function *F = &*Workqueue.back(); 269 Workqueue.pop_back(); 270 ExitOnErr(F->materialize()); 271 for (auto &BB : *F) { 272 for (auto &I : BB) { 273 CallBase *CB = dyn_cast<CallBase>(&I); 274 if (!CB) 275 continue; 276 Function *CF = CB->getCalledFunction(); 277 if (!CF) 278 continue; 279 if (CF->isDeclaration() || GVs.count(CF)) 280 continue; 281 GVs.insert(CF); 282 Workqueue.push_back(CF); 283 } 284 } 285 } 286 } 287 288 auto Materialize = [&](GlobalValue &GV) { ExitOnErr(GV.materialize()); }; 289 290 // Materialize requisite global values. 291 if (!DeleteFn) { 292 for (size_t i = 0, e = GVs.size(); i != e; ++i) 293 Materialize(*GVs[i]); 294 } else { 295 // Deleting. Materialize every GV that's *not* in GVs. 296 SmallPtrSet<GlobalValue *, 8> GVSet(GVs.begin(), GVs.end()); 297 for (auto &F : *M) { 298 if (!GVSet.count(&F)) 299 Materialize(F); 300 } 301 } 302 303 { 304 std::vector<GlobalValue *> Gvs(GVs.begin(), GVs.end()); 305 legacy::PassManager Extract; 306 Extract.add(createGVExtractionPass(Gvs, DeleteFn)); 307 Extract.run(*M); 308 309 // Now that we have all the GVs we want, mark the module as fully 310 // materialized. 311 // FIXME: should the GVExtractionPass handle this? 312 ExitOnErr(M->materializeAll()); 313 } 314 315 // Extract the specified basic blocks from the module and erase the existing 316 // functions. 317 if (!ExtractBlocks.empty()) { 318 legacy::PassManager PM; 319 PM.add(createBlockExtractorPass(BBs, true)); 320 PM.run(*M); 321 } 322 323 // In addition to deleting all other functions, we also want to spiff it 324 // up a little bit. Do this now. 325 legacy::PassManager Passes; 326 327 if (!DeleteFn) 328 Passes.add(createGlobalDCEPass()); // Delete unreachable globals 329 Passes.add(createStripDeadDebugInfoPass()); // Remove dead debug info 330 Passes.add(createStripDeadPrototypesPass()); // Remove dead func decls 331 332 std::error_code EC; 333 ToolOutputFile Out(OutputFilename, EC, sys::fs::F_None); 334 if (EC) { 335 errs() << EC.message() << '\n'; 336 return 1; 337 } 338 339 if (OutputAssembly) 340 Passes.add( 341 createPrintModulePass(Out.os(), "", PreserveAssemblyUseListOrder)); 342 else if (Force || !CheckBitcodeOutputToConsole(Out.os(), true)) 343 Passes.add(createBitcodeWriterPass(Out.os(), PreserveBitcodeUseListOrder)); 344 345 Passes.run(*M.get()); 346 347 // Declare success. 348 Out.keep(); 349 350 return 0; 351 } 352