1 //===- BlockExtractor.cpp - Extracts blocks into their own functions ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass extracts the specified basic blocks from the module into their 10 // own functions. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ADT/STLExtras.h" 15 #include "llvm/ADT/Statistic.h" 16 #include "llvm/IR/Instructions.h" 17 #include "llvm/IR/Module.h" 18 #include "llvm/Pass.h" 19 #include "llvm/Support/CommandLine.h" 20 #include "llvm/Support/Debug.h" 21 #include "llvm/Support/MemoryBuffer.h" 22 #include "llvm/Transforms/IPO.h" 23 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 24 #include "llvm/Transforms/Utils/CodeExtractor.h" 25 26 using namespace llvm; 27 28 #define DEBUG_TYPE "block-extractor" 29 30 STATISTIC(NumExtracted, "Number of basic blocks extracted"); 31 32 static cl::opt<std::string> BlockExtractorFile( 33 "extract-blocks-file", cl::value_desc("filename"), 34 cl::desc("A file containing list of basic blocks to extract"), cl::Hidden); 35 36 cl::opt<bool> BlockExtractorEraseFuncs("extract-blocks-erase-funcs", 37 cl::desc("Erase the existing functions"), 38 cl::Hidden); 39 namespace { 40 class BlockExtractor : public ModulePass { 41 SmallVector<SmallVector<BasicBlock *, 16>, 4> GroupsOfBlocks; 42 bool EraseFunctions; 43 /// Map a function name to groups of blocks. 44 SmallVector<std::pair<std::string, SmallVector<std::string, 4>>, 4> 45 BlocksByName; 46 47 void init(const SmallVectorImpl<SmallVector<BasicBlock *, 16>> 48 &GroupsOfBlocksToExtract) { 49 for (const SmallVectorImpl<BasicBlock *> &GroupOfBlocks : 50 GroupsOfBlocksToExtract) { 51 SmallVector<BasicBlock *, 16> NewGroup; 52 NewGroup.append(GroupOfBlocks.begin(), GroupOfBlocks.end()); 53 GroupsOfBlocks.emplace_back(NewGroup); 54 } 55 if (!BlockExtractorFile.empty()) 56 loadFile(); 57 } 58 59 public: 60 static char ID; 61 BlockExtractor(const SmallVectorImpl<BasicBlock *> &BlocksToExtract, 62 bool EraseFunctions) 63 : ModulePass(ID), EraseFunctions(EraseFunctions) { 64 // We want one group per element of the input list. 65 SmallVector<SmallVector<BasicBlock *, 16>, 4> MassagedGroupsOfBlocks; 66 for (BasicBlock *BB : BlocksToExtract) { 67 SmallVector<BasicBlock *, 16> NewGroup; 68 NewGroup.push_back(BB); 69 MassagedGroupsOfBlocks.push_back(NewGroup); 70 } 71 init(MassagedGroupsOfBlocks); 72 } 73 74 BlockExtractor(const SmallVectorImpl<SmallVector<BasicBlock *, 16>> 75 &GroupsOfBlocksToExtract, 76 bool EraseFunctions) 77 : ModulePass(ID), EraseFunctions(EraseFunctions) { 78 init(GroupsOfBlocksToExtract); 79 } 80 81 BlockExtractor() : BlockExtractor(SmallVector<BasicBlock *, 0>(), false) {} 82 bool runOnModule(Module &M) override; 83 84 private: 85 void loadFile(); 86 void splitLandingPadPreds(Function &F); 87 }; 88 } // end anonymous namespace 89 90 char BlockExtractor::ID = 0; 91 INITIALIZE_PASS(BlockExtractor, "extract-blocks", 92 "Extract basic blocks from module", false, false) 93 94 ModulePass *llvm::createBlockExtractorPass() { return new BlockExtractor(); } 95 ModulePass *llvm::createBlockExtractorPass( 96 const SmallVectorImpl<BasicBlock *> &BlocksToExtract, bool EraseFunctions) { 97 return new BlockExtractor(BlocksToExtract, EraseFunctions); 98 } 99 ModulePass *llvm::createBlockExtractorPass( 100 const SmallVectorImpl<SmallVector<BasicBlock *, 16>> 101 &GroupsOfBlocksToExtract, 102 bool EraseFunctions) { 103 return new BlockExtractor(GroupsOfBlocksToExtract, EraseFunctions); 104 } 105 106 /// Gets all of the blocks specified in the input file. 107 void BlockExtractor::loadFile() { 108 auto ErrOrBuf = MemoryBuffer::getFile(BlockExtractorFile); 109 if (ErrOrBuf.getError()) 110 report_fatal_error("BlockExtractor couldn't load the file."); 111 // Read the file. 112 auto &Buf = *ErrOrBuf; 113 SmallVector<StringRef, 16> Lines; 114 Buf->getBuffer().split(Lines, '\n', /*MaxSplit=*/-1, 115 /*KeepEmpty=*/false); 116 for (const auto &Line : Lines) { 117 SmallVector<StringRef, 4> LineSplit; 118 Line.split(LineSplit, ' ', /*MaxSplit=*/-1, 119 /*KeepEmpty=*/false); 120 if (LineSplit.empty()) 121 continue; 122 if (LineSplit.size()!=2) 123 report_fatal_error("Invalid line format, expecting lines like: 'funcname bb1[;bb2..]'"); 124 SmallVector<StringRef, 4> BBNames; 125 LineSplit[1].split(BBNames, ';', /*MaxSplit=*/-1, 126 /*KeepEmpty=*/false); 127 if (BBNames.empty()) 128 report_fatal_error("Missing bbs name"); 129 BlocksByName.push_back({LineSplit[0], {BBNames.begin(), BBNames.end()}}); 130 } 131 } 132 133 /// Extracts the landing pads to make sure all of them have only one 134 /// predecessor. 135 void BlockExtractor::splitLandingPadPreds(Function &F) { 136 for (BasicBlock &BB : F) { 137 for (Instruction &I : BB) { 138 if (!isa<InvokeInst>(&I)) 139 continue; 140 InvokeInst *II = cast<InvokeInst>(&I); 141 BasicBlock *Parent = II->getParent(); 142 BasicBlock *LPad = II->getUnwindDest(); 143 144 // Look through the landing pad's predecessors. If one of them ends in an 145 // 'invoke', then we want to split the landing pad. 146 bool Split = false; 147 for (auto PredBB : predecessors(LPad)) { 148 if (PredBB->isLandingPad() && PredBB != Parent && 149 isa<InvokeInst>(Parent->getTerminator())) { 150 Split = true; 151 break; 152 } 153 } 154 155 if (!Split) 156 continue; 157 158 SmallVector<BasicBlock *, 2> NewBBs; 159 SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", NewBBs); 160 } 161 } 162 } 163 164 bool BlockExtractor::runOnModule(Module &M) { 165 166 bool Changed = false; 167 168 // Get all the functions. 169 SmallVector<Function *, 4> Functions; 170 for (Function &F : M) { 171 splitLandingPadPreds(F); 172 Functions.push_back(&F); 173 } 174 175 // Get all the blocks specified in the input file. 176 unsigned NextGroupIdx = GroupsOfBlocks.size(); 177 GroupsOfBlocks.resize(NextGroupIdx + BlocksByName.size()); 178 for (const auto &BInfo : BlocksByName) { 179 Function *F = M.getFunction(BInfo.first); 180 if (!F) 181 report_fatal_error("Invalid function name specified in the input file"); 182 for (const auto &BBInfo : BInfo.second) { 183 auto Res = llvm::find_if(*F, [&](const BasicBlock &BB) { 184 return BB.getName().equals(BBInfo); 185 }); 186 if (Res == F->end()) 187 report_fatal_error("Invalid block name specified in the input file"); 188 GroupsOfBlocks[NextGroupIdx].push_back(&*Res); 189 } 190 ++NextGroupIdx; 191 } 192 193 // Extract each group of basic blocks. 194 for (auto &BBs : GroupsOfBlocks) { 195 SmallVector<BasicBlock *, 32> BlocksToExtractVec; 196 for (BasicBlock *BB : BBs) { 197 // Check if the module contains BB. 198 if (BB->getParent()->getParent() != &M) 199 report_fatal_error("Invalid basic block"); 200 LLVM_DEBUG(dbgs() << "BlockExtractor: Extracting " 201 << BB->getParent()->getName() << ":" << BB->getName() 202 << "\n"); 203 BlocksToExtractVec.push_back(BB); 204 if (const InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) 205 BlocksToExtractVec.push_back(II->getUnwindDest()); 206 ++NumExtracted; 207 Changed = true; 208 } 209 CodeExtractorAnalysisCache CEAC(*BBs[0]->getParent()); 210 Function *F = CodeExtractor(BlocksToExtractVec).extractCodeRegion(CEAC); 211 if (F) 212 LLVM_DEBUG(dbgs() << "Extracted group '" << (*BBs.begin())->getName() 213 << "' in: " << F->getName() << '\n'); 214 else 215 LLVM_DEBUG(dbgs() << "Failed to extract for group '" 216 << (*BBs.begin())->getName() << "'\n"); 217 } 218 219 // Erase the functions. 220 if (EraseFunctions || BlockExtractorEraseFuncs) { 221 for (Function *F : Functions) { 222 LLVM_DEBUG(dbgs() << "BlockExtractor: Trying to delete " << F->getName() 223 << "\n"); 224 F->deleteBody(); 225 } 226 // Set linkage as ExternalLinkage to avoid erasing unreachable functions. 227 for (Function &F : M) 228 F.setLinkage(GlobalValue::ExternalLinkage); 229 Changed = true; 230 } 231 232 return Changed; 233 } 234