//===- JumpThreading.cpp - Thread control through conditional blocks -----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License.  See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the Jump Threading pass.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "jump-threading"
#include "llvm/Transforms/Scalar.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
using namespace llvm;

STATISTIC(NumThreads, "Number of jumps threaded");
STATISTIC(NumFolds,   "Number of terminators folded");

static cl::opt<unsigned>
Threshold("jump-threading-threshold",
          cl::desc("Max block size to duplicate for jump threading"),
          cl::init(6), cl::Hidden);

namespace {
  /// This pass performs 'jump threading', which looks at blocks that have
  /// multiple predecessors and multiple successors.  If one or more of the
  /// predecessors of the block can be proven to always jump to one of the
  /// successors, we forward the edge from the predecessor to the successor by
  /// duplicating the contents of this block.
  ///
  /// An example of when this can occur is code like this:
  ///
  ///   if () { ...
  ///     X = 4;
  ///   }
  ///   if (X < 3) {
  ///
  /// In this case, the unconditional branch at the end of the first if can be
  /// revectored to the false side of the second if.
  ///
  class VISIBILITY_HIDDEN JumpThreading : public FunctionPass {
  public:
    static char ID; // Pass identification
    JumpThreading() : FunctionPass((intptr_t)&ID) {}

    bool runOnFunction(Function &F);
    bool ThreadBlock(BasicBlock *BB);
    void ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BasicBlock *SuccBB);
  };
  char JumpThreading::ID = 0;
  RegisterPass<JumpThreading> X("jump-threading", "Jump Threading");
}

// Public interface to the Jump Threading pass.
FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }

/// runOnFunction - Top level algorithm.
///
bool JumpThreading::runOnFunction(Function &F) {
  DOUT << "Jump threading on function '" << F.getNameStart() << "'\n";

  bool AnotherIteration = true, EverChanged = false;
  while (AnotherIteration) {
    AnotherIteration = false;
    bool Changed = false;
    for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
      while (ThreadBlock(I))
        Changed = true;
    AnotherIteration = Changed;
    EverChanged |= Changed;
  }
  return EverChanged;
}

/// getJumpThreadDuplicationCost - Return the cost of duplicating this block to
/// thread across it.
static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
  BasicBlock::const_iterator I = BB->begin();
  // Ignore PHI nodes, these will be flattened when duplication happens.
  while (isa<PHINode>(*I)) ++I;

  // Sum up the cost of each instruction until we get to the terminator.  Don't
  // include the terminator because the copy won't include it.
  unsigned Size = 0;
  for (; !isa<TerminatorInst>(I); ++I) {
    // Debugger intrinsics don't incur code size.
    if (isa<DbgInfoIntrinsic>(I)) continue;

    // If this is a pointer->pointer bitcast, it is free.
    if (isa<BitCastInst>(I) && isa<PointerType>(I->getType()))
      continue;

    // All other instructions count for at least one unit.
    ++Size;

    // Calls are more expensive.  If they are non-intrinsic calls, we model
    // them as having a cost of 4 total.  If they are a non-vector intrinsic,
    // we model them as having a cost of 1 total, and if they are a vector
    // intrinsic, we model them as having a cost of 2 total.
    if (const CallInst *CI = dyn_cast<CallInst>(I)) {
      if (!isa<IntrinsicInst>(CI))
        Size += 3;
      else if (isa<VectorType>(CI->getType()))
        Size += 1;
    }
  }

  // Threading through a switch statement is particularly profitable.  If this
  // block ends in a switch, decrease its cost to make it more likely to
  // happen.
  if (isa<SwitchInst>(I))
    Size = Size > 6 ? Size-6 : 0;

  return Size;
}

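// Illustrative example (not taken from the original sources; block and value
// names are hypothetical): the kind of block that ThreadBlock below can
// thread through is one whose terminator branches on a PHI node with a
// constant incoming value, e.g.
//
//   bb:                                     ; preds = %pred1, %pred2
//     %cond = phi i1 [ true, %pred1 ], [ %x, %pred2 ]
//     %v = add i32 %a, %b
//     br i1 %cond, label %then, label %else
//
// Control arriving from %pred1 is known to take the true edge, so the edge
// pred1 -> bb can be redirected to a copy of bb that branches unconditionally
// to %then.  Under the cost model above this block costs 1 unit (the add; the
// PHI and the terminator are free), well under the default threshold of 6.
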
/// ThreadBlock - If there are any predecessors whose control can be threaded
/// through to a successor, transform them now.
bool JumpThreading::ThreadBlock(BasicBlock *BB) {
  // See if this block ends with a branch or switch.  If so, see if the
  // condition is a phi node.  If so, and if an entry of the phi node is a
  // constant, we can thread the block.
  Value *Condition;
  if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
    // Can't thread an unconditional jump.
    if (BI->isUnconditional()) return false;
    Condition = BI->getCondition();
  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
    Condition = SI->getCondition();
  else
    return false; // Must be an invoke.

  // If the terminator of this block is branching on a constant, simplify the
  // terminator to an unconditional branch.  This can occur due to threading in
  // other blocks.
  if (isa<ConstantInt>(Condition)) {
    DOUT << "  In block '" << BB->getNameStart()
         << "' folding terminator: " << *BB->getTerminator();
    ++NumFolds;
    ConstantFoldTerminator(BB);
    return true;
  }

  // If there is only a single predecessor of this block, there is nothing to
  // thread.
  if (BB->getSinglePredecessor())
    return false;

  // See if the condition is a phi node in the current block.
  PHINode *PN = dyn_cast<PHINode>(Condition);
  if (!PN || PN->getParent() != BB) return false;

  // See if the phi node has any constant values.  If so, we can determine
  // where the corresponding predecessor will branch.
  unsigned PredNo = ~0U;
  ConstantInt *PredCst = 0;
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
    if ((PredCst = dyn_cast<ConstantInt>(PN->getIncomingValue(i)))) {
      PredNo = i;
      break;
    }
  }

  // If no incoming value has a constant, we don't know the destination of any
  // predecessors.
  if (PredNo == ~0U)
    return false;

  // See if the cost of duplicating this block is low enough.
  unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB);
  if (JumpThreadCost > Threshold) {
    DOUT << "  Not threading BB '" << BB->getNameStart()
         << "' - Cost is too high: " << JumpThreadCost << "\n";
    return false;
  }

  // If so, we can actually do this threading.  Figure out which predecessor
  // and which successor we are threading for.
  BasicBlock *PredBB = PN->getIncomingBlock(PredNo);
  BasicBlock *SuccBB;
  if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
    SuccBB = BI->getSuccessor(PredCst == ConstantInt::getFalse());
  else {
    SwitchInst *SI = cast<SwitchInst>(BB->getTerminator());
    SuccBB = SI->getSuccessor(SI->findCaseValue(PredCst));
  }

  // If there are multiple preds with the same incoming value for the PHI,
  // factor them together so we get one block to thread for the whole group.
  // This is important for things like "phi i1 [true, true, false, true, x]"
  // where we only need to clone the block for the true blocks once.
  SmallVector<BasicBlock*, 16> CommonPreds;
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
    if (PN->getIncomingValue(i) == PredCst)
      CommonPreds.push_back(PN->getIncomingBlock(i));
  if (CommonPreds.size() != 1) {
    DOUT << "  Factoring out " << CommonPreds.size()
         << " common predecessors.\n";
    PredBB = SplitBlockPredecessors(BB, &CommonPreds[0], CommonPreds.size(),
                                    ".thr_comm", this);
  }

  DOUT << "  Threading edge from '" << PredBB->getNameStart() << "' to '"
       << SuccBB->getNameStart() << "' with cost: " << JumpThreadCost
       << ", across block:\n    " << *BB;

  ThreadEdge(BB, PredBB, SuccBB);
  ++NumThreads;
  return true;
}

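// Illustrative sketch (hypothetical names, not taken from the original
// sources) of what ThreadEdge below produces for the example shown before
// ThreadBlock, assuming %pred1 was the predecessor with the constant PHI
// entry:
//
//   Before:  pred1 -> bb -> {then, else}
//   After:   pred1 -> bb.thread -> then
//
//   bb.thread:                              ; preds = %pred1
//     %v1 = add i32 %a, %b
//     br label %then
//
// bb itself is left in place for the remaining predecessors: its PHI nodes
// lose their entries for %pred1 via removePredecessor, and any PHI nodes in
// %then gain an entry for %bb.thread.
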
/// ThreadEdge - We have decided that it is safe and profitable to thread an
/// edge from PredBB to SuccBB across BB.  Transform the IR to reflect this
/// change.
void JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,
                               BasicBlock *SuccBB) {
  // Jump threading cannot update SSA properties correctly if the values
  // defined in the duplicated block are used outside of the block itself.  For
  // this reason, we spill all values that are used outside of BB to the stack.
  for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
    if (I->isUsedOutsideOfBlock(BB)) {
      // We found a use of I outside of BB.  Create a new stack slot to
      // break this inter-block usage pattern.
      DemoteRegToStack(*I);
    }

  // We are going to have to map operands from the original BB block to the new
  // copy of the block 'NewBB'.  If there are PHI nodes in BB, evaluate them to
  // account for entry from PredBB.
  DenseMap<Instruction*, Value*> ValueMapping;

  BasicBlock *NewBB =
    BasicBlock::Create(BB->getName()+".thread", BB->getParent(), BB);
  NewBB->moveAfter(PredBB);

  BasicBlock::iterator BI = BB->begin();
  for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
    ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);

  // Clone the non-phi instructions of BB into NewBB, keeping track of the
  // mapping and using it to remap operands in the cloned instructions.
  for (; !isa<TerminatorInst>(BI); ++BI) {
    Instruction *New = BI->clone();
    New->setName(BI->getNameStart());
    NewBB->getInstList().push_back(New);
    ValueMapping[BI] = New;

    // Remap operands to patch up intra-block references.
    for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
      if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i)))
        if (Value *Remapped = ValueMapping[Inst])
          New->setOperand(i, Remapped);
  }

  // We didn't copy the terminator from BB over to NewBB, because there is now
  // an unconditional jump to SuccBB.  Insert the unconditional jump.
  BranchInst::Create(SuccBB, NewBB);

  // Check to see if SuccBB has PHI nodes.  If so, we need to add entries to
  // the PHI nodes for NewBB now.
  for (BasicBlock::iterator PNI = SuccBB->begin(); isa<PHINode>(PNI); ++PNI) {
    PHINode *PN = cast<PHINode>(PNI);
    // Ok, we have a PHI node.  Figure out what the incoming value was for BB.
    Value *IV = PN->getIncomingValueForBlock(BB);

    // Remap the value if necessary.
    if (Instruction *Inst = dyn_cast<Instruction>(IV))
      if (Value *MappedIV = ValueMapping[Inst])
        IV = MappedIV;
    PN->addIncoming(IV, NewBB);
  }

  // Finally, NewBB is good to go.  Update the terminator of PredBB to jump to
  // NewBB instead of BB.  This eliminates predecessors from BB, which requires
  // us to simplify any PHI nodes in BB.
  TerminatorInst *PredTerm = PredBB->getTerminator();
  for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
    if (PredTerm->getSuccessor(i) == BB) {
      BB->removePredecessor(PredBB);
      PredTerm->setSuccessor(i, NewBB);
    }
}
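
// Usage sketch (illustrative; assumes the standard opt/PassManager setup of
// this LLVM version): the pass registered above can be run from the command
// line as
//
//   opt -jump-threading input.bc -o output.bc
//
// or scheduled programmatically with
//
//   PM.add(createJumpThreadingPass());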