1 //===-- MachineCSE.cpp - Machine Common Subexpression Elimination Pass ----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass performs global common subexpression elimination on machine 11 // instructions using a scoped hash table based value numbering scheme. It 12 // must be run while the machine function is still in SSA form. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #define DEBUG_TYPE "machine-cse" 17 #include "llvm/CodeGen/Passes.h" 18 #include "llvm/CodeGen/MachineDominators.h" 19 #include "llvm/CodeGen/MachineInstr.h" 20 #include "llvm/CodeGen/MachineRegisterInfo.h" 21 #include "llvm/Analysis/AliasAnalysis.h" 22 #include "llvm/Target/TargetInstrInfo.h" 23 #include "llvm/ADT/DenseMap.h" 24 #include "llvm/ADT/ScopedHashTable.h" 25 #include "llvm/ADT/Statistic.h" 26 #include "llvm/Support/CommandLine.h" 27 #include "llvm/Support/Debug.h" 28 29 using namespace llvm; 30 31 STATISTIC(NumCoalesces, "Number of copies coalesced"); 32 STATISTIC(NumCSEs, "Number of common subexpression eliminated"); 33 STATISTIC(NumPhysCSEs, "Number of phyreg defining common subexpr eliminated"); 34 35 namespace { 36 class MachineCSE : public MachineFunctionPass { 37 const TargetInstrInfo *TII; 38 const TargetRegisterInfo *TRI; 39 AliasAnalysis *AA; 40 MachineDominatorTree *DT; 41 MachineRegisterInfo *MRI; 42 public: 43 static char ID; // Pass identification 44 MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) {} 45 46 virtual bool runOnMachineFunction(MachineFunction &MF); 47 48 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 49 AU.setPreservesCFG(); 50 MachineFunctionPass::getAnalysisUsage(AU); 51 AU.addRequired<AliasAnalysis>(); 52 AU.addPreservedID(MachineLoopInfoID); 53 AU.addRequired<MachineDominatorTree>(); 54 AU.addPreserved<MachineDominatorTree>(); 55 } 56 57 virtual void releaseMemory() { 58 ScopeMap.clear(); 59 Exps.clear(); 60 } 61 62 private: 63 const unsigned LookAheadLimit; 64 typedef ScopedHashTableScope<MachineInstr*, unsigned, 65 MachineInstrExpressionTrait> ScopeType; 66 DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap; 67 ScopedHashTable<MachineInstr*, unsigned, MachineInstrExpressionTrait> VNT; 68 SmallVector<MachineInstr*, 64> Exps; 69 unsigned CurrVN; 70 71 bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); 72 bool isPhysDefTriviallyDead(unsigned Reg, 73 MachineBasicBlock::const_iterator I, 74 MachineBasicBlock::const_iterator E) const ; 75 bool hasLivePhysRegDefUse(const MachineInstr *MI, 76 const MachineBasicBlock *MBB, 77 unsigned &PhysDef) const; 78 bool PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI, 79 unsigned PhysDef) const; 80 bool isCSECandidate(MachineInstr *MI); 81 bool isProfitableToCSE(unsigned CSReg, unsigned Reg, 82 MachineInstr *CSMI, MachineInstr *MI); 83 void EnterScope(MachineBasicBlock *MBB); 84 void ExitScope(MachineBasicBlock *MBB); 85 bool ProcessBlock(MachineBasicBlock *MBB); 86 void ExitScopeIfDone(MachineDomTreeNode *Node, 87 DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, 88 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap); 89 bool PerformCSE(MachineDomTreeNode *Node); 90 }; 91 } // end anonymous namespace 92 93 char MachineCSE::ID = 0; 94 INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse", 95 "Machine Common Subexpression Elimination", false, false) 96 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 97 INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 98 INITIALIZE_PASS_END(MachineCSE, "machine-cse", 99 "Machine Common Subexpression Elimination", false, false) 100 101 FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); } 102 103 bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, 104 MachineBasicBlock *MBB) { 105 bool Changed = false; 106 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 107 MachineOperand &MO = MI->getOperand(i); 108 if (!MO.isReg() || !MO.isUse()) 109 continue; 110 unsigned Reg = MO.getReg(); 111 if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) 112 continue; 113 if (!MRI->hasOneNonDBGUse(Reg)) 114 // Only coalesce single use copies. This ensure the copy will be 115 // deleted. 116 continue; 117 MachineInstr *DefMI = MRI->getVRegDef(Reg); 118 if (DefMI->getParent() != MBB) 119 continue; 120 if (!DefMI->isCopy()) 121 continue; 122 unsigned SrcReg = DefMI->getOperand(1).getReg(); 123 if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) 124 continue; 125 if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg()) 126 continue; 127 if (!MRI->constrainRegClass(SrcReg, MRI->getRegClass(Reg))) 128 continue; 129 DEBUG(dbgs() << "Coalescing: " << *DefMI); 130 DEBUG(dbgs() << "*** to: " << *MI); 131 MO.setReg(SrcReg); 132 MRI->clearKillFlags(SrcReg); 133 DefMI->eraseFromParent(); 134 ++NumCoalesces; 135 Changed = true; 136 } 137 138 return Changed; 139 } 140 141 bool 142 MachineCSE::isPhysDefTriviallyDead(unsigned Reg, 143 MachineBasicBlock::const_iterator I, 144 MachineBasicBlock::const_iterator E) const { 145 unsigned LookAheadLeft = LookAheadLimit; 146 while (LookAheadLeft) { 147 // Skip over dbg_value's. 148 while (I != E && I->isDebugValue()) 149 ++I; 150 151 if (I == E) 152 // Reached end of block, register is obviously dead. 153 return true; 154 155 bool SeenDef = false; 156 for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { 157 const MachineOperand &MO = I->getOperand(i); 158 if (!MO.isReg() || !MO.getReg()) 159 continue; 160 if (!TRI->regsOverlap(MO.getReg(), Reg)) 161 continue; 162 if (MO.isUse()) 163 // Found a use! 164 return false; 165 SeenDef = true; 166 } 167 if (SeenDef) 168 // See a def of Reg (or an alias) before encountering any use, it's 169 // trivially dead. 170 return true; 171 172 --LookAheadLeft; 173 ++I; 174 } 175 return false; 176 } 177 178 /// hasLivePhysRegDefUse - Return true if the specified instruction read / write 179 /// physical registers (except for dead defs of physical registers). It also 180 /// returns the physical register def by reference if it's the only one and the 181 /// instruction does not uses a physical register. 182 bool MachineCSE::hasLivePhysRegDefUse(const MachineInstr *MI, 183 const MachineBasicBlock *MBB, 184 unsigned &PhysDef) const { 185 PhysDef = 0; 186 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 187 const MachineOperand &MO = MI->getOperand(i); 188 if (!MO.isReg()) 189 continue; 190 unsigned Reg = MO.getReg(); 191 if (!Reg) 192 continue; 193 if (TargetRegisterInfo::isVirtualRegister(Reg)) 194 continue; 195 if (MO.isUse()) { 196 // Can't touch anything to read a physical register. 197 PhysDef = 0; 198 return true; 199 } 200 if (MO.isDead()) 201 // If the def is dead, it's ok. 202 continue; 203 // Ok, this is a physical register def that's not marked "dead". That's 204 // common since this pass is run before livevariables. We can scan 205 // forward a few instructions and check if it is obviously dead. 206 if (PhysDef) { 207 // Multiple physical register defs. These are rare, forget about it. 208 PhysDef = 0; 209 return true; 210 } 211 PhysDef = Reg; 212 } 213 214 if (PhysDef) { 215 MachineBasicBlock::const_iterator I = MI; I = llvm::next(I); 216 if (!isPhysDefTriviallyDead(PhysDef, I, MBB->end())) 217 return true; 218 } 219 return false; 220 } 221 222 bool MachineCSE::PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI, 223 unsigned PhysDef) const { 224 // For now conservatively returns false if the common subexpression is 225 // not in the same basic block as the given instruction. 226 MachineBasicBlock *MBB = MI->getParent(); 227 if (CSMI->getParent() != MBB) 228 return false; 229 MachineBasicBlock::const_iterator I = CSMI; I = llvm::next(I); 230 MachineBasicBlock::const_iterator E = MI; 231 unsigned LookAheadLeft = LookAheadLimit; 232 while (LookAheadLeft) { 233 // Skip over dbg_value's. 234 while (I != E && I->isDebugValue()) 235 ++I; 236 237 if (I == E) 238 return true; 239 if (I->modifiesRegister(PhysDef, TRI)) 240 return false; 241 242 --LookAheadLeft; 243 ++I; 244 } 245 246 return false; 247 } 248 249 bool MachineCSE::isCSECandidate(MachineInstr *MI) { 250 if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() || 251 MI->isKill() || MI->isInlineAsm() || MI->isDebugValue()) 252 return false; 253 254 // Ignore copies. 255 if (MI->isCopyLike()) 256 return false; 257 258 // Ignore stuff that we obviously can't move. 259 const TargetInstrDesc &TID = MI->getDesc(); 260 if (TID.mayStore() || TID.isCall() || TID.isTerminator() || 261 TID.hasUnmodeledSideEffects()) 262 return false; 263 264 if (TID.mayLoad()) { 265 // Okay, this instruction does a load. As a refinement, we allow the target 266 // to decide whether the loaded value is actually a constant. If so, we can 267 // actually use it as a load. 268 if (!MI->isInvariantLoad(AA)) 269 // FIXME: we should be able to hoist loads with no other side effects if 270 // there are no other instructions which can change memory in this loop. 271 // This is a trivial form of alias analysis. 272 return false; 273 } 274 return true; 275 } 276 277 /// isProfitableToCSE - Return true if it's profitable to eliminate MI with a 278 /// common expression that defines Reg. 279 bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, 280 MachineInstr *CSMI, MachineInstr *MI) { 281 // FIXME: Heuristics that works around the lack the live range splitting. 282 283 // Heuristics #1: Don't cse "cheap" computating if the def is not local or in an 284 // immediate predecessor. We don't want to increase register pressure and end up 285 // causing other computation to be spilled. 286 if (MI->getDesc().isAsCheapAsAMove()) { 287 MachineBasicBlock *CSBB = CSMI->getParent(); 288 MachineBasicBlock *BB = MI->getParent(); 289 if (CSBB != BB && 290 find(CSBB->succ_begin(), CSBB->succ_end(), BB) == CSBB->succ_end()) 291 return false; 292 } 293 294 // Heuristics #2: If the expression doesn't not use a vr and the only use 295 // of the redundant computation are copies, do not cse. 296 bool HasVRegUse = false; 297 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 298 const MachineOperand &MO = MI->getOperand(i); 299 if (MO.isReg() && MO.isUse() && MO.getReg() && 300 TargetRegisterInfo::isVirtualRegister(MO.getReg())) { 301 HasVRegUse = true; 302 break; 303 } 304 } 305 if (!HasVRegUse) { 306 bool HasNonCopyUse = false; 307 for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), 308 E = MRI->use_nodbg_end(); I != E; ++I) { 309 MachineInstr *Use = &*I; 310 // Ignore copies. 311 if (!Use->isCopyLike()) { 312 HasNonCopyUse = true; 313 break; 314 } 315 } 316 if (!HasNonCopyUse) 317 return false; 318 } 319 320 // Heuristics #3: If the common subexpression is used by PHIs, do not reuse 321 // it unless the defined value is already used in the BB of the new use. 322 bool HasPHI = false; 323 SmallPtrSet<MachineBasicBlock*, 4> CSBBs; 324 for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(CSReg), 325 E = MRI->use_nodbg_end(); I != E; ++I) { 326 MachineInstr *Use = &*I; 327 HasPHI |= Use->isPHI(); 328 CSBBs.insert(Use->getParent()); 329 } 330 331 if (!HasPHI) 332 return true; 333 return CSBBs.count(MI->getParent()); 334 } 335 336 void MachineCSE::EnterScope(MachineBasicBlock *MBB) { 337 DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n'); 338 ScopeType *Scope = new ScopeType(VNT); 339 ScopeMap[MBB] = Scope; 340 } 341 342 void MachineCSE::ExitScope(MachineBasicBlock *MBB) { 343 DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n'); 344 DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB); 345 assert(SI != ScopeMap.end()); 346 ScopeMap.erase(SI); 347 delete SI->second; 348 } 349 350 bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { 351 bool Changed = false; 352 353 SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs; 354 for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) { 355 MachineInstr *MI = &*I; 356 ++I; 357 358 if (!isCSECandidate(MI)) 359 continue; 360 361 bool DefPhys = false; 362 bool FoundCSE = VNT.count(MI); 363 if (!FoundCSE) { 364 // Look for trivial copy coalescing opportunities. 365 if (PerformTrivialCoalescing(MI, MBB)) { 366 // After coalescing MI itself may become a copy. 367 if (MI->isCopyLike()) 368 continue; 369 FoundCSE = VNT.count(MI); 370 } 371 } 372 // FIXME: commute commutable instructions? 373 374 // If the instruction defines a physical register and the value *may* be 375 // used, then it's not safe to replace it with a common subexpression. 376 unsigned PhysDef = 0; 377 if (FoundCSE && hasLivePhysRegDefUse(MI, MBB, PhysDef)) { 378 FoundCSE = false; 379 380 // ... Unless the CS is local and it also defines the physical register 381 // which is not clobbered in between. 382 if (PhysDef) { 383 unsigned CSVN = VNT.lookup(MI); 384 MachineInstr *CSMI = Exps[CSVN]; 385 if (PhysRegDefReaches(CSMI, MI, PhysDef)) { 386 FoundCSE = true; 387 DefPhys = true; 388 } 389 } 390 } 391 392 if (!FoundCSE) { 393 VNT.insert(MI, CurrVN++); 394 Exps.push_back(MI); 395 continue; 396 } 397 398 // Found a common subexpression, eliminate it. 399 unsigned CSVN = VNT.lookup(MI); 400 MachineInstr *CSMI = Exps[CSVN]; 401 DEBUG(dbgs() << "Examining: " << *MI); 402 DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI); 403 404 // Check if it's profitable to perform this CSE. 405 bool DoCSE = true; 406 unsigned NumDefs = MI->getDesc().getNumDefs(); 407 for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) { 408 MachineOperand &MO = MI->getOperand(i); 409 if (!MO.isReg() || !MO.isDef()) 410 continue; 411 unsigned OldReg = MO.getReg(); 412 unsigned NewReg = CSMI->getOperand(i).getReg(); 413 if (OldReg == NewReg) 414 continue; 415 assert(TargetRegisterInfo::isVirtualRegister(OldReg) && 416 TargetRegisterInfo::isVirtualRegister(NewReg) && 417 "Do not CSE physical register defs!"); 418 if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) { 419 DoCSE = false; 420 break; 421 } 422 CSEPairs.push_back(std::make_pair(OldReg, NewReg)); 423 --NumDefs; 424 } 425 426 // Actually perform the elimination. 427 if (DoCSE) { 428 for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) { 429 MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second); 430 MRI->clearKillFlags(CSEPairs[i].second); 431 } 432 MI->eraseFromParent(); 433 ++NumCSEs; 434 if (DefPhys) 435 ++NumPhysCSEs; 436 } else { 437 DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n"); 438 VNT.insert(MI, CurrVN++); 439 Exps.push_back(MI); 440 } 441 CSEPairs.clear(); 442 } 443 444 return Changed; 445 } 446 447 /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given 448 /// dominator tree node if its a leaf or all of its children are done. Walk 449 /// up the dominator tree to destroy ancestors which are now done. 450 void 451 MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node, 452 DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, 453 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) { 454 if (OpenChildren[Node]) 455 return; 456 457 // Pop scope. 458 ExitScope(Node->getBlock()); 459 460 // Now traverse upwards to pop ancestors whose offsprings are all done. 461 while (MachineDomTreeNode *Parent = ParentMap[Node]) { 462 unsigned Left = --OpenChildren[Parent]; 463 if (Left != 0) 464 break; 465 ExitScope(Parent->getBlock()); 466 Node = Parent; 467 } 468 } 469 470 bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { 471 SmallVector<MachineDomTreeNode*, 32> Scopes; 472 SmallVector<MachineDomTreeNode*, 8> WorkList; 473 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap; 474 DenseMap<MachineDomTreeNode*, unsigned> OpenChildren; 475 476 CurrVN = 0; 477 478 // Perform a DFS walk to determine the order of visit. 479 WorkList.push_back(Node); 480 do { 481 Node = WorkList.pop_back_val(); 482 Scopes.push_back(Node); 483 const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); 484 unsigned NumChildren = Children.size(); 485 OpenChildren[Node] = NumChildren; 486 for (unsigned i = 0; i != NumChildren; ++i) { 487 MachineDomTreeNode *Child = Children[i]; 488 ParentMap[Child] = Node; 489 WorkList.push_back(Child); 490 } 491 } while (!WorkList.empty()); 492 493 // Now perform CSE. 494 bool Changed = false; 495 for (unsigned i = 0, e = Scopes.size(); i != e; ++i) { 496 MachineDomTreeNode *Node = Scopes[i]; 497 MachineBasicBlock *MBB = Node->getBlock(); 498 EnterScope(MBB); 499 Changed |= ProcessBlock(MBB); 500 // If it's a leaf node, it's done. Traverse upwards to pop ancestors. 501 ExitScopeIfDone(Node, OpenChildren, ParentMap); 502 } 503 504 return Changed; 505 } 506 507 bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { 508 TII = MF.getTarget().getInstrInfo(); 509 TRI = MF.getTarget().getRegisterInfo(); 510 MRI = &MF.getRegInfo(); 511 AA = &getAnalysis<AliasAnalysis>(); 512 DT = &getAnalysis<MachineDominatorTree>(); 513 return PerformCSE(DT->getRootNode()); 514 } 515