1 //===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass performs global common subexpression elimination on machine 10 // instructions using a scoped hash table based value numbering scheme. It 11 // must be run while the machine function is still in SSA form. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/ADT/DenseMap.h" 16 #include "llvm/ADT/ScopedHashTable.h" 17 #include "llvm/ADT/SmallPtrSet.h" 18 #include "llvm/ADT/SmallSet.h" 19 #include "llvm/ADT/SmallVector.h" 20 #include "llvm/ADT/Statistic.h" 21 #include "llvm/Analysis/AliasAnalysis.h" 22 #include "llvm/Analysis/CFG.h" 23 #include "llvm/CodeGen/MachineBasicBlock.h" 24 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" 25 #include "llvm/CodeGen/MachineDominators.h" 26 #include "llvm/CodeGen/MachineFunction.h" 27 #include "llvm/CodeGen/MachineFunctionPass.h" 28 #include "llvm/CodeGen/MachineInstr.h" 29 #include "llvm/CodeGen/MachineOperand.h" 30 #include "llvm/CodeGen/MachineRegisterInfo.h" 31 #include "llvm/CodeGen/Passes.h" 32 #include "llvm/CodeGen/TargetInstrInfo.h" 33 #include "llvm/CodeGen/TargetOpcodes.h" 34 #include "llvm/CodeGen/TargetRegisterInfo.h" 35 #include "llvm/CodeGen/TargetSubtargetInfo.h" 36 #include "llvm/InitializePasses.h" 37 #include "llvm/MC/MCRegister.h" 38 #include "llvm/MC/MCRegisterInfo.h" 39 #include "llvm/Pass.h" 40 #include "llvm/Support/Allocator.h" 41 #include "llvm/Support/Debug.h" 42 #include "llvm/Support/RecyclingAllocator.h" 43 #include "llvm/Support/raw_ostream.h" 44 #include <cassert> 45 #include <iterator> 46 #include <utility> 47 #include <vector> 48 49 using 
namespace llvm;

#define DEBUG_TYPE "machine-cse"

STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumCSEs, "Number of common subexpression eliminated");
STATISTIC(NumPREs, "Number of partial redundant expression"
                   " transformed to fully redundant");
STATISTIC(NumPhysCSEs,
          "Number of physreg referencing common subexpr eliminated");
STATISTIC(NumCrossBBCSEs,
          "Number of cross-MBB physreg referencing CS eliminated");
STATISTIC(NumCommutes, "Number of copies coalesced after commuting");

// Threshold to avoid excessive cost to compute isProfitableToCSE.
static cl::opt<int>
    CSUsesThreshold("csuses-threshold", cl::Hidden, cl::init(1024),
                    cl::desc("Threshold for the size of CSUses"));

static cl::opt<bool> AggressiveMachineCSE(
    "aggressive-machine-cse", cl::Hidden, cl::init(false),
    cl::desc("Override the profitability heuristics for Machine CSE"));

namespace {

class MachineCSE : public MachineFunctionPass {
  const TargetInstrInfo *TII = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  AliasAnalysis *AA = nullptr;
  MachineDominatorTree *DT = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  MachineBlockFrequencyInfo *MBFI = nullptr;

public:
  static char ID; // Pass identification

  MachineCSE() : MachineFunctionPass(ID) {
    initializeMachineCSEPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
    AU.addRequired<AAResultsWrapperPass>();
    AU.addPreservedID(MachineLoopInfoID);
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
    AU.addRequired<MachineBlockFrequencyInfo>();
    AU.addPreserved<MachineBlockFrequencyInfo>();
  }

  // This pass only makes sense while the function is still in SSA form.
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::IsSSA);
  }

  void releaseMemory() override {
    ScopeMap.clear();
    PREMap.clear();
    Exps.clear();
  }

private:
  using AllocatorTy =
      RecyclingAllocator<BumpPtrAllocator,
                         ScopedHashTableVal<MachineInstr *, unsigned>>;
  using ScopedHTType =
      ScopedHashTable<MachineInstr *, unsigned, MachineInstrExpressionTrait,
                      AllocatorTy>;
  using ScopeType = ScopedHTType::ScopeTy;
  // Pairs of (operand index, physical register) for non-dead physreg defs.
  using PhysDefVector = SmallVector<std::pair<unsigned, unsigned>, 2>;

  unsigned LookAheadLimit = 0;
  // One open hash-table scope per basic block on the current dom-tree path.
  DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap;
  // Maps an instruction "expression" to the block where it was first seen,
  // used by the simple PRE step.
  DenseMap<MachineInstr *, MachineBasicBlock *, MachineInstrExpressionTrait>
      PREMap;
  // Scoped value-numbering table: expression -> value number.
  ScopedHTType VNT;
  // Exps[VN] is the instruction that defines value number VN.
  SmallVector<MachineInstr *, 64> Exps;
  unsigned CurrVN = 0;

  bool PerformTrivialCopyPropagation(MachineInstr *MI, MachineBasicBlock *MBB);
  bool isPhysDefTriviallyDead(MCRegister Reg,
                              MachineBasicBlock::const_iterator I,
                              MachineBasicBlock::const_iterator E) const;
  bool hasLivePhysRegDefUses(const MachineInstr *MI,
                             const MachineBasicBlock *MBB,
                             SmallSet<MCRegister, 8> &PhysRefs,
                             PhysDefVector &PhysDefs, bool &PhysUseDef) const;
  bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
                        SmallSet<MCRegister, 8> &PhysRefs,
                        PhysDefVector &PhysDefs, bool &NonLocal) const;
  bool isCSECandidate(MachineInstr *MI);
  bool isProfitableToCSE(Register CSReg, Register Reg, MachineBasicBlock *CSBB,
                         MachineInstr *MI);
  void EnterScope(MachineBasicBlock *MBB);
  void ExitScope(MachineBasicBlock *MBB);
  bool ProcessBlockCSE(MachineBasicBlock *MBB);
  void ExitScopeIfDone(MachineDomTreeNode *Node,
                       DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren);
  bool PerformCSE(MachineDomTreeNode *Node);

  bool isPRECandidate(MachineInstr *MI, SmallSet<MCRegister, 8> &PhysRefs);
  bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
  bool PerformSimplePRE(MachineDominatorTree *DT);
  /// Heuristics to see if it's profitable to move common computations of MBB
  /// and MBB1 to CandidateBB.
  bool isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
                               MachineBasicBlock *MBB, MachineBasicBlock *MBB1);
};

} // end anonymous namespace

char MachineCSE::ID = 0;

char &llvm::MachineCSEID = MachineCSE::ID;

INITIALIZE_PASS_BEGIN(MachineCSE, DEBUG_TYPE,
                      "Machine Common Subexpression Elimination", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineCSE, DEBUG_TYPE,
                    "Machine Common Subexpression Elimination", false, false)

/// The source register of a COPY machine instruction can be propagated to all
/// its users, and this propagation could increase the probability of finding
/// common subexpressions. If the COPY has only one user, the COPY itself can
/// be removed.
bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
                                               MachineBasicBlock *MBB) {
  bool Changed = false;
  for (MachineOperand &MO : MI->all_uses()) {
    Register Reg = MO.getReg();
    if (!Reg.isVirtual())
      continue;
    bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
    MachineInstr *DefMI = MRI->getVRegDef(Reg);
    if (!DefMI->isCopy())
      continue;
    Register SrcReg = DefMI->getOperand(1).getReg();
    if (!SrcReg.isVirtual())
      continue;
    if (DefMI->getOperand(0).getSubReg())
      continue;
    // FIXME: We should trivially coalesce subregister copies to expose CSE
    // opportunities on instructions with truncated operands (see
    // cse-add-with-overflow.ll). This can be done here as follows:
    // if (SrcSubReg)
    //   RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
    //                                      SrcSubReg);
    // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
    //
    // The 2-addr pass has been updated to handle coalesced subregs. However,
    // some machine-specific code still can't handle it.
    // To handle it properly we also need a way to find a constrained
    // subregister class given a super-reg class and subreg index.
    if (DefMI->getOperand(1).getSubReg())
      continue;
    if (!MRI->constrainRegAttrs(SrcReg, Reg))
      continue;
    LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
    LLVM_DEBUG(dbgs() << "*** to: " << *MI);

    // Propagate SrcReg of copies to MI.
    MO.setReg(SrcReg);
    MRI->clearKillFlags(SrcReg);
    // Coalesce single use copies.
    if (OnlyOneUse) {
      // If (and only if) we've eliminated all uses of the copy, also
      // copy-propagate to any debug-users of MI, or they'll be left using
      // an undefined value.
      DefMI->changeDebugValuesDefReg(SrcReg);

      DefMI->eraseFromParent();
      ++NumCoalesces;
    }
    Changed = true;
  }

  return Changed;
}

// Scan forward from I up to LookAheadLimit instructions looking for a def of
// Reg (or an alias) with no intervening use; such a def proves the physreg
// value produced before I is trivially dead.
bool MachineCSE::isPhysDefTriviallyDead(
    MCRegister Reg, MachineBasicBlock::const_iterator I,
    MachineBasicBlock::const_iterator E) const {
  unsigned LookAheadLeft = LookAheadLimit;
  while (LookAheadLeft) {
    // Skip over dbg_value's.
    I = skipDebugInstructionsForward(I, E);

    if (I == E)
      // Reached end of block, we don't know if register is dead or not.
      return false;

    bool SeenDef = false;
    for (const MachineOperand &MO : I->operands()) {
      if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
        SeenDef = true;
      if (!MO.isReg() || !MO.getReg())
        continue;
      if (!TRI->regsOverlap(MO.getReg(), Reg))
        continue;
      if (MO.isUse())
        // Found a use!
        return false;
      SeenDef = true;
    }
    if (SeenDef)
      // See a def of Reg (or an alias) before encountering any use, it's
      // trivially dead.
      return true;

    --LookAheadLeft;
    ++I;
  }
  return false;
}

static bool isCallerPreservedOrConstPhysReg(MCRegister Reg,
                                            const MachineOperand &MO,
                                            const MachineFunction &MF,
                                            const TargetRegisterInfo &TRI,
                                            const TargetInstrInfo &TII) {
  // MachineRegisterInfo::isConstantPhysReg directly called by
  // MachineRegisterInfo::isCallerPreservedOrConstPhysReg expects the
  // reserved registers to be frozen. That doesn't cause a problem post-ISel as
  // most (if not all) targets freeze reserved registers right after ISel.
  //
  // It does cause issues mid-GlobalISel, however, hence the additional
  // reservedRegsFrozen check.
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  return TRI.isCallerPreservedPhysReg(Reg, MF) || TII.isIgnorableUse(MO) ||
         (MRI.reservedRegsFrozen() && MRI.isConstantPhysReg(Reg));
}

/// hasLivePhysRegDefUses - Return true if the specified instruction read/write
/// physical registers (except for dead defs of physical registers). It also
/// returns the physical register def by reference if it's the only one and the
/// instruction does not use a physical register.
bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
                                       const MachineBasicBlock *MBB,
                                       SmallSet<MCRegister, 8> &PhysRefs,
                                       PhysDefVector &PhysDefs,
                                       bool &PhysUseDef) const {
  // First, add all uses to PhysRefs.
  for (const MachineOperand &MO : MI->all_uses()) {
    Register Reg = MO.getReg();
    if (!Reg)
      continue;
    if (Reg.isVirtual())
      continue;
    // Reading either caller preserved or constant physregs is ok.
    if (!isCallerPreservedOrConstPhysReg(Reg.asMCReg(), MO, *MI->getMF(), *TRI,
                                         *TII))
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
        PhysRefs.insert(*AI);
  }

  // Next, collect all defs into PhysDefs. If any is already in PhysRefs
  // (which currently contains only uses), set the PhysUseDef flag.
  PhysUseDef = false;
  MachineBasicBlock::const_iterator I = MI; I = std::next(I);
  for (const auto &MOP : llvm::enumerate(MI->operands())) {
    const MachineOperand &MO = MOP.value();
    if (!MO.isReg() || !MO.isDef())
      continue;
    Register Reg = MO.getReg();
    if (!Reg)
      continue;
    if (Reg.isVirtual())
      continue;
    // Check against PhysRefs even if the def is "dead".
    if (PhysRefs.count(Reg.asMCReg()))
      PhysUseDef = true;
    // If the def is dead, it's ok. But the def may not be marked "dead".
    // That's common since this pass is run before livevariables. We can scan
    // forward a few instructions and check if it is obviously dead.
    if (!MO.isDead() && !isPhysDefTriviallyDead(Reg.asMCReg(), I, MBB->end()))
      PhysDefs.push_back(std::make_pair(MOP.index(), Reg));
  }

  // Finally, add all defs to PhysRefs as well.
  for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
    for (MCRegAliasIterator AI(PhysDefs[i].second, TRI, true); AI.isValid();
         ++AI)
      PhysRefs.insert(*AI);

  return !PhysRefs.empty();
}

bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
                                  SmallSet<MCRegister, 8> &PhysRefs,
                                  PhysDefVector &PhysDefs,
                                  bool &NonLocal) const {
  // For now conservatively returns false if the common subexpression is
  // not in the same basic block as the given instruction. The only exception
  // is if the common subexpression is in the sole predecessor block.
  const MachineBasicBlock *MBB = MI->getParent();
  const MachineBasicBlock *CSMBB = CSMI->getParent();

  bool CrossMBB = false;
  if (CSMBB != MBB) {
    if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
      return false;

    for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
      if (MRI->isAllocatable(PhysDefs[i].second) ||
          MRI->isReserved(PhysDefs[i].second))
        // Avoid extending live range of physical registers if they are
        // allocatable or reserved.
        return false;
    }
    CrossMBB = true;
  }
  MachineBasicBlock::const_iterator I = CSMI; I = std::next(I);
  MachineBasicBlock::const_iterator E = MI;
  MachineBasicBlock::const_iterator EE = CSMBB->end();
  unsigned LookAheadLeft = LookAheadLimit;
  while (LookAheadLeft) {
    // Skip over dbg_value's.
    while (I != E && I != EE && I->isDebugInstr())
      ++I;

    if (I == EE) {
      assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
      (void)CrossMBB;
      CrossMBB = false;
      NonLocal = true;
      I = MBB->begin();
      EE = MBB->end();
      continue;
    }

    if (I == E)
      return true;

    for (const MachineOperand &MO : I->operands()) {
      // RegMasks go on instructions like calls that clobber lots of physregs.
      // Don't attempt to CSE across such an instruction.
      if (MO.isRegMask())
        return false;
      if (!MO.isReg() || !MO.isDef())
        continue;
      Register MOReg = MO.getReg();
      if (MOReg.isVirtual())
        continue;
      if (PhysRefs.count(MOReg.asMCReg()))
        return false;
    }

    --LookAheadLeft;
    ++I;
  }

  return false;
}

bool MachineCSE::isCSECandidate(MachineInstr *MI) {
  if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
      MI->isInlineAsm() || MI->isDebugInstr() || MI->isJumpTableDebugInfo())
    return false;

  // Ignore copies.
  if (MI->isCopyLike())
    return false;

  // Ignore stuff that we obviously can't move.
  if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
      MI->mayRaiseFPException() || MI->hasUnmodeledSideEffects())
    return false;

  if (MI->mayLoad()) {
    // Okay, this instruction does a load. As a refinement, we allow the target
    // to decide whether the loaded value is actually a constant. If so, we can
    // actually use it as a load.
    if (!MI->isDereferenceableInvariantLoad())
      // FIXME: we should be able to hoist loads with no other side effects if
      // there are no other instructions which can change memory in this loop.
      // This is a trivial form of alias analysis.
      return false;
  }

  // Ignore stack guard loads, otherwise the register that holds CSEed value may
  // be spilled and get loaded back with corrupted data.
  if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
    return false;

  return true;
}

/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
/// common expression that defines Reg. CSBB is basic block where CSReg is
/// defined.
bool MachineCSE::isProfitableToCSE(Register CSReg, Register Reg,
                                   MachineBasicBlock *CSBB, MachineInstr *MI) {
  if (AggressiveMachineCSE)
    return true;

  // FIXME: Heuristics that works around the lack the live range splitting.

  // If CSReg is used at all uses of Reg, CSE should not increase register
  // pressure of CSReg.
  bool MayIncreasePressure = true;
  if (CSReg.isVirtual() && Reg.isVirtual()) {
    MayIncreasePressure = false;
    SmallPtrSet<MachineInstr*, 8> CSUses;
    int NumOfUses = 0;
    for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
      CSUses.insert(&MI);
      // Too costly to compute if NumOfUses is very large. Conservatively assume
      // MayIncreasePressure to avoid spending too much time here.
      if (++NumOfUses > CSUsesThreshold) {
        MayIncreasePressure = true;
        break;
      }
    }
    if (!MayIncreasePressure)
      for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
        if (!CSUses.count(&MI)) {
          MayIncreasePressure = true;
          break;
        }
      }
  }
  if (!MayIncreasePressure) return true;

  // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
  // an immediate predecessor. We don't want to increase register pressure and
  // end up causing other computation to be spilled.
  if (TII->isAsCheapAsAMove(*MI)) {
    MachineBasicBlock *BB = MI->getParent();
    if (CSBB != BB && !CSBB->isSuccessor(BB))
      return false;
  }

  // Heuristics #2: If the expression doesn't use a vr and the only use
  // of the redundant computation are copies, do not cse.
  bool HasVRegUse = false;
  for (const MachineOperand &MO : MI->all_uses()) {
    if (MO.getReg().isVirtual()) {
      HasVRegUse = true;
      break;
    }
  }
  if (!HasVRegUse) {
    bool HasNonCopyUse = false;
    for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
      // Ignore copies.
      if (!MI.isCopyLike()) {
        HasNonCopyUse = true;
        break;
      }
    }
    if (!HasNonCopyUse)
      return false;
  }

  // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
  // it unless the defined value is already used in the BB of the new use.
  bool HasPHI = false;
  for (MachineInstr &UseMI : MRI->use_nodbg_instructions(CSReg)) {
    HasPHI |= UseMI.isPHI();
    if (UseMI.getParent() == MI->getParent())
      return true;
  }

  return !HasPHI;
}

void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
  LLVM_DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
  ScopeType *Scope = new ScopeType(VNT);
  ScopeMap[MBB] = Scope;
}

void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
  LLVM_DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
  DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
  assert(SI != ScopeMap.end());
  delete SI->second;
  ScopeMap.erase(SI);
}

// Value-number every candidate instruction in MBB, replacing any instruction
// whose expression is already in the scoped table (and is safe/profitable to
// reuse) with the existing definition.
bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
  bool Changed = false;

  SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
  SmallVector<unsigned, 2> ImplicitDefsToUpdate;
  SmallVector<unsigned, 2> ImplicitDefs;
  for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
    if (!isCSECandidate(&MI))
      continue;

    bool FoundCSE = VNT.count(&MI);
    if (!FoundCSE) {
      // Using trivial copy propagation to find more CSE opportunities.
      if (PerformTrivialCopyPropagation(&MI, MBB)) {
        Changed = true;

        // After coalescing MI itself may become a copy.
        if (MI.isCopyLike())
          continue;

        // Try again to see if CSE is possible.
        FoundCSE = VNT.count(&MI);
      }
    }

    // Commute commutable instructions.
    bool Commuted = false;
    if (!FoundCSE && MI.isCommutable()) {
      if (MachineInstr *NewMI = TII->commuteInstruction(MI)) {
        Commuted = true;
        FoundCSE = VNT.count(NewMI);
        if (NewMI != &MI) {
          // New instruction. It doesn't need to be kept.
          NewMI->eraseFromParent();
          Changed = true;
        } else if (!FoundCSE)
          // MI was changed but it didn't help, commute it back!
          (void)TII->commuteInstruction(MI);
      }
    }

    // If the instruction defines physical registers and the values *may* be
    // used, then it's not safe to replace it with a common subexpression.
    // It's also not safe if the instruction uses physical registers.
    bool CrossMBBPhysDef = false;
    SmallSet<MCRegister, 8> PhysRefs;
    PhysDefVector PhysDefs;
    bool PhysUseDef = false;
    if (FoundCSE &&
        hasLivePhysRegDefUses(&MI, MBB, PhysRefs, PhysDefs, PhysUseDef)) {
      FoundCSE = false;

      // ... Unless the CS is local or is in the sole predecessor block
      // and it also defines the physical register which is not clobbered
      // in between and the physical register uses were not clobbered.
      // This can never be the case if the instruction both uses and
      // defines the same physical register, which was detected above.
      if (!PhysUseDef) {
        unsigned CSVN = VNT.lookup(&MI);
        MachineInstr *CSMI = Exps[CSVN];
        if (PhysRegDefsReach(CSMI, &MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
          FoundCSE = true;
      }
    }

    if (!FoundCSE) {
      VNT.insert(&MI, CurrVN++);
      Exps.push_back(&MI);
      continue;
    }

    // Found a common subexpression, eliminate it.
    unsigned CSVN = VNT.lookup(&MI);
    MachineInstr *CSMI = Exps[CSVN];
    LLVM_DEBUG(dbgs() << "Examining: " << MI);
    LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);

    // Prevent CSE-ing non-local convergent instructions.
    // LLVM's current definition of `isConvergent` does not necessarily prove
    // that non-local CSE is illegal. The following check extends the definition
    // of `isConvergent` to assume a convergent instruction is dependent not
    // only on additional conditions, but also on fewer conditions. LLVM does
    // not have a MachineInstr attribute which expresses this extended
    // definition, so it's necessary to use `isConvergent` to prevent illegally
    // CSE-ing the subset of `isConvergent` instructions which do fall into this
    // extended definition.
    if (MI.isConvergent() && MI.getParent() != CSMI->getParent()) {
      LLVM_DEBUG(dbgs() << "*** Convergent MI and subexpression exist in "
                           "different BBs, avoid CSE!\n");
      VNT.insert(&MI, CurrVN++);
      Exps.push_back(&MI);
      continue;
    }

    // Check if it's profitable to perform this CSE.
    bool DoCSE = true;
    unsigned NumDefs = MI.getNumDefs();

    for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
      MachineOperand &MO = MI.getOperand(i);
      if (!MO.isReg() || !MO.isDef())
        continue;
      Register OldReg = MO.getReg();
      Register NewReg = CSMI->getOperand(i).getReg();

      // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
      // we should make sure it is not dead at CSMI.
      if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
        ImplicitDefsToUpdate.push_back(i);

      // Keep track of implicit defs of CSMI and MI, to clear possibly
      // made-redundant kill flags.
      if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg)
        ImplicitDefs.push_back(OldReg);

      if (OldReg == NewReg) {
        --NumDefs;
        continue;
      }

      assert(OldReg.isVirtual() && NewReg.isVirtual() &&
             "Do not CSE physical register defs!");

      if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), &MI)) {
        LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
        DoCSE = false;
        break;
      }

      // Don't perform CSE if the result of the new instruction cannot exist
      // within the constraints (register class, bank, or low-level type) of
      // the old instruction.
      if (!MRI->constrainRegAttrs(NewReg, OldReg)) {
        LLVM_DEBUG(
            dbgs() << "*** Not the same register constraints, avoid CSE!\n");
        DoCSE = false;
        break;
      }

      CSEPairs.push_back(std::make_pair(OldReg, NewReg));
      --NumDefs;
    }

    // Actually perform the elimination.
    if (DoCSE) {
      for (const std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
        unsigned OldReg = CSEPair.first;
        unsigned NewReg = CSEPair.second;
        // OldReg may have been unused but is used now, clear the Dead flag
        MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
        assert(Def != nullptr && "CSEd register has no unique definition?");
        Def->clearRegisterDeads(NewReg);
        // Replace with NewReg and clear kill flags which may be wrong now.
        MRI->replaceRegWith(OldReg, NewReg);
        MRI->clearKillFlags(NewReg);
      }

      // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
      // we should make sure it is not dead at CSMI.
      for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
        CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
      for (const auto &PhysDef : PhysDefs)
        if (!MI.getOperand(PhysDef.first).isDead())
          CSMI->getOperand(PhysDef.first).setIsDead(false);

      // Go through implicit defs of CSMI and MI, and clear the kill flags on
      // their uses in all the instructions between CSMI and MI.
      // We might have made some of the kill flags redundant, consider:
      //   subs  ... implicit-def %nzcv    <- CSMI
      //   csinc ... implicit killed %nzcv <- this kill flag isn't valid anymore
      //   subs  ... implicit-def %nzcv    <- MI, to be eliminated
      //   csinc ... implicit killed %nzcv
      // Since we eliminated MI, and reused a register imp-def'd by CSMI
      // (here %nzcv), that register, if it was killed before MI, should have
      // that kill flag removed, because it's lifetime was extended.
      if (CSMI->getParent() == MI.getParent()) {
        for (MachineBasicBlock::iterator II = CSMI, IE = &MI; II != IE; ++II)
          for (auto ImplicitDef : ImplicitDefs)
            if (MachineOperand *MO = II->findRegisterUseOperand(
                    ImplicitDef, /*isKill=*/true, TRI))
              MO->setIsKill(false);
      } else {
        // If the instructions aren't in the same BB, bail out and clear the
        // kill flag on all uses of the imp-def'd register.
        for (auto ImplicitDef : ImplicitDefs)
          MRI->clearKillFlags(ImplicitDef);
      }

      if (CrossMBBPhysDef) {
        // Add physical register defs now coming in from a predecessor to MBB
        // livein list.
        while (!PhysDefs.empty()) {
          auto LiveIn = PhysDefs.pop_back_val();
          if (!MBB->isLiveIn(LiveIn.second))
            MBB->addLiveIn(LiveIn.second);
        }
        ++NumCrossBBCSEs;
      }

      MI.eraseFromParent();
      ++NumCSEs;
      if (!PhysRefs.empty())
        ++NumPhysCSEs;
      if (Commuted)
        ++NumCommutes;
      Changed = true;
    } else {
      VNT.insert(&MI, CurrVN++);
      Exps.push_back(&MI);
    }
    CSEPairs.clear();
    ImplicitDefsToUpdate.clear();
    ImplicitDefs.clear();
  }

  return Changed;
}

/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
/// dominator tree node if it's a leaf or all of its children are done. Walk
/// up the dominator tree to destroy ancestors which are now done.
void
MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
                            DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren) {
  if (OpenChildren[Node])
    return;

  // Pop scope.
  ExitScope(Node->getBlock());

  // Now traverse upwards to pop ancestors whose offsprings are all done.
  while (MachineDomTreeNode *Parent = Node->getIDom()) {
    unsigned Left = --OpenChildren[Parent];
    if (Left != 0)
      break;
    ExitScope(Parent->getBlock());
    Node = Parent;
  }
}

bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
  SmallVector<MachineDomTreeNode*, 32> Scopes;
  SmallVector<MachineDomTreeNode*, 8> WorkList;
  DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;

  CurrVN = 0;

  // Perform a DFS walk to determine the order of visit.
  WorkList.push_back(Node);
  do {
    Node = WorkList.pop_back_val();
    Scopes.push_back(Node);
    OpenChildren[Node] = Node->getNumChildren();
    append_range(WorkList, Node->children());
  } while (!WorkList.empty());

  // Now perform CSE.
  bool Changed = false;
  for (MachineDomTreeNode *Node : Scopes) {
    MachineBasicBlock *MBB = Node->getBlock();
    EnterScope(MBB);
    Changed |= ProcessBlockCSE(MBB);
    // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
    ExitScopeIfDone(Node, OpenChildren);
  }

  return Changed;
}

// We use stronger checks for PRE candidate rather than for CSE ones to embrace
// checks inside ProcessBlockCSE(), not only inside isCSECandidate(). This helps
// to exclude instrs created by PRE that won't be CSEed later.
bool MachineCSE::isPRECandidate(MachineInstr *MI,
                                SmallSet<MCRegister, 8> &PhysRefs) {
  if (!isCSECandidate(MI) ||
      MI->isNotDuplicable() ||
      MI->mayLoad() ||
      TII->isAsCheapAsAMove(*MI) ||
      MI->getNumDefs() != 1 ||
      MI->getNumExplicitDefs() != 1)
    return false;

  for (const MachineOperand &MO : MI->operands()) {
    if (MO.isReg() && !MO.getReg().isVirtual()) {
      if (MO.isDef())
        return false;
      else
        PhysRefs.insert(MO.getReg());
    }
  }

  return true;
}

bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
                                 MachineBasicBlock *MBB) {
  bool Changed = false;
  for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
    SmallSet<MCRegister, 8> PhysRefs;
    if (!isPRECandidate(&MI, PhysRefs))
      continue;

    if (!PREMap.count(&MI)) {
      PREMap[&MI] = MBB;
      continue;
    }

    auto MBB1 = PREMap[&MI];
    assert(
        !DT->properlyDominates(MBB, MBB1) &&
        "MBB cannot properly dominate MBB1 while DFS through dominators tree!");
    auto CMBB = DT->findNearestCommonDominator(MBB, MBB1);
    if (!CMBB->isLegalToHoistInto())
      continue;

    if (!isProfitableToHoistInto(CMBB, MBB, MBB1))
      continue;

    // Two instrs are partial redundant if their basic blocks are reachable
    // from one to another but one doesn't dominate another.
    if (CMBB != MBB1) {
      auto BB = MBB->getBasicBlock(), BB1 = MBB1->getBasicBlock();
      if (BB != nullptr && BB1 != nullptr &&
          (isPotentiallyReachable(BB1, BB) ||
           isPotentiallyReachable(BB, BB1))) {
        // The following check extends the definition of `isConvergent` to
        // assume a convergent instruction is dependent not only on additional
        // conditions, but also on fewer conditions. LLVM does not have a
        // MachineInstr attribute which expresses this extended definition, so
        // it's necessary to use `isConvergent` to prevent illegally PRE-ing the
        // subset of `isConvergent` instructions which do fall into this
        // extended definition.
        if (MI.isConvergent() && CMBB != MBB)
          continue;

        // If this instruction uses physical registers then we can only do PRE
        // if it's using the value that is live at the place we're hoisting to.
        bool NonLocal;
        PhysDefVector PhysDefs;
        if (!PhysRefs.empty() &&
            !PhysRegDefsReach(&*(CMBB->getFirstTerminator()), &MI, PhysRefs,
                              PhysDefs, NonLocal))
          continue;

        assert(MI.getOperand(0).isDef() &&
               "First operand of instr with one explicit def must be this def");
        Register VReg = MI.getOperand(0).getReg();
        Register NewReg = MRI->cloneVirtualRegister(VReg);
        if (!isProfitableToCSE(NewReg, VReg, CMBB, &MI))
          continue;
        MachineInstr &NewMI =
            TII->duplicate(*CMBB, CMBB->getFirstTerminator(), MI);

        // When hoisting, make sure we don't carry the debug location of
        // the original instruction, as that's not correct and can cause
        // unexpected jumps when debugging optimized code.
        auto EmptyDL = DebugLoc();
        NewMI.setDebugLoc(EmptyDL);

        NewMI.getOperand(0).setReg(NewReg);

        PREMap[&MI] = CMBB;
        ++NumPREs;
        Changed = true;
      }
    }
  }
  return Changed;
}

// This simple PRE (partial redundancy elimination) pass doesn't actually
// eliminate partial redundancy but transforms it to full redundancy,
// anticipating that the next CSE step will eliminate this created redundancy.
// If CSE doesn't eliminate this, then the created instruction will remain dead
// and eliminated later by Remove Dead Machine Instructions pass.
bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) {
  SmallVector<MachineDomTreeNode *, 32> BBs;

  PREMap.clear();
  bool Changed = false;
  BBs.push_back(DT->getRootNode());
  do {
    auto Node = BBs.pop_back_val();
    append_range(BBs, Node->children());

    MachineBasicBlock *MBB = Node->getBlock();
    Changed |= ProcessBlockPRE(DT, MBB);

  } while (!BBs.empty());

  return Changed;
}

bool MachineCSE::isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
                                         MachineBasicBlock *MBB,
                                         MachineBasicBlock *MBB1) {
  if (CandidateBB->getParent()->getFunction().hasMinSize())
    return true;
  assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB");
  assert(DT->dominates(CandidateBB, MBB1) &&
         "CandidateBB should dominate MBB1");
  return MBFI->getBlockFreq(CandidateBB) <=
         MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1);
}

bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  TII = MF.getSubtarget().getInstrInfo();
  TRI = MF.getSubtarget().getRegisterInfo();
  MRI = &MF.getRegInfo();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  DT = &getAnalysis<MachineDominatorTree>();
  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
  LookAheadLimit = TII->getMachineCSELookAheadLimit();
  bool ChangedPRE, ChangedCSE;
  ChangedPRE = PerformSimplePRE(DT);
  ChangedCSE = PerformCSE(DT->getRootNode());
  return ChangedPRE || ChangedCSE;
}