//===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs global common subexpression elimination on machine
// instructions using a scoped hash table based value numbering scheme. It
// must be run while the machine function is still in SSA form.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegister.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <iterator>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "machine-cse"

STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumCSEs, "Number of common subexpression eliminated");
STATISTIC(NumPREs, "Number of partial redundant expression"
                   " transformed to fully redundant");
STATISTIC(NumPhysCSEs,
          "Number of physreg referencing common subexpr eliminated");
STATISTIC(NumCrossBBCSEs,
          "Number of cross-MBB physreg referencing CS eliminated");
STATISTIC(NumCommutes, "Number of copies coalesced after commuting");

// Threshold to avoid excessive cost to compute isProfitableToCSE.
static cl::opt<int>
    CSUsesThreshold("csuses-threshold", cl::Hidden, cl::init(1024),
                    cl::desc("Threshold for the size of CSUses"));

namespace {

class MachineCSE : public MachineFunctionPass {
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  AliasAnalysis *AA;
  MachineDominatorTree *DT;
  MachineRegisterInfo *MRI;
  MachineBlockFrequencyInfo *MBFI;

public:
  static char ID; // Pass identification

  MachineCSE() : MachineFunctionPass(ID) {
    initializeMachineCSEPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
    AU.addRequired<AAResultsWrapperPass>();
    AU.addPreservedID(MachineLoopInfoID);
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
    AU.addRequired<MachineBlockFrequencyInfo>();
    AU.addPreserved<MachineBlockFrequencyInfo>();
  }

  // This pass performs value numbering on virtual register defs, so the
  // function must still be in SSA form.
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties()
        .set(MachineFunctionProperties::Property::IsSSA);
  }

  void releaseMemory() override {
    ScopeMap.clear();
    PREMap.clear();
    Exps.clear();
  }

private:
  using AllocatorTy =
      RecyclingAllocator<BumpPtrAllocator,
                         ScopedHashTableVal<MachineInstr *, unsigned>>;
  using ScopedHTType =
      ScopedHashTable<MachineInstr *, unsigned, MachineInstrExpressionTrait,
                      AllocatorTy>;
  using ScopeType = ScopedHTType::ScopeTy;
  // Pairs of (operand index of a physreg def, the physreg itself).
  using PhysDefVector = SmallVector<std::pair<unsigned, unsigned>, 2>;

  // Maximum number of instructions scanned when deciding whether a physreg
  // def is trivially dead or still reachable; set from the target hook.
  unsigned LookAheadLimit = 0;
  // Open value-numbering scope for each basic block being visited.
  DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap;
  // For PRE: maps an expression (hashed by MachineInstrExpressionTrait) to
  // the block where it was last seen.
  DenseMap<MachineInstr *, MachineBasicBlock *, MachineInstrExpressionTrait>
      PREMap;
  // Scoped value-numbering table: expression -> value number.
  ScopedHTType VNT;
  // Exps[VN] is the defining instruction for value number VN.
  SmallVector<MachineInstr *, 64> Exps;
  unsigned CurrVN = 0;

  bool PerformTrivialCopyPropagation(MachineInstr *MI,
                                     MachineBasicBlock *MBB);
  bool isPhysDefTriviallyDead(MCRegister Reg,
                              MachineBasicBlock::const_iterator I,
                              MachineBasicBlock::const_iterator E) const;
  bool hasLivePhysRegDefUses(const MachineInstr *MI,
                             const MachineBasicBlock *MBB,
                             SmallSet<MCRegister, 8> &PhysRefs,
                             PhysDefVector &PhysDefs, bool &PhysUseDef) const;
  bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
                        SmallSet<MCRegister, 8> &PhysRefs,
                        PhysDefVector &PhysDefs, bool &NonLocal) const;
  bool isCSECandidate(MachineInstr *MI);
  bool isProfitableToCSE(Register CSReg, Register Reg,
                         MachineBasicBlock *CSBB, MachineInstr *MI);
  void EnterScope(MachineBasicBlock *MBB);
  void ExitScope(MachineBasicBlock *MBB);
  bool ProcessBlockCSE(MachineBasicBlock *MBB);
  void ExitScopeIfDone(MachineDomTreeNode *Node,
                       DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren);
  bool PerformCSE(MachineDomTreeNode *Node);

  bool isPRECandidate(MachineInstr *MI, SmallSet<MCRegister, 8> &PhysRefs);
  bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
  bool PerformSimplePRE(MachineDominatorTree *DT);
  /// Heuristics to see if it's profitable to move common computations of MBB
  /// and MBB1 to CandidateBB.
  bool isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
                               MachineBasicBlock *MBB,
                               MachineBasicBlock *MBB1);
};

} // end anonymous namespace

char MachineCSE::ID = 0;

char &llvm::MachineCSEID = MachineCSE::ID;

INITIALIZE_PASS_BEGIN(MachineCSE, DEBUG_TYPE,
                      "Machine Common Subexpression Elimination", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineCSE, DEBUG_TYPE,
                    "Machine Common Subexpression Elimination", false, false)

/// The source register of a COPY machine instruction can be propagated to all
/// its users, and this propagation could increase the probability of finding
/// common subexpressions. If the COPY has only one user, the COPY itself can
/// be removed.
bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
                                               MachineBasicBlock *MBB) {
  bool Changed = false;
  for (MachineOperand &MO : MI->operands()) {
    if (!MO.isReg() || !MO.isUse())
      continue;
    Register Reg = MO.getReg();
    if (!Register::isVirtualRegister(Reg))
      continue;
    bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
    MachineInstr *DefMI = MRI->getVRegDef(Reg);
    if (!DefMI->isCopy())
      continue;
    Register SrcReg = DefMI->getOperand(1).getReg();
    if (!Register::isVirtualRegister(SrcReg))
      continue;
    if (DefMI->getOperand(0).getSubReg())
      continue;
    // FIXME: We should trivially coalesce subregister copies to expose CSE
    // opportunities on instructions with truncated operands (see
    // cse-add-with-overflow.ll). This can be done here as follows:
    // if (SrcSubReg)
    //   RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
    //                                      SrcSubReg);
    // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
    //
    // The 2-addr pass has been updated to handle coalesced subregs. However,
    // some machine-specific code still can't handle it.
    // To handle it properly we also need a way to find a constrained
    // subregister class given a super-reg class and subreg index.
    if (DefMI->getOperand(1).getSubReg())
      continue;
    if (!MRI->constrainRegAttrs(SrcReg, Reg))
      continue;
    LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
    LLVM_DEBUG(dbgs() << "***     to: " << *MI);

    // Propagate SrcReg of copies to MI.
    MO.setReg(SrcReg);
    MRI->clearKillFlags(SrcReg);
    // Coalesce single use copies.
    if (OnlyOneUse) {
      // If (and only if) we've eliminated all uses of the copy, also
      // copy-propagate to any debug-users of MI, or they'll be left using
      // an undefined value.
      DefMI->changeDebugValuesDefReg(SrcReg);

      DefMI->eraseFromParent();
      ++NumCoalesces;
    }
    Changed = true;
  }

  return Changed;
}

/// Scan forward from I up to LookAheadLimit instructions, looking for a def
/// of Reg (or an alias) before any use; if found, the def at I is trivially
/// dead even though it lacks a "dead" marker.
bool MachineCSE::isPhysDefTriviallyDead(
    MCRegister Reg, MachineBasicBlock::const_iterator I,
    MachineBasicBlock::const_iterator E) const {
  unsigned LookAheadLeft = LookAheadLimit;
  while (LookAheadLeft) {
    // Skip over dbg_value's.
    I = skipDebugInstructionsForward(I, E);

    if (I == E)
      // Reached end of block, we don't know if register is dead or not.
      return false;

    bool SeenDef = false;
    for (const MachineOperand &MO : I->operands()) {
      if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
        SeenDef = true;
      if (!MO.isReg() || !MO.getReg())
        continue;
      if (!TRI->regsOverlap(MO.getReg(), Reg))
        continue;
      if (MO.isUse())
        // Found a use!
        return false;
      SeenDef = true;
    }
    if (SeenDef)
      // See a def of Reg (or an alias) before encountering any use, it's
      // trivially dead.
      return true;

    --LookAheadLeft;
    ++I;
  }
  return false;
}

static bool isCallerPreservedOrConstPhysReg(MCRegister Reg,
                                            const MachineFunction &MF,
                                            const TargetRegisterInfo &TRI) {
  // MachineRegisterInfo::isConstantPhysReg directly called by
  // MachineRegisterInfo::isCallerPreservedOrConstPhysReg expects the
  // reserved registers to be frozen. That doesn't cause a problem post-ISel as
  // most (if not all) targets freeze reserved registers right after ISel.
  //
  // It does cause issues mid-GlobalISel, however, hence the additional
  // reservedRegsFrozen check.
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  return TRI.isCallerPreservedPhysReg(Reg, MF) ||
         (MRI.reservedRegsFrozen() && MRI.isConstantPhysReg(Reg));
}

/// hasLivePhysRegDefUses - Return true if the specified instruction reads or
/// writes physical registers (except for dead defs of physical registers). It
/// also returns the physical register def by reference if it's the only one
/// and the instruction does not use a physical register.
bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
                                       const MachineBasicBlock *MBB,
                                       SmallSet<MCRegister, 8> &PhysRefs,
                                       PhysDefVector &PhysDefs,
                                       bool &PhysUseDef) const {
  // First, add all uses to PhysRefs.
  for (const MachineOperand &MO : MI->operands()) {
    if (!MO.isReg() || MO.isDef())
      continue;
    Register Reg = MO.getReg();
    if (!Reg)
      continue;
    if (Register::isVirtualRegister(Reg))
      continue;
    // Reading either caller preserved or constant physregs is ok.
    if (!isCallerPreservedOrConstPhysReg(Reg.asMCReg(), *MI->getMF(), *TRI))
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
        PhysRefs.insert(*AI);
  }

  // Next, collect all defs into PhysDefs. If any is already in PhysRefs
  // (which currently contains only uses), set the PhysUseDef flag.
  PhysUseDef = false;
  MachineBasicBlock::const_iterator I = MI; I = std::next(I);
  for (const auto &MOP : llvm::enumerate(MI->operands())) {
    const MachineOperand &MO = MOP.value();
    if (!MO.isReg() || !MO.isDef())
      continue;
    Register Reg = MO.getReg();
    if (!Reg)
      continue;
    if (Register::isVirtualRegister(Reg))
      continue;
    // Check against PhysRefs even if the def is "dead".
    if (PhysRefs.count(Reg.asMCReg()))
      PhysUseDef = true;
    // If the def is dead, it's ok. But the def may not be marked "dead".
    // That's common since this pass is run before livevariables. We can scan
    // forward a few instructions and check if it is obviously dead.
    if (!MO.isDead() && !isPhysDefTriviallyDead(Reg.asMCReg(), I, MBB->end()))
      PhysDefs.push_back(std::make_pair(MOP.index(), Reg));
  }

  // Finally, add all defs to PhysRefs as well.
  for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
    for (MCRegAliasIterator AI(PhysDefs[i].second, TRI, true); AI.isValid();
         ++AI)
      PhysRefs.insert(*AI);

  return !PhysRefs.empty();
}

/// Return true if the physreg defs/uses in PhysRefs survive from CSMI to MI
/// without being clobbered, so MI can be CSE'd against CSMI.
bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
                                  SmallSet<MCRegister, 8> &PhysRefs,
                                  PhysDefVector &PhysDefs,
                                  bool &NonLocal) const {
  // For now conservatively returns false if the common subexpression is
  // not in the same basic block as the given instruction. The only exception
  // is if the common subexpression is in the sole predecessor block.
  const MachineBasicBlock *MBB = MI->getParent();
  const MachineBasicBlock *CSMBB = CSMI->getParent();

  bool CrossMBB = false;
  if (CSMBB != MBB) {
    if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
      return false;

    for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
      if (MRI->isAllocatable(PhysDefs[i].second) ||
          MRI->isReserved(PhysDefs[i].second))
        // Avoid extending live range of physical registers if they are
        // allocatable or reserved.
        return false;
    }
    CrossMBB = true;
  }
  MachineBasicBlock::const_iterator I = CSMI; I = std::next(I);
  MachineBasicBlock::const_iterator E = MI;
  MachineBasicBlock::const_iterator EE = CSMBB->end();
  unsigned LookAheadLeft = LookAheadLimit;
  while (LookAheadLeft) {
    // Skip over dbg_value's.
    while (I != E && I != EE && I->isDebugInstr())
      ++I;

    if (I == EE) {
      assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
      (void)CrossMBB;
      CrossMBB = false;
      NonLocal = true;
      I = MBB->begin();
      EE = MBB->end();
      continue;
    }

    if (I == E)
      // Reached MI without seeing a clobber of any referenced physreg.
      return true;

    for (const MachineOperand &MO : I->operands()) {
      // RegMasks go on instructions like calls that clobber lots of physregs.
      // Don't attempt to CSE across such an instruction.
      if (MO.isRegMask())
        return false;
      if (!MO.isReg() || !MO.isDef())
        continue;
      Register MOReg = MO.getReg();
      if (Register::isVirtualRegister(MOReg))
        continue;
      if (PhysRefs.count(MOReg.asMCReg()))
        return false;
    }

    --LookAheadLeft;
    ++I;
  }

  return false;
}

/// Return true if MI is the kind of instruction this pass may value-number
/// and eliminate: movable, side-effect free, and not a copy-like instruction.
bool MachineCSE::isCSECandidate(MachineInstr *MI) {
  if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
      MI->isInlineAsm() || MI->isDebugInstr())
    return false;

  // Ignore copies.
  if (MI->isCopyLike())
    return false;

  // Ignore stuff that we obviously can't move.
  if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
      MI->mayRaiseFPException() || MI->hasUnmodeledSideEffects())
    return false;

  if (MI->mayLoad()) {
    // Okay, this instruction does a load. As a refinement, we allow the target
    // to decide whether the loaded value is actually a constant. If so, we can
    // actually use it as a load.
    if (!MI->isDereferenceableInvariantLoad())
      // FIXME: we should be able to hoist loads with no other side effects if
      // there are no other instructions which can change memory in this loop.
      // This is a trivial form of alias analysis.
      return false;
  }

  // Ignore stack guard loads, otherwise the register that holds CSEed value may
  // be spilled and get loaded back with corrupted data.
  if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
    return false;

  return true;
}

/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
/// common expression that defines Reg. CSBB is basic block where CSReg is
/// defined.
bool MachineCSE::isProfitableToCSE(Register CSReg, Register Reg,
                                   MachineBasicBlock *CSBB, MachineInstr *MI) {
  // FIXME: Heuristics that work around the lack of live range splitting.

  // If CSReg is used at all uses of Reg, CSE should not increase register
  // pressure of CSReg.
  bool MayIncreasePressure = true;
  if (Register::isVirtualRegister(CSReg) && Register::isVirtualRegister(Reg)) {
    MayIncreasePressure = false;
    SmallPtrSet<MachineInstr*, 8> CSUses;
    int NumOfUses = 0;
    for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
      CSUses.insert(&MI);
      // Too costly to compute if NumOfUses is very large. Conservatively assume
      // MayIncreasePressure to avoid spending too much time here.
      if (++NumOfUses > CSUsesThreshold) {
        MayIncreasePressure = true;
        break;
      }
    }
    if (!MayIncreasePressure)
      for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
        if (!CSUses.count(&MI)) {
          MayIncreasePressure = true;
          break;
        }
      }
  }
  if (!MayIncreasePressure) return true;

  // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
  // an immediate predecessor. We don't want to increase register pressure and
  // end up causing other computation to be spilled.
  if (TII->isAsCheapAsAMove(*MI)) {
    MachineBasicBlock *BB = MI->getParent();
    if (CSBB != BB && !CSBB->isSuccessor(BB))
      return false;
  }

  // Heuristics #2: If the expression does not use a virtual register and the
  // only uses of the redundant computation are copies, do not cse.
  bool HasVRegUse = false;
  for (const MachineOperand &MO : MI->operands()) {
    if (MO.isReg() && MO.isUse() && Register::isVirtualRegister(MO.getReg())) {
      HasVRegUse = true;
      break;
    }
  }
  if (!HasVRegUse) {
    bool HasNonCopyUse = false;
    for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
      // Ignore copies.
      if (!MI.isCopyLike()) {
        HasNonCopyUse = true;
        break;
      }
    }
    if (!HasNonCopyUse)
      return false;
  }

  // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
  // it unless the defined value is already used in the BB of the new use.
  bool HasPHI = false;
  for (MachineInstr &UseMI : MRI->use_nodbg_instructions(CSReg)) {
    HasPHI |= UseMI.isPHI();
    if (UseMI.getParent() == MI->getParent())
      return true;
  }

  return !HasPHI;
}

void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
  LLVM_DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
  ScopeType *Scope = new ScopeType(VNT);
  ScopeMap[MBB] = Scope;
}

void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
  LLVM_DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
  DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
  assert(SI != ScopeMap.end());
  delete SI->second;
  ScopeMap.erase(SI);
}

/// Value-number every candidate instruction in MBB, eliminating those whose
/// value number already has a dominating definition.
bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
  bool Changed = false;

  SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
  SmallVector<unsigned, 2> ImplicitDefsToUpdate;
  SmallVector<unsigned, 2> ImplicitDefs;
  for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
    if (!isCSECandidate(&MI))
      continue;

    bool FoundCSE = VNT.count(&MI);
    if (!FoundCSE) {
      // Using trivial copy propagation to find more CSE opportunities.
      if (PerformTrivialCopyPropagation(&MI, MBB)) {
        Changed = true;

        // After coalescing MI itself may become a copy.
        if (MI.isCopyLike())
          continue;

        // Try again to see if CSE is possible.
        FoundCSE = VNT.count(&MI);
      }
    }

    // Commute commutable instructions.
    bool Commuted = false;
    if (!FoundCSE && MI.isCommutable()) {
      if (MachineInstr *NewMI = TII->commuteInstruction(MI)) {
        Commuted = true;
        FoundCSE = VNT.count(NewMI);
        if (NewMI != &MI) {
          // New instruction. It doesn't need to be kept.
          NewMI->eraseFromParent();
          Changed = true;
        } else if (!FoundCSE)
          // MI was changed but it didn't help, commute it back!
          (void)TII->commuteInstruction(MI);
      }
    }

    // If the instruction defines physical registers and the values *may* be
    // used, then it's not safe to replace it with a common subexpression.
    // It's also not safe if the instruction uses physical registers.
    bool CrossMBBPhysDef = false;
    SmallSet<MCRegister, 8> PhysRefs;
    PhysDefVector PhysDefs;
    bool PhysUseDef = false;
    if (FoundCSE &&
        hasLivePhysRegDefUses(&MI, MBB, PhysRefs, PhysDefs, PhysUseDef)) {
      FoundCSE = false;

      // ... Unless the CS is local or is in the sole predecessor block
      // and it also defines the physical register which is not clobbered
      // in between and the physical register uses were not clobbered.
      // This can never be the case if the instruction both uses and
      // defines the same physical register, which was detected above.
      if (!PhysUseDef) {
        unsigned CSVN = VNT.lookup(&MI);
        MachineInstr *CSMI = Exps[CSVN];
        if (PhysRegDefsReach(CSMI, &MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
          FoundCSE = true;
      }
    }

    if (!FoundCSE) {
      VNT.insert(&MI, CurrVN++);
      Exps.push_back(&MI);
      continue;
    }

    // Found a common subexpression, eliminate it.
    unsigned CSVN = VNT.lookup(&MI);
    MachineInstr *CSMI = Exps[CSVN];
    LLVM_DEBUG(dbgs() << "Examining: " << MI);
    LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);

    // Prevent CSE-ing non-local convergent instructions.
    // LLVM's current definition of `isConvergent` does not necessarily prove
    // that non-local CSE is illegal. The following check extends the definition
    // of `isConvergent` to assume a convergent instruction is dependent not
    // only on additional conditions, but also on fewer conditions. LLVM does
    // not have a MachineInstr attribute which expresses this extended
    // definition, so it's necessary to use `isConvergent` to prevent illegally
    // CSE-ing the subset of `isConvergent` instructions which do fall into this
    // extended definition.
    if (MI.isConvergent() && MI.getParent() != CSMI->getParent()) {
      LLVM_DEBUG(dbgs() << "*** Convergent MI and subexpression exist in "
                           "different BBs, avoid CSE!\n");
      VNT.insert(&MI, CurrVN++);
      Exps.push_back(&MI);
      continue;
    }

    // Check if it's profitable to perform this CSE.
    bool DoCSE = true;
    unsigned NumDefs = MI.getNumDefs();

    for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
      MachineOperand &MO = MI.getOperand(i);
      if (!MO.isReg() || !MO.isDef())
        continue;
      Register OldReg = MO.getReg();
      Register NewReg = CSMI->getOperand(i).getReg();

      // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
      // we should make sure it is not dead at CSMI.
      if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
        ImplicitDefsToUpdate.push_back(i);

      // Keep track of implicit defs of CSMI and MI, to clear possibly
      // made-redundant kill flags.
      if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg)
        ImplicitDefs.push_back(OldReg);

      if (OldReg == NewReg) {
        --NumDefs;
        continue;
      }

      assert(Register::isVirtualRegister(OldReg) &&
             Register::isVirtualRegister(NewReg) &&
             "Do not CSE physical register defs!");

      if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), &MI)) {
        LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
        DoCSE = false;
        break;
      }

      // Don't perform CSE if the result of the new instruction cannot exist
      // within the constraints (register class, bank, or low-level type) of
      // the old instruction.
      if (!MRI->constrainRegAttrs(NewReg, OldReg)) {
        LLVM_DEBUG(
            dbgs() << "*** Not the same register constraints, avoid CSE!\n");
        DoCSE = false;
        break;
      }

      CSEPairs.push_back(std::make_pair(OldReg, NewReg));
      --NumDefs;
    }

    // Actually perform the elimination.
    if (DoCSE) {
      for (const std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
        unsigned OldReg = CSEPair.first;
        unsigned NewReg = CSEPair.second;
        // OldReg may have been unused but is used now, clear the Dead flag
        MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
        assert(Def != nullptr && "CSEd register has no unique definition?");
        Def->clearRegisterDeads(NewReg);
        // Replace with NewReg and clear kill flags which may be wrong now.
        MRI->replaceRegWith(OldReg, NewReg);
        MRI->clearKillFlags(NewReg);
      }

      // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
      // we should make sure it is not dead at CSMI.
      for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
        CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
      for (const auto &PhysDef : PhysDefs)
        if (!MI.getOperand(PhysDef.first).isDead())
          CSMI->getOperand(PhysDef.first).setIsDead(false);

      // Go through implicit defs of CSMI and MI, and clear the kill flags on
      // their uses in all the instructions between CSMI and MI.
      // We might have made some of the kill flags redundant, consider:
      //   subs  ... implicit-def %nzcv    <- CSMI
      //   csinc ... implicit killed %nzcv <- this kill flag isn't valid anymore
      //   subs  ... implicit-def %nzcv    <- MI, to be eliminated
      //   csinc ... implicit killed %nzcv
      // Since we eliminated MI, and reused a register imp-def'd by CSMI
      // (here %nzcv), that register, if it was killed before MI, should have
      // that kill flag removed, because its lifetime was extended.
      if (CSMI->getParent() == MI.getParent()) {
        for (MachineBasicBlock::iterator II = CSMI, IE = &MI; II != IE; ++II)
          for (auto ImplicitDef : ImplicitDefs)
            if (MachineOperand *MO = II->findRegisterUseOperand(
                    ImplicitDef, /*isKill=*/true, TRI))
              MO->setIsKill(false);
      } else {
        // If the instructions aren't in the same BB, bail out and clear the
        // kill flag on all uses of the imp-def'd register.
        for (auto ImplicitDef : ImplicitDefs)
          MRI->clearKillFlags(ImplicitDef);
      }

      if (CrossMBBPhysDef) {
        // Add physical register defs now coming in from a predecessor to MBB
        // livein list.
        while (!PhysDefs.empty()) {
          auto LiveIn = PhysDefs.pop_back_val();
          if (!MBB->isLiveIn(LiveIn.second))
            MBB->addLiveIn(LiveIn.second);
        }
        ++NumCrossBBCSEs;
      }

      MI.eraseFromParent();
      ++NumCSEs;
      if (!PhysRefs.empty())
        ++NumPhysCSEs;
      if (Commuted)
        ++NumCommutes;
      Changed = true;
    } else {
      VNT.insert(&MI, CurrVN++);
      Exps.push_back(&MI);
    }
    CSEPairs.clear();
    ImplicitDefsToUpdate.clear();
    ImplicitDefs.clear();
  }

  return Changed;
}

/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
/// dominator tree node if it's a leaf or all of its children are done. Walk
/// up the dominator tree to destroy ancestors which are now done.
void
MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
                            DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren) {
  if (OpenChildren[Node])
    return;

  // Pop scope.
  ExitScope(Node->getBlock());

  // Now traverse upwards to pop ancestors whose offspring are all done.
  while (MachineDomTreeNode *Parent = Node->getIDom()) {
    unsigned Left = --OpenChildren[Parent];
    if (Left != 0)
      break;
    ExitScope(Parent->getBlock());
    Node = Parent;
  }
}

/// Drive CSE over the dominator tree rooted at Node, opening one hash-table
/// scope per block so value numbers only survive into dominated blocks.
bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
  SmallVector<MachineDomTreeNode*, 32> Scopes;
  SmallVector<MachineDomTreeNode*, 8> WorkList;
  DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;

  CurrVN = 0;

  // Perform a DFS walk to determine the order of visit.
  WorkList.push_back(Node);
  do {
    Node = WorkList.pop_back_val();
    Scopes.push_back(Node);
    OpenChildren[Node] = Node->getNumChildren();
    append_range(WorkList, Node->children());
  } while (!WorkList.empty());

  // Now perform CSE.
  bool Changed = false;
  for (MachineDomTreeNode *Node : Scopes) {
    MachineBasicBlock *MBB = Node->getBlock();
    EnterScope(MBB);
    Changed |= ProcessBlockCSE(MBB);
    // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
    ExitScopeIfDone(Node, OpenChildren);
  }

  return Changed;
}

// We use stronger checks for PRE candidate rather than for CSE ones to embrace
// checks inside ProcessBlockCSE(), not only inside isCSECandidate(). This helps
// to exclude instrs created by PRE that won't be CSEed later.
801 bool MachineCSE::isPRECandidate(MachineInstr *MI, 802 SmallSet<MCRegister, 8> &PhysRefs) { 803 if (!isCSECandidate(MI) || 804 MI->isNotDuplicable() || 805 MI->mayLoad() || 806 TII->isAsCheapAsAMove(*MI) || 807 MI->getNumDefs() != 1 || 808 MI->getNumExplicitDefs() != 1) 809 return false; 810 811 for (const auto &def : MI->defs()) 812 if (!Register::isVirtualRegister(def.getReg())) 813 return false; 814 815 for (const auto &use : MI->uses()) 816 if (use.isReg() && !Register::isVirtualRegister(use.getReg())) 817 PhysRefs.insert(use.getReg()); 818 819 return true; 820 } 821 822 bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT, 823 MachineBasicBlock *MBB) { 824 bool Changed = false; 825 for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) { 826 SmallSet<MCRegister, 8> PhysRefs; 827 if (!isPRECandidate(&MI, PhysRefs)) 828 continue; 829 830 if (!PREMap.count(&MI)) { 831 PREMap[&MI] = MBB; 832 continue; 833 } 834 835 auto MBB1 = PREMap[&MI]; 836 assert( 837 !DT->properlyDominates(MBB, MBB1) && 838 "MBB cannot properly dominate MBB1 while DFS through dominators tree!"); 839 auto CMBB = DT->findNearestCommonDominator(MBB, MBB1); 840 if (!CMBB->isLegalToHoistInto()) 841 continue; 842 843 if (!isProfitableToHoistInto(CMBB, MBB, MBB1)) 844 continue; 845 846 // Two instrs are partial redundant if their basic blocks are reachable 847 // from one to another but one doesn't dominate another. 848 if (CMBB != MBB1) { 849 auto BB = MBB->getBasicBlock(), BB1 = MBB1->getBasicBlock(); 850 if (BB != nullptr && BB1 != nullptr && 851 (isPotentiallyReachable(BB1, BB) || 852 isPotentiallyReachable(BB, BB1))) { 853 // The following check extends the definition of `isConvergent` to 854 // assume a convergent instruction is dependent not only on additional 855 // conditions, but also on fewer conditions. 
LLVM does not have a 856 // MachineInstr attribute which expresses this extended definition, so 857 // it's necessary to use `isConvergent` to prevent illegally PRE-ing the 858 // subset of `isConvergent` instructions which do fall into this 859 // extended definition. 860 if (MI.isConvergent() && CMBB != MBB) 861 continue; 862 863 // If this instruction uses physical registers then we can only do PRE 864 // if it's using the value that is live at the place we're hoisting to. 865 bool NonLocal; 866 PhysDefVector PhysDefs; 867 if (!PhysRefs.empty() && 868 !PhysRegDefsReach(&*(CMBB->getFirstTerminator()), &MI, PhysRefs, 869 PhysDefs, NonLocal)) 870 continue; 871 872 assert(MI.getOperand(0).isDef() && 873 "First operand of instr with one explicit def must be this def"); 874 Register VReg = MI.getOperand(0).getReg(); 875 Register NewReg = MRI->cloneVirtualRegister(VReg); 876 if (!isProfitableToCSE(NewReg, VReg, CMBB, &MI)) 877 continue; 878 MachineInstr &NewMI = 879 TII->duplicate(*CMBB, CMBB->getFirstTerminator(), MI); 880 881 // When hoisting, make sure we don't carry the debug location of 882 // the original instruction, as that's not correct and can cause 883 // unexpected jumps when debugging optimized code. 884 auto EmptyDL = DebugLoc(); 885 NewMI.setDebugLoc(EmptyDL); 886 887 NewMI.getOperand(0).setReg(NewReg); 888 889 PREMap[&MI] = CMBB; 890 ++NumPREs; 891 Changed = true; 892 } 893 } 894 } 895 return Changed; 896 } 897 898 // This simple PRE (partial redundancy elimination) pass doesn't actually 899 // eliminate partial redundancy but transforms it to full redundancy, 900 // anticipating that the next CSE step will eliminate this created redundancy. 901 // If CSE doesn't eliminate this, than created instruction will remain dead 902 // and eliminated later by Remove Dead Machine Instructions pass. 
903 bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) { 904 SmallVector<MachineDomTreeNode *, 32> BBs; 905 906 PREMap.clear(); 907 bool Changed = false; 908 BBs.push_back(DT->getRootNode()); 909 do { 910 auto Node = BBs.pop_back_val(); 911 append_range(BBs, Node->children()); 912 913 MachineBasicBlock *MBB = Node->getBlock(); 914 Changed |= ProcessBlockPRE(DT, MBB); 915 916 } while (!BBs.empty()); 917 918 return Changed; 919 } 920 921 bool MachineCSE::isProfitableToHoistInto(MachineBasicBlock *CandidateBB, 922 MachineBasicBlock *MBB, 923 MachineBasicBlock *MBB1) { 924 if (CandidateBB->getParent()->getFunction().hasMinSize()) 925 return true; 926 assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB"); 927 assert(DT->dominates(CandidateBB, MBB1) && 928 "CandidateBB should dominate MBB1"); 929 return MBFI->getBlockFreq(CandidateBB) <= 930 MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1); 931 } 932 933 bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { 934 if (skipFunction(MF.getFunction())) 935 return false; 936 937 TII = MF.getSubtarget().getInstrInfo(); 938 TRI = MF.getSubtarget().getRegisterInfo(); 939 MRI = &MF.getRegInfo(); 940 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 941 DT = &getAnalysis<MachineDominatorTree>(); 942 MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); 943 LookAheadLimit = TII->getMachineCSELookAheadLimit(); 944 bool ChangedPRE, ChangedCSE; 945 ChangedPRE = PerformSimplePRE(DT); 946 ChangedCSE = PerformCSE(DT->getRootNode()); 947 return ChangedPRE || ChangedCSE; 948 } 949