//===- MachineSink.cpp - Sinking for machine instructions -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass moves instructions into successor blocks when possible, so that
// they aren't executed on paths where their results aren't needed.
//
// This pass is not intended to be a replacement or a complete alternative
// for an LLVM-IR-level sinking pass. It is only designed to sink simple
// constructs that are not exposed before lowering and instruction selection.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineCycleAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "machine-sink"

static cl::opt<bool>
    SplitEdges("machine-sink-split",
               cl::desc("Split critical edges during machine sinking"),
               cl::init(true), cl::Hidden);

static cl::opt<bool> UseBlockFreqInfo(
    "machine-sink-bfi",
    cl::desc("Use block frequency info to find successors to sink"),
    cl::init(true), cl::Hidden);

static cl::opt<unsigned> SplitEdgeProbabilityThreshold(
    "machine-sink-split-probability-threshold",
    cl::desc(
        "Percentage threshold for splitting single-instruction critical edge. "
" 83 "If the branch threshold is higher than this threshold, we allow " 84 "speculative execution of up to 1 instruction to avoid branching to " 85 "splitted critical edge"), 86 cl::init(40), cl::Hidden); 87 88 static cl::opt<unsigned> SinkLoadInstsPerBlockThreshold( 89 "machine-sink-load-instrs-threshold", 90 cl::desc("Do not try to find alias store for a load if there is a in-path " 91 "block whose instruction number is higher than this threshold."), 92 cl::init(2000), cl::Hidden); 93 94 static cl::opt<unsigned> SinkLoadBlocksThreshold( 95 "machine-sink-load-blocks-threshold", 96 cl::desc("Do not try to find alias store for a load if the block number in " 97 "the straight line is higher than this threshold."), 98 cl::init(20), cl::Hidden); 99 100 static cl::opt<bool> 101 SinkInstsIntoCycle("sink-insts-to-avoid-spills", 102 cl::desc("Sink instructions into cycles to avoid " 103 "register spills"), 104 cl::init(false), cl::Hidden); 105 106 static cl::opt<unsigned> SinkIntoCycleLimit( 107 "machine-sink-cycle-limit", 108 cl::desc("The maximum number of instructions considered for cycle sinking."), 109 cl::init(50), cl::Hidden); 110 111 STATISTIC(NumSunk, "Number of machine instructions sunk"); 112 STATISTIC(NumCycleSunk, "Number of machine instructions sunk into a cycle"); 113 STATISTIC(NumSplit, "Number of critical edges split"); 114 STATISTIC(NumCoalesces, "Number of copies coalesced"); 115 STATISTIC(NumPostRACopySink, "Number of copies sunk after RA"); 116 117 namespace { 118 119 class MachineSinking : public MachineFunctionPass { 120 const TargetSubtargetInfo *STI = nullptr; 121 const TargetInstrInfo *TII = nullptr; 122 const TargetRegisterInfo *TRI = nullptr; 123 MachineRegisterInfo *MRI = nullptr; // Machine register information 124 MachineDominatorTree *DT = nullptr; // Machine dominator tree 125 MachinePostDominatorTree *PDT = nullptr; // Machine post dominator tree 126 MachineCycleInfo *CI = nullptr; 127 ProfileSummaryInfo *PSI = nullptr; 128 MachineBlockFrequencyInfo *MBFI = nullptr; 129 const MachineBranchProbabilityInfo *MBPI = nullptr; 130 AliasAnalysis *AA = nullptr; 131 RegisterClassInfo RegClassInfo; 132 133 // Remember which edges have been considered for breaking. 134 SmallSet<std::pair<MachineBasicBlock*, MachineBasicBlock*>, 8> 135 CEBCandidates; 136 // Memorize the register that also wanted to sink into the same block along 137 // a different critical edge. 138 // {register to sink, sink-to block} -> the first sink-from block. 139 // We're recording the first sink-from block because that (critical) edge 140 // was deferred until we see another register that's going to sink into the 141 // same block. 142 DenseMap<std::pair<Register, MachineBasicBlock *>, MachineBasicBlock *> 143 CEMergeCandidates; 144 // Remember which edges we are about to split. 145 // This is different from CEBCandidates since those edges 146 // will be split. 147 SetVector<std::pair<MachineBasicBlock *, MachineBasicBlock *>> ToSplit; 148 149 DenseSet<Register> RegsToClearKillFlags; 150 151 using AllSuccsCache = 152 SmallDenseMap<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>; 153 154 /// DBG_VALUE pointer and flag. The flag is true if this DBG_VALUE is 155 /// post-dominated by another DBG_VALUE of the same variable location. 
  /// This is necessary to detect sequences such as:
  ///     %0 = someinst
  ///     DBG_VALUE %0, !123, !DIExpression()
  ///     %1 = anotherinst
  ///     DBG_VALUE %1, !123, !DIExpression()
  /// Where if %0 were to sink, the DBG_VALUE should not sink with it, as that
  /// would re-order assignments.
  using SeenDbgUser = PointerIntPair<MachineInstr *, 1>;

  /// Record of DBG_VALUE uses of vregs in a block, so that we can identify
  /// debug instructions to sink.
  SmallDenseMap<unsigned, TinyPtrVector<SeenDbgUser>> SeenDbgUsers;

  /// Record of debug variables that have had their locations set in the
  /// current block.
  DenseSet<DebugVariable> SeenDbgVars;

  DenseMap<std::pair<MachineBasicBlock *, MachineBasicBlock *>, bool>
      HasStoreCache;

  DenseMap<std::pair<MachineBasicBlock *, MachineBasicBlock *>,
           SmallVector<MachineInstr *>>
      StoreInstrCache;

  /// Cached BB's register pressure.
  DenseMap<const MachineBasicBlock *, std::vector<unsigned>>
      CachedRegisterPressure;

  bool EnableSinkAndFold;

public:
  static char ID; // Pass identification

  MachineSinking() : MachineFunctionPass(ID) {
    initializeMachineSinkingPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    MachineFunctionPass::getAnalysisUsage(AU);
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<MachineDominatorTreeWrapperPass>();
    AU.addRequired<MachinePostDominatorTreeWrapperPass>();
    AU.addRequired<MachineCycleInfoWrapperPass>();
    AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
    AU.addPreserved<MachineCycleInfoWrapperPass>();
    AU.addPreserved<MachineLoopInfoWrapperPass>();
    AU.addRequired<ProfileSummaryInfoWrapperPass>();
    if (UseBlockFreqInfo)
      AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
    AU.addRequired<TargetPassConfig>();
  }

  void releaseMemory() override {
    CEBCandidates.clear();
    CEMergeCandidates.clear();
  }

private:
  bool ProcessBlock(MachineBasicBlock &MBB);
  void ProcessDbgInst(MachineInstr &MI);
  bool isLegalToBreakCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
                                  MachineBasicBlock *To, bool BreakPHIEdge);
  bool isWorthBreakingCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
                                   MachineBasicBlock *To,
                                   MachineBasicBlock *&DeferredFromBlock);

  bool hasStoreBetween(MachineBasicBlock *From, MachineBasicBlock *To,
                       MachineInstr &MI);

  /// Postpone the splitting of the given critical
  /// edge (\p From, \p To).
  ///
  /// We do not split the edges on the fly. Indeed, this invalidates
  /// the dominance information and thus triggers a lot of updates
  /// of that information underneath.
  /// Instead, we postpone all the splits after each iteration of
  /// the main loop. That way, the information is at least valid
  /// for the lifetime of an iteration.
  ///
  /// \return True if the edge is marked as toSplit, false otherwise.
  /// False can be returned if, for instance, this is not profitable.
  bool PostponeSplitCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
                                 MachineBasicBlock *To, bool BreakPHIEdge);
  bool SinkInstruction(MachineInstr &MI, bool &SawStore,
                       AllSuccsCache &AllSuccessors);

  /// If we sink a COPY inst, some debug users of its destination may no
  /// longer be dominated by the COPY, and will eventually be dropped.
  /// This is easily rectified by forwarding the non-dominated debug uses
  /// to the copy source.
  void SalvageUnsunkDebugUsersOfCopy(MachineInstr &,
                                     MachineBasicBlock *TargetBlock);
  bool AllUsesDominatedByBlock(Register Reg, MachineBasicBlock *MBB,
                               MachineBasicBlock *DefMBB, bool &BreakPHIEdge,
                               bool &LocalUse) const;
  MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
                                      bool &BreakPHIEdge,
                                      AllSuccsCache &AllSuccessors);

  void FindCycleSinkCandidates(MachineCycle *Cycle, MachineBasicBlock *BB,
                               SmallVectorImpl<MachineInstr *> &Candidates);
  bool SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I);

  bool isProfitableToSinkTo(Register Reg, MachineInstr &MI,
                            MachineBasicBlock *MBB,
                            MachineBasicBlock *SuccToSinkTo,
                            AllSuccsCache &AllSuccessors);

  bool PerformTrivialForwardCoalescing(MachineInstr &MI,
                                       MachineBasicBlock *MBB);

  bool PerformSinkAndFold(MachineInstr &MI, MachineBasicBlock *MBB);

  SmallVector<MachineBasicBlock *, 4> &
  GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
                         AllSuccsCache &AllSuccessors) const;

  std::vector<unsigned> &getBBRegisterPressure(const MachineBasicBlock &MBB);

  bool registerPressureSetExceedsLimit(unsigned NRegs,
                                       const TargetRegisterClass *RC,
                                       const MachineBasicBlock &MBB);
};

} // end anonymous namespace

char MachineSinking::ID = 0;

char &llvm::MachineSinkingID = MachineSinking::ID;

INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE, "Machine code sinking",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineSinking, DEBUG_TYPE, "Machine code sinking", false,
                    false)

/// Return true if a target defined block prologue instruction interferes
/// with a sink candidate.
static bool blockPrologueInterferes(const MachineBasicBlock *BB,
                                    MachineBasicBlock::const_iterator End,
                                    const MachineInstr &MI,
                                    const TargetRegisterInfo *TRI,
                                    const TargetInstrInfo *TII,
                                    const MachineRegisterInfo *MRI) {
  for (MachineBasicBlock::const_iterator PI = BB->getFirstNonPHI(); PI != End;
       ++PI) {
    // Only check target defined prologue instructions.
    if (!TII->isBasicBlockPrologue(*PI))
      continue;
    for (auto &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (!Reg)
        continue;
      if (MO.isUse()) {
        if (Reg.isPhysical() &&
            (TII->isIgnorableUse(MO) || (MRI && MRI->isConstantPhysReg(Reg))))
          continue;
        if (PI->modifiesRegister(Reg, TRI))
          return true;
      } else {
        if (PI->readsRegister(Reg, TRI))
          return true;
        // Check for interference with non-dead defs.
        auto *DefOp = PI->findRegisterDefOperand(Reg, TRI, false, true);
        if (DefOp && !DefOp->isDead())
          return true;
      }
    }
  }

  return false;
}

bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI,
                                                     MachineBasicBlock *MBB) {
  if (!MI.isCopy())
    return false;

  Register SrcReg = MI.getOperand(1).getReg();
  Register DstReg = MI.getOperand(0).getReg();
  if (!SrcReg.isVirtual() || !DstReg.isVirtual() ||
      !MRI->hasOneNonDBGUse(SrcReg))
    return false;

  const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
  const TargetRegisterClass *DRC = MRI->getRegClass(DstReg);
  if (SRC != DRC)
    return false;

  MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
  if (DefMI->isCopyLike())
    return false;
  LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
  LLVM_DEBUG(dbgs() << "*** to: " << MI);
  MRI->replaceRegWith(DstReg, SrcReg);
  MI.eraseFromParent();

  // Conservatively, clear any kill flags, since it's possible that they are no
  // longer correct.
  MRI->clearKillFlags(SrcReg);

  ++NumCoalesces;
  return true;
}

bool MachineSinking::PerformSinkAndFold(MachineInstr &MI,
                                        MachineBasicBlock *MBB) {
  if (MI.isCopy() || MI.mayLoadOrStore() ||
      MI.getOpcode() == TargetOpcode::REG_SEQUENCE)
    return false;

  // Don't sink instructions that the target prefers not to sink.
  if (!TII->shouldSink(MI))
    return false;

  // Check if it's safe to move the instruction.
  bool SawStore = true;
  if (!MI.isSafeToMove(SawStore))
    return false;

  // Convergent operations may not be made control-dependent on additional
  // values.
  if (MI.isConvergent())
    return false;

  // Don't sink defs/uses of hard registers or if the instruction defines more
  // than one register.
  // Don't sink more than two register uses - it'll cover most of the cases and
  // greatly simplifies the register pressure checks.
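  // As an illustrative sketch (exact opcodes are target-specific): an
  // instruction such as
  //   %a = ADD %x, %y
  // whose only use is the address operand of
  //   %v = LOAD %a
  // may be folded into the memory operation as
  //   %v = LOAD %x, %y
  // eliminating %a's live range entirely.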
  Register DefReg;
  Register UsedRegA, UsedRegB;
  for (const MachineOperand &MO : MI.operands()) {
    if (MO.isImm() || MO.isRegMask() || MO.isRegLiveOut() || MO.isMetadata() ||
        MO.isMCSymbol() || MO.isDbgInstrRef() || MO.isCFIIndex() ||
        MO.isIntrinsicID() || MO.isPredicate() || MO.isShuffleMask())
      continue;
    if (!MO.isReg())
      return false;

    Register Reg = MO.getReg();
    if (Reg == 0)
      continue;

    if (Reg.isVirtual()) {
      if (MO.isDef()) {
        if (DefReg)
          return false;
        DefReg = Reg;
        continue;
      }

      if (UsedRegA == 0)
        UsedRegA = Reg;
      else if (UsedRegB == 0)
        UsedRegB = Reg;
      else
        return false;
      continue;
    }

    if (Reg.isPhysical() && MO.isUse() &&
        (MRI->isConstantPhysReg(Reg) || TII->isIgnorableUse(MO)))
      continue;

    return false;
  }

  // Scan uses of the destination register. Every use, except the last, must be
  // a copy, with a chain of copies terminating with either a copy into a hard
  // register, or a load/store instruction where the use is part of the
  // address (*not* the stored value).
  using SinkInfo = std::pair<MachineInstr *, ExtAddrMode>;
  SmallVector<SinkInfo> SinkInto;
  SmallVector<Register> Worklist;

  const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
  const TargetRegisterClass *RCA =
      UsedRegA == 0 ? nullptr : MRI->getRegClass(UsedRegA);
  const TargetRegisterClass *RCB =
      UsedRegB == 0 ? nullptr : MRI->getRegClass(UsedRegB);

  Worklist.push_back(DefReg);
  while (!Worklist.empty()) {
    Register Reg = Worklist.pop_back_val();

    for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
      ExtAddrMode MaybeAM;
      MachineInstr &UseInst = *MO.getParent();
      if (UseInst.isCopy()) {
        Register DstReg;
        if (const MachineOperand &O = UseInst.getOperand(0); O.isReg())
          DstReg = O.getReg();
        if (DstReg == 0)
          return false;
        if (DstReg.isVirtual()) {
          Worklist.push_back(DstReg);
          continue;
        }
        // If we are going to replace a copy, the original instruction must be
        // as cheap as a copy.
        if (!TII->isAsCheapAsAMove(MI))
          return false;
        // The hard register must be in the register class of the original
        // instruction's destination register.
        if (!RC->contains(DstReg))
          return false;
      } else if (UseInst.mayLoadOrStore()) {
        ExtAddrMode AM;
        if (!TII->canFoldIntoAddrMode(UseInst, Reg, MI, AM))
          return false;
        MaybeAM = AM;
      } else {
        return false;
      }

      if (UseInst.getParent() != MI.getParent()) {
        // If the register class of the register we are replacing is a superset
        // of any of the register classes of the operands of the materialized
        // instruction, don't consider that live range extended.
        const TargetRegisterClass *RCS = MRI->getRegClass(Reg);
        if (RCA && RCA->hasSuperClassEq(RCS))
          RCA = nullptr;
        else if (RCB && RCB->hasSuperClassEq(RCS))
          RCB = nullptr;
        if (RCA || RCB) {
          if (RCA == nullptr) {
            RCA = RCB;
            RCB = nullptr;
          }

          unsigned NRegs = !!RCA + !!RCB;
          if (RCA == RCB)
            RCB = nullptr;

          // Check we don't exceed register pressure at the destination.
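          // If both extended live ranges use the same register class, they
          // are checked together (NRegs extra registers of RCA); otherwise
          // one extra register is checked against each class separately.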
          const MachineBasicBlock &MBB = *UseInst.getParent();
          if (RCB == nullptr) {
            if (registerPressureSetExceedsLimit(NRegs, RCA, MBB))
              return false;
          } else if (registerPressureSetExceedsLimit(1, RCA, MBB) ||
                     registerPressureSetExceedsLimit(1, RCB, MBB)) {
            return false;
          }
        }
      }

      SinkInto.emplace_back(&UseInst, MaybeAM);
    }
  }

  if (SinkInto.empty())
    return false;

  // Now we know we can fold the instruction in all its users.
  for (auto &[SinkDst, MaybeAM] : SinkInto) {
    MachineInstr *New = nullptr;
    LLVM_DEBUG(dbgs() << "Sinking copy of"; MI.dump(); dbgs() << "into";
               SinkDst->dump());
    if (SinkDst->isCopy()) {
      // TODO: After performing the sink-and-fold, the original instruction is
      // deleted. Its value is still available (in a hard register), so if
      // there are debug instructions which refer to the (now deleted) virtual
      // register they could be updated to refer to the hard register, in
      // principle. However, it's not clear how to do that, moreover in some
      // cases the debug instructions may need to be replicated proportionally
      // to the number of the COPY instructions replaced and in some extreme
      // cases we can end up with quadratic increase in the number of debug
      // instructions.

      // Sink a copy of the instruction, replacing a COPY instruction.
      MachineBasicBlock::iterator InsertPt = SinkDst->getIterator();
      Register DstReg = SinkDst->getOperand(0).getReg();
      TII->reMaterialize(*SinkDst->getParent(), InsertPt, DstReg, 0, MI, *TRI);
      New = &*std::prev(InsertPt);
      if (!New->getDebugLoc())
        New->setDebugLoc(SinkDst->getDebugLoc());

      // The operand registers of the "sunk" instruction have their live range
      // extended and their kill flags may no longer be correct. Conservatively
      // clear the kill flags.
      if (UsedRegA)
        MRI->clearKillFlags(UsedRegA);
      if (UsedRegB)
        MRI->clearKillFlags(UsedRegB);
    } else {
      // Fold instruction into the addressing mode of a memory instruction.
      New = TII->emitLdStWithAddr(*SinkDst, MaybeAM);

      // The registers of the addressing mode may have their live range
      // extended and their kill flags may no longer be correct. Conservatively
      // clear the kill flags.
      if (Register R = MaybeAM.BaseReg; R.isValid() && R.isVirtual())
        MRI->clearKillFlags(R);
      if (Register R = MaybeAM.ScaledReg; R.isValid() && R.isVirtual())
        MRI->clearKillFlags(R);
    }
    LLVM_DEBUG(dbgs() << "yielding"; New->dump());
    // Clear the StoreInstrCache, since we may invalidate it by erasing.
    if (SinkDst->mayStore() && !SinkDst->hasOrderedMemoryRef())
      StoreInstrCache.clear();
    SinkDst->eraseFromParent();
  }

  // Collect operands that need to be cleaned up because the registers no
  // longer exist (in COPYs and debug instructions). We cannot delete
  // instructions or clear operands while traversing register uses.
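  // At this point every remaining (transitive) use of DefReg is either a COPY
  // that is about to be deleted or a debug instruction (asserted below).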
  SmallVector<MachineOperand *> Cleanup;
  Worklist.push_back(DefReg);
  while (!Worklist.empty()) {
    Register Reg = Worklist.pop_back_val();
    for (MachineOperand &MO : MRI->use_operands(Reg)) {
      MachineInstr *U = MO.getParent();
      assert((U->isCopy() || U->isDebugInstr()) &&
             "Only debug uses and copies must remain");
      if (U->isCopy())
        Worklist.push_back(U->getOperand(0).getReg());
      Cleanup.push_back(&MO);
    }
  }

  // Delete the dead COPYs and clear operands in debug instructions.
  for (MachineOperand *MO : Cleanup) {
    MachineInstr *I = MO->getParent();
    if (I->isCopy()) {
      I->eraseFromParent();
    } else {
      MO->setReg(0);
      MO->setSubReg(0);
    }
  }

  MI.eraseFromParent();
  return true;
}

/// AllUsesDominatedByBlock - Return true if all uses of the specified register
/// occur in blocks dominated by the specified block. If any use is in the
/// definition block, then return false since it is never legal to move def
/// after uses.
bool MachineSinking::AllUsesDominatedByBlock(Register Reg,
                                             MachineBasicBlock *MBB,
                                             MachineBasicBlock *DefMBB,
                                             bool &BreakPHIEdge,
                                             bool &LocalUse) const {
  assert(Reg.isVirtual() && "Only makes sense for vregs");

  // Ignore debug uses because debug info doesn't affect the code.
  if (MRI->use_nodbg_empty(Reg))
    return true;

  // BreakPHIEdge is true if all the uses are in the successor MBB being sunk
  // into and they are all PHI nodes. In this case, machine-sink must break
  // the critical edge first. e.g.
  //
  // %bb.1:
  //   Predecessors according to CFG: %bb.0
  //   ...
  //   %def = DEC64_32r %x, implicit-def dead %eflags
  //   ...
  //   JE_4 <%bb.37>, implicit %eflags
  //   Successors according to CFG: %bb.37 %bb.2
  //
  // %bb.2:
  //   %p = PHI %y, %bb.0, %def, %bb.1
  if (all_of(MRI->use_nodbg_operands(Reg), [&](MachineOperand &MO) {
        MachineInstr *UseInst = MO.getParent();
        unsigned OpNo = MO.getOperandNo();
        MachineBasicBlock *UseBlock = UseInst->getParent();
        return UseBlock == MBB && UseInst->isPHI() &&
               UseInst->getOperand(OpNo + 1).getMBB() == DefMBB;
      })) {
    BreakPHIEdge = true;
    return true;
  }

  for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
    // Determine the block of the use.
    MachineInstr *UseInst = MO.getParent();
    unsigned OpNo = &MO - &UseInst->getOperand(0);
    MachineBasicBlock *UseBlock = UseInst->getParent();
    if (UseInst->isPHI()) {
      // PHI nodes use the operand in the predecessor block, not the block with
      // the PHI.
      UseBlock = UseInst->getOperand(OpNo + 1).getMBB();
    } else if (UseBlock == DefMBB) {
      LocalUse = true;
      return false;
    }

    // Check that it dominates.
    if (!DT->dominates(MBB, UseBlock))
      return false;
  }

  return true;
}

/// Return true if this machine instruction loads from the global offset table
/// or the constant pool.
static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
  assert(MI.mayLoad() && "Expected MI that loads!");

  // If we lost memory operands, conservatively assume that the instruction
  // reads from everything.
  if (MI.memoperands_empty())
    return true;

  for (MachineMemOperand *MemOp : MI.memoperands())
    if (const PseudoSourceValue *PSV = MemOp->getPseudoValue())
      if (PSV->isGOT() || PSV->isConstantPool())
        return true;

  return false;
}

void MachineSinking::FindCycleSinkCandidates(
    MachineCycle *Cycle, MachineBasicBlock *BB,
    SmallVectorImpl<MachineInstr *> &Candidates) {
  for (auto &MI : *BB) {
    LLVM_DEBUG(dbgs() << "CycleSink: Analysing candidate: " << MI);
    if (!TII->shouldSink(MI)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Instruction not a candidate for this "
                           "target\n");
      continue;
    }
    if (!isCycleInvariant(Cycle, MI)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Instruction is not cycle invariant\n");
      continue;
    }
    bool DontMoveAcrossStore = true;
    if (!MI.isSafeToMove(DontMoveAcrossStore)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Instruction not safe to move.\n");
      continue;
    }
    if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Not sinking load that isn't from the "
                           "GOT or constant pool\n");
      continue;
    }
    if (MI.isConvergent())
      continue;

    const MachineOperand &MO = MI.getOperand(0);
    if (!MO.isReg() || !MO.getReg() || !MO.isDef())
      continue;
    if (!MRI->hasOneDef(MO.getReg()))
      continue;

    LLVM_DEBUG(dbgs() << "CycleSink: Instruction added as candidate.\n");
    Candidates.push_back(&MI);
  }
}

bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  LLVM_DEBUG(dbgs() << "******** Machine Sinking ********\n");

  STI = &MF.getSubtarget();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &MF.getRegInfo();
  DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
  PDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
  CI = &getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
  PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  MBFI = UseBlockFreqInfo
             ? &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI()
             : nullptr;
  MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  RegClassInfo.runOnMachineFunction(MF);
  TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
  EnableSinkAndFold = PassConfig->getEnableSinkAndFold();

  bool EverMadeChange = false;

  while (true) {
    bool MadeChange = false;

    // Process all basic blocks.
    CEBCandidates.clear();
    CEMergeCandidates.clear();
    ToSplit.clear();
    for (auto &MBB : MF)
      MadeChange |= ProcessBlock(MBB);

    // If we have anything we marked as toSplit, split it now.
    for (const auto &Pair : ToSplit) {
      auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, *this);
      if (NewSucc != nullptr) {
        LLVM_DEBUG(dbgs() << " *** Splitting critical edge: "
                          << printMBBReference(*Pair.first) << " -- "
                          << printMBBReference(*NewSucc) << " -- "
                          << printMBBReference(*Pair.second) << '\n');
        if (MBFI)
          MBFI->onEdgeSplit(*Pair.first, *NewSucc, *MBPI);

        MadeChange = true;
        ++NumSplit;
        CI->splitCriticalEdge(Pair.first, Pair.second, NewSucc);
      } else
        LLVM_DEBUG(dbgs() << " *** Not legal to break critical edge\n");
    }
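    // Sinking into the blocks created by the splits above happens on the next
    // iteration of this outer loop.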
    // If this iteration over the code changed anything, keep iterating.
    if (!MadeChange)
      break;
    EverMadeChange = true;
  }

  if (SinkInstsIntoCycle) {
    SmallVector<MachineCycle *, 8> Cycles(CI->toplevel_cycles());
    for (auto *Cycle : Cycles) {
      MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
      if (!Preheader) {
        LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n");
        continue;
      }
      SmallVector<MachineInstr *, 8> Candidates;
      FindCycleSinkCandidates(Cycle, Preheader, Candidates);

      // Walk the candidates in reverse order so that we start with the use
      // of a def-use chain, if there is any.
      // TODO: Sort the candidates using a cost-model.
      unsigned i = 0;
      for (MachineInstr *I : llvm::reverse(Candidates)) {
        if (i++ == SinkIntoCycleLimit) {
          LLVM_DEBUG(dbgs() << "CycleSink: Limit of instructions to be "
                               "analysed reached.\n");
          break;
        }

        if (!SinkIntoCycle(Cycle, *I))
          break;
        EverMadeChange = true;
        ++NumCycleSunk;
      }
    }
  }

  HasStoreCache.clear();
  StoreInstrCache.clear();

  // Now clear any kill flags for recorded registers.
  for (auto I : RegsToClearKillFlags)
    MRI->clearKillFlags(I);
  RegsToClearKillFlags.clear();

  return EverMadeChange;
}

bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
  if ((!EnableSinkAndFold && MBB.succ_size() <= 1) || MBB.empty())
    return false;

  // Don't bother sinking code out of unreachable blocks. In addition to being
  // unprofitable, it can also lead to infinite looping, because in an
  // unreachable cycle there may be nowhere to stop.
  if (!DT->isReachableFromEntry(&MBB))
    return false;

  bool MadeChange = false;

  // Cache all successors, sorted by frequency info and cycle depth.
  AllSuccsCache AllSuccessors;

  // Walk the basic block bottom-up. Remember if we saw a store.
  MachineBasicBlock::iterator I = MBB.end();
  --I;
  bool ProcessedBegin, SawStore = false;
  do {
    MachineInstr &MI = *I; // The instruction to sink.

    // Predecrement I (if it's not begin) so that it isn't invalidated by
    // sinking.
    ProcessedBegin = I == MBB.begin();
    if (!ProcessedBegin)
      --I;

    if (MI.isDebugOrPseudoInstr() || MI.isFakeUse()) {
      if (MI.isDebugValue())
        ProcessDbgInst(MI);
      continue;
    }

    if (EnableSinkAndFold && PerformSinkAndFold(MI, &MBB)) {
      MadeChange = true;
      continue;
    }

    // Can't sink anything out of a block that has fewer than two successors.
    if (MBB.succ_size() <= 1)
      continue;

    if (PerformTrivialForwardCoalescing(MI, &MBB)) {
      MadeChange = true;
      continue;
    }

    if (SinkInstruction(MI, SawStore, AllSuccessors)) {
      ++NumSunk;
      MadeChange = true;
    }

    // If we just processed the first instruction in the block, we're done.
  } while (!ProcessedBegin);

  SeenDbgUsers.clear();
  SeenDbgVars.clear();
  // Recalculate the BB register pressure after sinking one BB.
  CachedRegisterPressure.clear();
  return MadeChange;
}

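/// Record a DBG_VALUE encountered while walking a block bottom-up. Because
/// the walk is bottom-up, a variable already present in SeenDbgVars means a
/// later DBG_VALUE of the same variable exists, i.e. this DBG_VALUE is
/// post-dominated by it.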
void MachineSinking::ProcessDbgInst(MachineInstr &MI) {
  // When we see DBG_VALUEs for registers, record any vreg it reads, so that
  // we know what to sink if the vreg def sinks.
  assert(MI.isDebugValue() && "Expected DBG_VALUE for processing");

  DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
                    MI.getDebugLoc()->getInlinedAt());
  bool SeenBefore = SeenDbgVars.contains(Var);

  for (MachineOperand &MO : MI.debug_operands()) {
    if (MO.isReg() && MO.getReg().isVirtual())
      SeenDbgUsers[MO.getReg()].push_back(SeenDbgUser(&MI, SeenBefore));
  }

  // Record the variable for any DBG_VALUE, to avoid re-ordering any of them.
  SeenDbgVars.insert(Var);
}

bool MachineSinking::isWorthBreakingCriticalEdge(
    MachineInstr &MI, MachineBasicBlock *From, MachineBasicBlock *To,
    MachineBasicBlock *&DeferredFromBlock) {
  // FIXME: Need much better heuristics.

  // If the pass has already considered breaking this edge (during this pass
  // through the function), then let's go ahead and break it. This means
  // sinking multiple "cheap" instructions into the same block.
  if (!CEBCandidates.insert(std::make_pair(From, To)).second)
    return true;

  if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI))
    return true;

  // Check and record the register and the destination block we want to sink
  // into. Note that we want to do this before the branch probability check
  // below: we record the initial candidate even if it is on a hot edge, so
  // that other candidates that may not be on hot edges can be sunk as well.
  for (const auto &MO : MI.all_defs()) {
    Register Reg = MO.getReg();
    if (!Reg)
      continue;
    Register SrcReg = Reg.isVirtual() ? TRI->lookThruCopyLike(Reg, MRI) : Reg;
    auto Key = std::make_pair(SrcReg, To);
    auto Res = CEMergeCandidates.try_emplace(Key, From);
    // If we already wanted to sink the same register into the same block,
    // consider it profitable.
    if (!Res.second) {
      // Return the source block that was previously held off.
      DeferredFromBlock = Res.first->second;
      return true;
    }
  }

  if (From->isSuccessor(To) &&
      MBPI->getEdgeProbability(From, To) <=
          BranchProbability(SplitEdgeProbabilityThreshold, 100))
    return true;

  // MI is cheap, we probably don't want to break the critical edge for it.
  // However, if this would allow some definitions of its source operands
  // to be sunk then it's probably worth it.
  for (const MachineOperand &MO : MI.all_uses()) {
    Register Reg = MO.getReg();
    if (Reg == 0)
      continue;

    // We don't move live definitions of physical registers,
    // so sinking their uses won't enable any opportunities.
    if (Reg.isPhysical())
      continue;

    // If this instruction is the only user of a virtual register,
    // check if breaking the edge will enable sinking
    // both this instruction and the defining instruction.
    if (MRI->hasOneNonDBGUse(Reg)) {
      // If the definition resides in same MBB,
      // claim it's likely we can sink these together.
      // If definition resides elsewhere, we aren't
      // blocking it from being sunk so don't break the edge.
      MachineInstr *DefMI = MRI->getVRegDef(Reg);
      if (DefMI->getParent() == MI.getParent())
        return true;
    }
  }

  return false;
}

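/// Return true if it is legal to split the critical edge (\p FromBB, \p ToBB)
/// in order to sink \p MI: the split must not break a cycle backedge, and the
/// block holding the sunk computation must still dominate all of its uses.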
bool MachineSinking::isLegalToBreakCriticalEdge(MachineInstr &MI,
                                                MachineBasicBlock *FromBB,
                                                MachineBasicBlock *ToBB,
                                                bool BreakPHIEdge) {
  // Avoid breaking a back edge. From == To means a backedge for a single-BB
  // cycle.
  if (!SplitEdges || FromBB == ToBB || !FromBB->isSuccessor(ToBB))
    return false;

  MachineCycle *FromCycle = CI->getCycle(FromBB);
  MachineCycle *ToCycle = CI->getCycle(ToBB);

  // Check for backedges of more "complex" cycles.
  if (FromCycle == ToCycle && FromCycle &&
      (!FromCycle->isReducible() || FromCycle->getHeader() == ToBB))
    return false;

  // It's not always legal to break critical edges and sink the computation
  // to the edge.
  //
  // %bb.1:
  //   v1024
  //   Beq %bb.3
  //   <fallthrough>
  // %bb.2:
  //   ... no uses of v1024
  //   <fallthrough>
  // %bb.3:
  //   ...
  //   = v1024
  //
  // If %bb.1 -> %bb.3 edge is broken and computation of v1024 is inserted:
  //
  // %bb.1:
  //   ...
  //   Bne %bb.2
  // %bb.4:
  //   v1024 =
  //   B %bb.3
  // %bb.2:
  //   ... no uses of v1024
  //   <fallthrough>
  // %bb.3:
  //   ...
  //   = v1024
  //
  // This is incorrect since v1024 is not computed along the
  // %bb.1->%bb.2->%bb.3 flow. We need to ensure the new basic block where the
  // computation is sunk to dominates all the uses.
  // It's only legal to break critical edge and sink the computation to the
  // new block if all the predecessors of "To", except for "From", are
  // not dominated by "From". Given SSA property, this means these
  // predecessors are dominated by "To".
  //
  // There is no need to do this check if all the uses are PHI nodes. PHI
  // sources are only defined on the specific predecessor edges.
  if (!BreakPHIEdge) {
    for (MachineBasicBlock *Pred : ToBB->predecessors())
      if (Pred != FromBB && !DT->dominates(ToBB, Pred))
        return false;
  }

  return true;
}

bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
                                               MachineBasicBlock *FromBB,
                                               MachineBasicBlock *ToBB,
                                               bool BreakPHIEdge) {
  bool Status = false;
  MachineBasicBlock *DeferredFromBB = nullptr;
  if (isWorthBreakingCriticalEdge(MI, FromBB, ToBB, DeferredFromBB)) {
    // If there is a DeferredFromBB, we consider FromBB only if _both_
    // of them are legal to split.
    if ((!DeferredFromBB ||
         ToSplit.count(std::make_pair(DeferredFromBB, ToBB)) ||
         isLegalToBreakCriticalEdge(MI, DeferredFromBB, ToBB, BreakPHIEdge)) &&
        isLegalToBreakCriticalEdge(MI, FromBB, ToBB, BreakPHIEdge)) {
      ToSplit.insert(std::make_pair(FromBB, ToBB));
      if (DeferredFromBB)
        ToSplit.insert(std::make_pair(DeferredFromBB, ToBB));
      Status = true;
    }
  }

  return Status;
}

std::vector<unsigned> &
MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB) {
  // To save compile time, an MBB's cached register pressure does not change
  // within one ProcessBlock iteration, even though the real pressure changes
  // after sinking any instruction into the MBB.
  // FIXME: need an accurate and cheap register pressure estimation model
  // here.
  auto RP = CachedRegisterPressure.find(&MBB);
  if (RP != CachedRegisterPressure.end())
    return RP->second;

  RegionPressure Pressure;
  RegPressureTracker RPTracker(Pressure);

  // Initialize the register pressure tracker.
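  // The tracker is seeded at the end of the block because the loop below
  // walks the instructions bottom-up, receding over one instruction at a
  // time.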
  RPTracker.init(MBB.getParent(), &RegClassInfo, nullptr, &MBB, MBB.end(),
                 /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true);

  for (MachineBasicBlock::const_iterator MII = MBB.instr_end(),
                                         MIE = MBB.instr_begin();
       MII != MIE; --MII) {
    const MachineInstr &MI = *std::prev(MII);
    if (MI.isDebugInstr() || MI.isPseudoProbe())
      continue;
    RegisterOperands RegOpers;
    RegOpers.collect(MI, *TRI, *MRI, false, false);
    RPTracker.recedeSkipDebugValues();
    assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!");
    RPTracker.recede(RegOpers);
  }

  RPTracker.closeRegion();
  auto It = CachedRegisterPressure.insert(
      std::make_pair(&MBB, RPTracker.getPressure().MaxSetPressure));
  return It.first->second;
}

bool MachineSinking::registerPressureSetExceedsLimit(
    unsigned NRegs, const TargetRegisterClass *RC,
    const MachineBasicBlock &MBB) {
  unsigned Weight = NRegs * TRI->getRegClassWeight(RC).RegWeight;
  const int *PS = TRI->getRegClassPressureSets(RC);
  std::vector<unsigned> BBRegisterPressure = getBBRegisterPressure(MBB);
  for (; *PS != -1; PS++)
    if (Weight + BBRegisterPressure[*PS] >=
        TRI->getRegPressureSetLimit(*MBB.getParent(), *PS))
      return true;
  return false;
}

/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
                                          MachineBasicBlock *MBB,
                                          MachineBasicBlock *SuccToSinkTo,
                                          AllSuccsCache &AllSuccessors) {
  assert(SuccToSinkTo && "Invalid SinkTo Candidate BB");

  if (MBB == SuccToSinkTo)
    return false;

  // It is profitable if SuccToSinkTo does not post dominate current block.
  if (!PDT->dominates(SuccToSinkTo, MBB))
    return true;

  // It is profitable to sink an instruction from a deeper cycle to a shallower
  // cycle, even if the latter post-dominates the former (PR21115).
  if (CI->getCycleDepth(MBB) > CI->getCycleDepth(SuccToSinkTo))
    return true;

  // Check if only use in post dominated block is PHI instruction.
  bool NonPHIUse = false;
  for (MachineInstr &UseInst : MRI->use_nodbg_instructions(Reg)) {
    MachineBasicBlock *UseBlock = UseInst.getParent();
    if (UseBlock == SuccToSinkTo && !UseInst.isPHI())
      NonPHIUse = true;
  }
  if (!NonPHIUse)
    return true;

  // Even if SuccToSinkTo post-dominates MBB, sinking may still be profitable
  // if MI can be further profitably sunk into another block in the next
  // round.
  bool BreakPHIEdge = false;
  // FIXME - If finding successor is compile time expensive then cache results.
  if (MachineBasicBlock *MBB2 =
          FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors))
    return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors);

  MachineCycle *MCycle = CI->getCycle(MBB);

  // If the instruction is not inside a cycle, it is not profitable to sink MI
  // to a post-dominating block SuccToSinkTo.
  if (!MCycle)
    return false;

  // If this instruction is inside a cycle and sinking it can shorten the live
  // ranges of more registers, it is still profitable.
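  // The loop below checks this: defs must have all their users dominated by
  // SuccToSinkTo, and uses defined inside the cycle must not push a register
  // pressure set in SuccToSinkTo past its limit.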
  for (const MachineOperand &MO : MI.operands()) {
    // Ignore non-register operands.
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (Reg == 0)
      continue;

    if (Reg.isPhysical()) {
      // Don't handle non-constant and non-ignorable physical register uses.
      if (MO.isUse() && !MRI->isConstantPhysReg(Reg) &&
          !TII->isIgnorableUse(MO))
        return false;
      continue;
    }

    // Users for the defs are all dominated by SuccToSinkTo.
    if (MO.isDef()) {
      // This def register's live range is shortened after sinking.
      bool LocalUse = false;
      if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, BreakPHIEdge,
                                   LocalUse))
        return false;
    } else {
      MachineInstr *DefMI = MRI->getVRegDef(Reg);
      if (!DefMI)
        continue;
      MachineCycle *Cycle = CI->getCycle(DefMI->getParent());
      // DefMI is defined outside of the cycle, so there should be no live
      // range impact for this operand. "Defined outside of the cycle" means
      // either:
      // 1: the definition is outside of the cycle, or
      // 2: the definition is in this cycle, but it is a PHI in the cycle
      //    header.
      if (Cycle != MCycle || (DefMI->isPHI() && Cycle && Cycle->isReducible() &&
                              Cycle->getHeader() == DefMI->getParent()))
        continue;
      // The DefMI is defined inside the cycle.
      // If sinking this operand makes some register pressure set exceed limit,
      // it is not profitable.
      if (registerPressureSetExceedsLimit(1, MRI->getRegClass(Reg),
                                          *SuccToSinkTo)) {
        LLVM_DEBUG(
            dbgs() << "register pressure exceeds limit, not profitable.");
        return false;
      }
    }
  }

  // If MI is in a cycle and all its operands are alive across the whole
  // cycle, or if no operand sinking makes a register pressure set exceed the
  // limit, it is profitable to sink MI.
  return true;
}

/// Get the sorted sequence of successors for this MachineBasicBlock, possibly
/// computing it if it was not already cached.
SmallVector<MachineBasicBlock *, 4> &
MachineSinking::GetAllSortedSuccessors(MachineInstr &MI,
                                       MachineBasicBlock *MBB,
                                       AllSuccsCache &AllSuccessors) const {
  // Do we have the sorted successors in cache?
  auto Succs = AllSuccessors.find(MBB);
  if (Succs != AllSuccessors.end())
    return Succs->second;

  SmallVector<MachineBasicBlock *, 4> AllSuccs(MBB->successors());

  // Handle cases where sinking can happen but where the sink point isn't a
  // successor. For example:
  //
  //   x = computation
  //   if () {} else {}
  //   use x
  //
  for (MachineDomTreeNode *DTChild : DT->getNode(MBB)->children()) {
    // DomTree children of MBB that have MBB as immediate dominator are added.
    if (DTChild->getIDom()->getBlock() == MI.getParent() &&
        // Skip MBBs already added to the AllSuccs vector above.
        !MBB->isSuccessor(DTChild->getBlock()))
      AllSuccs.push_back(DTChild->getBlock());
  }

  // Sort Successors according to their cycle depth or block frequency info.
  llvm::stable_sort(
      AllSuccs, [&](const MachineBasicBlock *L, const MachineBasicBlock *R) {
        uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
        uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
        if (llvm::shouldOptimizeForSize(MBB, PSI, MBFI) || !LHSFreq || !RHSFreq)
          return CI->getCycleDepth(L) < CI->getCycleDepth(R);
        return LHSFreq < RHSFreq;
      });

  auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs));

  return it.first->second;
}

/// FindSuccToSinkTo - Find a successor to sink this instruction to.
MachineBasicBlock *
MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
                                 bool &BreakPHIEdge,
                                 AllSuccsCache &AllSuccessors) {
  assert(MBB && "Invalid MachineBasicBlock!");

  // Loop over all the operands of the specified instruction. If there is
  // anything we can't handle, bail out.

  // SuccToSinkTo - This is the successor to sink this instruction to, once we
  // decide.
  MachineBasicBlock *SuccToSinkTo = nullptr;
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg())
      continue; // Ignore non-register operands.

    Register Reg = MO.getReg();
    if (Reg == 0)
      continue;

    if (Reg.isPhysical()) {
      if (MO.isUse()) {
        // If the physreg has no defs anywhere, it's just an ambient register
        // and we can freely move its uses. Alternatively, if it's allocatable,
        // it could get allocated to something with a def during allocation.
        if (!MRI->isConstantPhysReg(Reg) && !TII->isIgnorableUse(MO))
          return nullptr;
      } else if (!MO.isDead()) {
        // A def that isn't dead. We can't move it.
        return nullptr;
      }
    } else {
      // Virtual register uses are always safe to sink.
      if (MO.isUse())
        continue;

      // If it's not safe to move defs of the register class, then abort.
      if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg)))
        return nullptr;

      // Virtual register defs can only be sunk if all their uses are in blocks
      // dominated by one of the successors.
      if (SuccToSinkTo) {
        // If a previous operand picked a block to sink to, then this operand
        // must be sinkable to the same block.
        bool LocalUse = false;
        if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, BreakPHIEdge,
                                     LocalUse))
          return nullptr;

        continue;
      }

      // Otherwise, we should look at all the successors and decide which one
      // we should sink to. If we have reliable block frequency information
      // (frequency != 0) available, give successors with smaller frequencies
      // higher priority, otherwise prioritize smaller cycle depths.
      for (MachineBasicBlock *SuccBlock :
           GetAllSortedSuccessors(MI, MBB, AllSuccessors)) {
        bool LocalUse = false;
        if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB, BreakPHIEdge,
                                    LocalUse)) {
          SuccToSinkTo = SuccBlock;
          break;
        }
        if (LocalUse)
          // Def is used locally, it's never safe to move this def.
          return nullptr;
      }

      // If we couldn't find a block to sink to, ignore this instruction.
      if (!SuccToSinkTo)
        return nullptr;
      if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo, AllSuccessors))
        return nullptr;
    }
  }

  // It is not possible to sink an instruction into its own block. This can
  // happen with cycles.
  if (MBB == SuccToSinkTo)
    return nullptr;

  // It's not safe to sink instructions to an EH landing pad. Control flow
  // into a landing pad is implicitly defined.
  if (SuccToSinkTo && SuccToSinkTo->isEHPad())
    return nullptr;

  // It ought to be okay to sink instructions into an INLINEASM_BR target, but
  // only if we make sure that MI occurs _before_ an INLINEASM_BR instruction
  // in the source block (which this code does not yet do). So for now, forbid
  // doing so.
  if (SuccToSinkTo && SuccToSinkTo->isInlineAsmBrIndirectTarget())
    return nullptr;

  if (SuccToSinkTo && !TII->isSafeToSink(MI, SuccToSinkTo, CI))
    return nullptr;

  return SuccToSinkTo;
}

/// Return true if MI is likely to be usable as a memory operation by the
/// implicit null check optimization.
///
/// This is a "best effort" heuristic, and should not be relied upon for
/// correctness. This returning true does not guarantee that the implicit null
/// check optimization is legal over MI, and this returning false does not
/// guarantee MI cannot possibly be used to do a null check.
static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI,
                                             const TargetInstrInfo *TII,
                                             const TargetRegisterInfo *TRI) {
  using MachineBranchPredicate = TargetInstrInfo::MachineBranchPredicate;

  auto *MBB = MI.getParent();
  if (MBB->pred_size() != 1)
    return false;

  auto *PredMBB = *MBB->pred_begin();
  auto *PredBB = PredMBB->getBasicBlock();

  // Frontends that don't use implicit null checks have no reason to emit
  // branches with make.implicit metadata, and this function should always
  // return false for them.
  if (!PredBB ||
      !PredBB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit))
    return false;

  const MachineOperand *BaseOp;
  int64_t Offset;
  bool OffsetIsScalable;
  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
    return false;

  if (!BaseOp->isReg())
    return false;

  if (!(MI.mayLoad() && !MI.isPredicable()))
    return false;

  MachineBranchPredicate MBP;
  if (TII->analyzeBranchPredicate(*PredMBB, MBP, false))
    return false;

  return MBP.LHS.isReg() && MBP.RHS.isImm() && MBP.RHS.getImm() == 0 &&
         (MBP.Predicate == MachineBranchPredicate::PRED_NE ||
          MBP.Predicate == MachineBranchPredicate::PRED_EQ) &&
         MBP.LHS.getReg() == BaseOp->getReg();
}

/// If the sunk instruction is a copy, try to forward the copy instead of
/// leaving an 'undef' DBG_VALUE in the original location. Don't do this if
/// there's any subregister weirdness involved. Returns true if copy
/// propagation occurred.
static bool attemptDebugCopyProp(MachineInstr &SinkInst, MachineInstr &DbgMI,
                                 Register Reg) {
  const MachineRegisterInfo &MRI = SinkInst.getMF()->getRegInfo();
  const TargetInstrInfo &TII = *SinkInst.getMF()->getSubtarget().getInstrInfo();

  // Copy DBG_VALUE operand and set the original to undef. We then check to
  // see whether this is something that can be copy-forwarded. If it isn't,
  // continue around the loop.

  const MachineOperand *SrcMO = nullptr, *DstMO = nullptr;
  auto CopyOperands = TII.isCopyInstr(SinkInst);
  if (!CopyOperands)
    return false;
  SrcMO = CopyOperands->Source;
  DstMO = CopyOperands->Destination;

  // Check validity of forwarding this copy.
  bool PostRA = MRI.getNumVirtRegs() == 0;

  // Trying to forward between physical and virtual registers is too hard.
  if (Reg.isVirtual() != SrcMO->getReg().isVirtual())
    return false;

  // Only try virtual register copy-forwarding before regalloc, and physical
  // register copy-forwarding after regalloc.
  bool arePhysRegs = !Reg.isVirtual();
  if (arePhysRegs != PostRA)
    return false;

  // Pre-regalloc, only forward if all subregisters agree (or there are no
  // subregs at all). More analysis might recover some forwardable copies.
  if (!PostRA)
    for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg))
      if (DbgMO.getSubReg() != SrcMO->getSubReg() ||
          DbgMO.getSubReg() != DstMO->getSubReg())
        return false;

  // Post-regalloc, we may be sinking a DBG_VALUE of a sub or super-register
  // of this copy. Only forward the copy if the DBG_VALUE operand exactly
  // matches the copy destination.
  if (PostRA && Reg != DstMO->getReg())
    return false;

  for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg)) {
    DbgMO.setReg(SrcMO->getReg());
    DbgMO.setSubReg(SrcMO->getSubReg());
  }
  return true;
}

using MIRegs = std::pair<MachineInstr *, SmallVector<unsigned, 2>>;

/// Sink an instruction and its associated debug instructions.
static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo,
                        MachineBasicBlock::iterator InsertPos,
                        ArrayRef<MIRegs> DbgValuesToSink) {
  // If we cannot find a location to use (merge with), then we erase the debug
  // location to prevent debug-info driven tools from potentially reporting
  // wrong location information.
  if (!SuccToSinkTo.empty() && InsertPos != SuccToSinkTo.end())
    MI.setDebugLoc(DILocation::getMergedLocation(MI.getDebugLoc(),
                                                 InsertPos->getDebugLoc()));
  else
    MI.setDebugLoc(DebugLoc());

  // Move the instruction.
  MachineBasicBlock *ParentBlock = MI.getParent();
  SuccToSinkTo.splice(InsertPos, ParentBlock, MI,
                      ++MachineBasicBlock::iterator(MI));

  // Sink a copy of debug users to the insert position. Mark the original
  // DBG_VALUE location as 'undef', indicating that any earlier variable
  // location should be terminated as we've optimised away the value at this
  // point.
  for (const auto &DbgValueToSink : DbgValuesToSink) {
    MachineInstr *DbgMI = DbgValueToSink.first;
    MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(DbgMI);
    SuccToSinkTo.insert(InsertPos, NewDbgMI);

    bool PropagatedAllSunkOps = true;
    for (unsigned Reg : DbgValueToSink.second) {
      if (DbgMI->hasDebugOperandForReg(Reg)) {
        if (!attemptDebugCopyProp(MI, *DbgMI, Reg)) {
          PropagatedAllSunkOps = false;
          break;
        }
      }
    }
    if (!PropagatedAllSunkOps)
      DbgMI->setDebugValueUndef();
  }
}

/// hasStoreBetween - Check if there is a store between straight-line blocks
/// From and To.
bool MachineSinking::hasStoreBetween(MachineBasicBlock *From,
                                     MachineBasicBlock *To, MachineInstr &MI) {
  // Make sure From and To are in a straight line, which means From dominates
  // To and To post-dominates From.
  if (!DT->dominates(From, To) || !PDT->dominates(To, From))
    return true;

  auto BlockPair = std::make_pair(From, To);

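  // Two caches cooperate here: HasStoreCache records a definite yes/no answer
  // per block pair, while StoreInstrCache records the stores found between a
  // pair, so later queries only need to alias-check against them.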
  // Has this block pair been queried before, with a definite cached result?
  if (auto It = HasStoreCache.find(BlockPair); It != HasStoreCache.end())
    return It->second;

  if (auto It = StoreInstrCache.find(BlockPair); It != StoreInstrCache.end())
    return llvm::any_of(It->second, [&](MachineInstr *I) {
      return I->mayAlias(AA, MI, false);
    });

  bool SawStore = false;
  bool HasAliasedStore = false;
  DenseSet<MachineBasicBlock *> HandledBlocks;
  DenseSet<MachineBasicBlock *> HandledDomBlocks;
  // Go through all reachable blocks from From.
  for (MachineBasicBlock *BB : depth_first(From)) {
    // We insert the instruction at the start of block To, so no need to worry
    // about stores inside To.
    // Stores in block From were already considered on entry to
    // SinkInstruction.
    if (BB == To || BB == From)
      continue;

    // We already handled this BB in a previous iteration.
    if (HandledBlocks.count(BB))
      continue;

    HandledBlocks.insert(BB);
    // If To post-dominates BB, then BB lies on a path from From to To.
    if (PDT->dominates(To, BB)) {
      if (!HandledDomBlocks.count(BB))
        HandledDomBlocks.insert(BB);

      // If this BB is too big, or the number of blocks on the straight line
      // between From and To is too big, stop searching to save compile time.
      if (BB->sizeWithoutDebugLargerThan(SinkLoadInstsPerBlockThreshold) ||
          HandledDomBlocks.size() > SinkLoadBlocksThreshold) {
        for (auto *DomBB : HandledDomBlocks) {
          if (DomBB != BB && DT->dominates(DomBB, BB))
            HasStoreCache[std::make_pair(DomBB, To)] = true;
          else if (DomBB != BB && DT->dominates(BB, DomBB))
            HasStoreCache[std::make_pair(From, DomBB)] = true;
        }
        HasStoreCache[BlockPair] = true;
        return true;
      }

      for (MachineInstr &I : *BB) {
        // Treat as alias conservatively for a call or an ordered memory
        // operation.
        if (I.isCall() || I.hasOrderedMemoryRef()) {
          for (auto *DomBB : HandledDomBlocks) {
            if (DomBB != BB && DT->dominates(DomBB, BB))
              HasStoreCache[std::make_pair(DomBB, To)] = true;
            else if (DomBB != BB && DT->dominates(BB, DomBB))
              HasStoreCache[std::make_pair(From, DomBB)] = true;
          }
          HasStoreCache[BlockPair] = true;
          return true;
        }

        if (I.mayStore()) {
          SawStore = true;
          // We still have a chance to sink MI if none of the stores in
          // between alias MI.
          // Cache all store instructions, so that we don't need to go through
          // all From-reachable blocks for the next load instruction.
          if (I.mayAlias(AA, MI, false))
            HasAliasedStore = true;
          StoreInstrCache[BlockPair].push_back(&I);
        }
      }
    }
  }
  // If there is no store at all, cache the result.
  if (!SawStore)
    HasStoreCache[BlockPair] = false;
  return HasAliasedStore;
}

/// Sink instructions into cycles if profitable. This especially tries to
/// prevent register spills caused by register pressure if there is little to
/// no overhead moving instructions into cycles.
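/// Only a def whose in-cycle uses are all COPY instructions is moved, and it
/// is placed at the nearest common dominator of those uses.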
bool MachineSinking::SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I) {
  LLVM_DEBUG(dbgs() << "CycleSink: Finding sink block for: " << I);
  MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
  assert(Preheader && "Cycle sink needs a preheader block");
  MachineBasicBlock *SinkBlock = nullptr;
  bool CanSink = true;
  const MachineOperand &MO = I.getOperand(0);

  for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
    LLVM_DEBUG(dbgs() << "CycleSink: Analysing use: " << MI);
    if (!Cycle->contains(MI.getParent())) {
      LLVM_DEBUG(dbgs() << "CycleSink: Use not in cycle, can't sink.\n");
      CanSink = false;
      break;
    }

    // FIXME: Come up with a proper cost model that estimates whether sinking
    // the instruction (and thus possibly executing it on every cycle
    // iteration) is more expensive than keeping the value in a register.
    // For now, assume that copies are cheap and thus almost always worth it.
    if (!MI.isCopy()) {
      LLVM_DEBUG(dbgs() << "CycleSink: Use is not a copy\n");
      CanSink = false;
      break;
    }
    if (!SinkBlock) {
      SinkBlock = MI.getParent();
      LLVM_DEBUG(dbgs() << "CycleSink: Setting sink block to: "
                        << printMBBReference(*SinkBlock) << "\n");
      continue;
    }
    SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent());
    if (!SinkBlock) {
      LLVM_DEBUG(dbgs() << "CycleSink: Can't find nearest dominator\n");
      CanSink = false;
      break;
    }
    LLVM_DEBUG(dbgs() << "CycleSink: Setting nearest common dom block: "
                      << printMBBReference(*SinkBlock) << "\n");
  }

  if (!CanSink) {
    LLVM_DEBUG(dbgs() << "CycleSink: Can't sink instruction.\n");
    return false;
  }
  if (!SinkBlock) {
    LLVM_DEBUG(dbgs() << "CycleSink: Not sinking, can't find sink block.\n");
    return false;
  }
  if (SinkBlock == Preheader) {
    LLVM_DEBUG(
        dbgs() << "CycleSink: Not sinking, sink block is the preheader\n");
    return false;
  }
  if (SinkBlock->sizeWithoutDebugLargerThan(SinkLoadInstsPerBlockThreshold)) {
    LLVM_DEBUG(
        dbgs() << "CycleSink: Not sinking, block too large to analyse.\n");
    return false;
  }

  LLVM_DEBUG(dbgs() << "CycleSink: Sinking instruction!\n");
  SinkBlock->splice(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()),
                    Preheader, I);

  // Conservatively clear any kill flags on uses of the sunk instruction.
  for (MachineOperand &MO : I.operands()) {
    if (MO.isReg() && MO.readsReg())
      RegsToClearKillFlags.insert(MO.getReg());
  }

  // The instruction is moved from its basic block, so do not retain the
  // debug information.
  assert(!I.isDebugInstr() && "Should not sink debug inst");
  I.setDebugLoc(DebugLoc());
  return true;
}

/// SinkInstruction - Determine whether it is safe to sink the specified
/// machine instruction out of its current block into a successor.
bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
                                     AllSuccsCache &AllSuccessors) {
  // Don't sink instructions that the target prefers not to sink.
  if (!TII->shouldSink(MI))
    return false;

  // Check if it's safe to move the instruction.
  if (!MI.isSafeToMove(SawStore))
    return false;

  // Convergent operations may not be made control-dependent on additional
  // values.
  if (MI.isConvergent())
    return false;

  // Don't break implicit null checks. This is a performance heuristic, and
  // not required for correctness.
  if (SinkingPreventsImplicitNullCheck(MI, TII, TRI))
    return false;

  // FIXME: This should include support for sinking instructions within the
  // block they are currently in to shorten the live ranges. We often get
  // instructions sunk into the top of a large block, but it would be better to
  // also sink them down before their first use in the block. This xform has to
  // be careful not to *increase* register pressure though, e.g. sinking
  // "x = y + z" down if it kills y and z would increase the live ranges of y
  // and z and only shrink the live range of x.

  bool BreakPHIEdge = false;
  MachineBasicBlock *ParentBlock = MI.getParent();
  MachineBasicBlock *SuccToSinkTo =
      FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge, AllSuccessors);

  // If there is no successor to sink into, don't sink. (For instance, if the
  // instruction has no outputs, it must have side-effects.)
  if (!SuccToSinkTo)
    return false;

  // If the instruction to move defines a dead physical register which is live
  // when leaving the basic block, don't move it because it could turn into a
  // "zombie" define of that preg. E.g., EFLAGS.
  for (const MachineOperand &MO : MI.all_defs()) {
    Register Reg = MO.getReg();
    if (Reg == 0 || !Reg.isPhysical())
      continue;
    if (SuccToSinkTo->isLiveIn(Reg))
      return false;
  }

  LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccToSinkTo);

  // If the block has multiple predecessors, this is a critical edge.
  // Decide if we can sink along it or need to break the edge.
  if (SuccToSinkTo->pred_size() > 1) {
    // We cannot sink a load across a critical edge - there may be stores in
    // other code paths.
    bool TryBreak = false;
    bool Store =
        MI.mayLoad() ? hasStoreBetween(ParentBlock, SuccToSinkTo, MI) : true;
    if (!MI.isSafeToMove(Store)) {
      LLVM_DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
      TryBreak = true;
    }

    // We don't want to sink across a critical edge if we don't dominate the
    // successor. We could be introducing calculations to new code paths.
    if (!TryBreak && !DT->dominates(ParentBlock, SuccToSinkTo)) {
      LLVM_DEBUG(dbgs() << " *** NOTE: Critical edge found\n");
      TryBreak = true;
    }

    // Don't sink instructions into a cycle.
    if (!TryBreak && CI->getCycle(SuccToSinkTo) &&
        (!CI->getCycle(SuccToSinkTo)->isReducible() ||
         CI->getCycle(SuccToSinkTo)->getHeader() == SuccToSinkTo)) {
      LLVM_DEBUG(dbgs() << " *** NOTE: cycle header found\n");
      TryBreak = true;
    }

    // Otherwise we are OK with sinking along a critical edge.
    if (!TryBreak)
      LLVM_DEBUG(dbgs() << "Sinking along critical edge.\n");
    else {
      // Mark this edge as to be split.
      // If the edge can actually be split, the next iteration of the main
      // loop will sink MI in the newly created block.
      bool Status = PostponeSplitCriticalEdge(MI, ParentBlock, SuccToSinkTo,
                                              BreakPHIEdge);
      if (!Status)
        LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
                             "break critical edge\n");
      // The instruction will not be sunk this time.
      return false;
    }
  }

  if (BreakPHIEdge) {
    // BreakPHIEdge is true if all the uses are in the successor MBB being
    // sunk into and they are all PHI nodes. In this case, machine-sink must
    // break the critical edge first.
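    // Hypothetical MIR sketch of this case (for illustration only):
    //   %bb.0: %0 = ADDWrr %1, %2
    //          Bcc %bb.2
    //   %bb.2: %3 = PHI %0, %bb.0, %4, %bb.1
    // The only use of %0 is the PHI in %bb.2, reached across the critical
    // edge %bb.0 -> %bb.2, so that edge must be split before %0 can be sunk.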
    bool Status = PostponeSplitCriticalEdge(MI, ParentBlock, SuccToSinkTo,
                                            BreakPHIEdge);
    if (!Status)
      LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
                           "break critical edge\n");
    // The instruction will not be sunk this time.
    return false;
  }

  // Determine where to insert. Skip PHI nodes and labels.
  MachineBasicBlock::iterator InsertPos =
      SuccToSinkTo->SkipPHIsAndLabels(SuccToSinkTo->begin());
  if (blockPrologueInterferes(SuccToSinkTo, InsertPos, MI, TRI, TII, MRI)) {
    LLVM_DEBUG(dbgs() << " *** Not sinking: prologue interference\n");
    return false;
  }

  // Collect debug users of any vreg that this inst defines.
  SmallVector<MIRegs, 4> DbgUsersToSink;
  for (auto &MO : MI.all_defs()) {
    if (!MO.getReg().isVirtual())
      continue;
    if (!SeenDbgUsers.count(MO.getReg()))
      continue;

    // Sink any users that don't pass any other DBG_VALUEs for this variable.
    auto &Users = SeenDbgUsers[MO.getReg()];
    for (auto &User : Users) {
      MachineInstr *DbgMI = User.getPointer();
      if (User.getInt()) {
        // This DBG_VALUE would re-order assignments. If we can't
        // copy-propagate it, it can't be recovered. Set it undef.
        if (!attemptDebugCopyProp(MI, *DbgMI, MO.getReg()))
          DbgMI->setDebugValueUndef();
      } else {
        DbgUsersToSink.push_back(
            {DbgMI, SmallVector<unsigned, 2>(1, MO.getReg())});
      }
    }
  }

  // After sinking, some debug users may no longer be dominated by the def. If
  // possible, copy-propagate their operands. As this is expensive, don't do
  // it if there's no debug info in the program.
  if (MI.getMF()->getFunction().getSubprogram() && MI.isCopy())
    SalvageUnsunkDebugUsersOfCopy(MI, SuccToSinkTo);

  performSink(MI, *SuccToSinkTo, InsertPos, DbgUsersToSink);

  // Conservatively, clear any kill flags, since it's possible that they are
  // no longer correct.
  // Note that we have to clear the kill flags for any register this
  // instruction uses, as we may sink over another instruction which currently
  // kills the used registers.
  for (MachineOperand &MO : MI.all_uses())
    RegsToClearKillFlags.insert(MO.getReg()); // Remember to clear kill flags.

  return true;
}

void MachineSinking::SalvageUnsunkDebugUsersOfCopy(
    MachineInstr &MI, MachineBasicBlock *TargetBlock) {
  assert(MI.isCopy());
  assert(MI.getOperand(1).isReg());

  // Enumerate all users of vreg operands that are def'd. Skip those that will
  // be sunk. For the rest, if they are not dominated by the block we will sink
  // MI into, propagate the copy source to them.
  SmallVector<MachineInstr *, 4> DbgDefUsers;
  SmallVector<Register, 4> DbgUseRegs;
  const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
  for (auto &MO : MI.all_defs()) {
    if (!MO.getReg().isVirtual())
      continue;
    DbgUseRegs.push_back(MO.getReg());
    for (auto &User : MRI.use_instructions(MO.getReg())) {
      if (!User.isDebugValue() || DT->dominates(TargetBlock, User.getParent()))
        continue;

      // If it is in the same block, it will either be sunk or be
      // use-before-def.
      if (User.getParent() == MI.getParent())
        continue;

      assert(User.hasDebugOperandForReg(MO.getReg()) &&
             "DBG_VALUE user of vreg, but has no operand for it?");
      DbgDefUsers.push_back(&User);
    }
  }

  // Point the users of this copy that are no longer dominated, at the source
  // of the copy.
  for (auto *User : DbgDefUsers) {
    for (auto &Reg : DbgUseRegs) {
      for (auto &DbgOp : User->getDebugOperandsForReg(Reg)) {
        DbgOp.setReg(MI.getOperand(1).getReg());
        DbgOp.setSubReg(MI.getOperand(1).getSubReg());
      }
    }
  }
}

//===----------------------------------------------------------------------===//
// This pass is not intended to be a replacement or a complete alternative
// for the pre-ra machine sink pass. It is only designed to sink COPY
// instructions which should be handled after RA.
//
// This pass sinks COPY instructions into a successor block, if the COPY is not
// used in the current block and the COPY is live-in to a single successor
// (i.e., doesn't require the COPY to be duplicated). This avoids executing the
// copy on paths where its result isn't needed. This also exposes additional
// opportunities for dead copy elimination and shrink wrapping.
//
// These copies were either not handled by, or are inserted after, the
// MachineSink pass. As an example of the former case, the MachineSink pass
// cannot sink COPY instructions with allocatable source registers; for
// AArch64, copy instructions of this type are frequently used to move function
// parameters (physical registers) into virtual registers in the entry block.
//
// For the machine IR below, this pass will sink the COPY defining %w19 in the
// entry block into its successor (%bb.1), because %w19 is live-in only in
// %bb.1.
// %bb.0:
//    %wzr = SUBSWri %w1, 1
//    %w19 = COPY %w0
//    Bcc 11, %bb.2
// %bb.1:
//    Live Ins: %w19
//    BL @fun
//    %w0 = ADDWrr %w0, %w19
//    RET %w0
// %bb.2:
//    %w0 = COPY %wzr
//    RET %w0
// As we sink %w19 (a CSR in AArch64) into %bb.1, the shrink-wrapping pass will
// be able to see %bb.0 as a candidate.
//===----------------------------------------------------------------------===//
namespace {

class PostRAMachineSinking : public MachineFunctionPass {
public:
  bool runOnMachineFunction(MachineFunction &MF) override;

  static char ID;
  PostRAMachineSinking() : MachineFunctionPass(ID) {}
  StringRef getPassName() const override { return "PostRA Machine Sink"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

private:
  /// Track which register units have been modified and used.
  LiveRegUnits ModifiedRegUnits, UsedRegUnits;

  /// Track DBG_VALUEs of (unmodified) register units. Each DBG_VALUE has an
  /// entry in this map for each unit it touches. The DBG_VALUE's entry
  /// consists of a pointer to the instruction itself, and a vector of
  /// registers referred to by the instruction that overlap the key register
  /// unit.
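  /// For instance (illustrative), a DBG_VALUE reading $w0 gets an entry under
  /// each register unit of $w0, each entry pairing a pointer to the DBG_VALUE
  /// with the register vector {$w0}.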
  DenseMap<unsigned, SmallVector<MIRegs, 2>> SeenDbgInstrs;

  /// Sink COPY instructions that are unused in their defining block close to
  /// their uses in successors.
  bool tryToSinkCopy(MachineBasicBlock &BB, MachineFunction &MF,
                     const TargetRegisterInfo *TRI, const TargetInstrInfo *TII);
};
} // namespace

char PostRAMachineSinking::ID = 0;
char &llvm::PostRAMachineSinkingID = PostRAMachineSinking::ID;

INITIALIZE_PASS(PostRAMachineSinking, "postra-machine-sink",
                "PostRA Machine Sink", false, false)

static bool aliasWithRegsInLiveIn(MachineBasicBlock &MBB, unsigned Reg,
                                  const TargetRegisterInfo *TRI) {
  LiveRegUnits LiveInRegUnits(*TRI);
  LiveInRegUnits.addLiveIns(MBB);
  return !LiveInRegUnits.available(Reg);
}

static MachineBasicBlock *
getSingleLiveInSuccBB(MachineBasicBlock &CurBB,
                      const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs,
                      unsigned Reg, const TargetRegisterInfo *TRI) {
  // Try to find a single sinkable successor in which Reg is live-in.
  MachineBasicBlock *BB = nullptr;
  for (auto *SI : SinkableBBs) {
    if (aliasWithRegsInLiveIn(*SI, Reg, TRI)) {
      // If BB is set here, Reg is live-in to at least two sinkable
      // successors, so quit.
      if (BB)
        return nullptr;
      BB = SI;
    }
  }
  // Reg is not live-in to any sinkable successors.
  if (!BB)
    return nullptr;

  // Check if any register aliased with Reg is live-in in other successors.
  for (auto *SI : CurBB.successors()) {
    if (!SinkableBBs.count(SI) && aliasWithRegsInLiveIn(*SI, Reg, TRI))
      return nullptr;
  }
  return BB;
}

static MachineBasicBlock *
getSingleLiveInSuccBB(MachineBasicBlock &CurBB,
                      const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs,
                      ArrayRef<unsigned> DefedRegsInCopy,
                      const TargetRegisterInfo *TRI) {
  MachineBasicBlock *SingleBB = nullptr;
  for (auto DefReg : DefedRegsInCopy) {
    MachineBasicBlock *BB =
        getSingleLiveInSuccBB(CurBB, SinkableBBs, DefReg, TRI);
    if (!BB || (SingleBB && SingleBB != BB))
      return nullptr;
    SingleBB = BB;
  }
  return SingleBB;
}

static void clearKillFlags(MachineInstr *MI, MachineBasicBlock &CurBB,
                           SmallVectorImpl<unsigned> &UsedOpsInCopy,
                           LiveRegUnits &UsedRegUnits,
                           const TargetRegisterInfo *TRI) {
  for (auto U : UsedOpsInCopy) {
    MachineOperand &MO = MI->getOperand(U);
    Register SrcReg = MO.getReg();
    if (!UsedRegUnits.available(SrcReg)) {
      MachineBasicBlock::iterator NI = std::next(MI->getIterator());
      for (MachineInstr &UI : make_range(NI, CurBB.end())) {
        if (UI.killsRegister(SrcReg, TRI)) {
          UI.clearRegisterKills(SrcReg, TRI);
          MO.setIsKill(true);
          break;
        }
      }
    }
  }
}

static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB,
                         SmallVectorImpl<unsigned> &UsedOpsInCopy,
                         SmallVectorImpl<unsigned> &DefedRegsInCopy) {
  MachineFunction &MF = *SuccBB->getParent();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  for (unsigned DefReg : DefedRegsInCopy)
    for (MCPhysReg S : TRI->subregs_inclusive(DefReg))
      SuccBB->removeLiveIn(S);
  for (auto U : UsedOpsInCopy)
    SuccBB->addLiveIn(MI->getOperand(U).getReg());
  SuccBB->sortUniqueLiveIns();
}

static bool hasRegisterDependency(MachineInstr *MI,
                                  SmallVectorImpl<unsigned> &UsedOpsInCopy,
                                  SmallVectorImpl<unsigned> &DefedRegsInCopy,
                                  LiveRegUnits &ModifiedRegUnits,
                                  LiveRegUnits &UsedRegUnits) {
  bool HasRegDependency = false;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (!Reg)
      continue;
    if (MO.isDef()) {
      if (!ModifiedRegUnits.available(Reg) || !UsedRegUnits.available(Reg)) {
        HasRegDependency = true;
        break;
      }
      DefedRegsInCopy.push_back(Reg);

      // FIXME: instead of isUse(), readsReg() would be a better fix here.
      // For example, we can ignore modifications of a reg with undef. However,
      // it's not perfectly clear whether skipping the internal read is safe
      // for all other targets.
    } else if (MO.isUse()) {
      if (!ModifiedRegUnits.available(Reg)) {
        HasRegDependency = true;
        break;
      }
      UsedOpsInCopy.push_back(i);
    }
  }
  return HasRegDependency;
}

bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
                                         MachineFunction &MF,
                                         const TargetRegisterInfo *TRI,
                                         const TargetInstrInfo *TII) {
  SmallPtrSet<MachineBasicBlock *, 2> SinkableBBs;
  // FIXME: For now, we sink only to a successor which has a single predecessor
  // so that we can directly sink COPY instructions to the successor without
  // adding any new block or branch instruction.
  for (MachineBasicBlock *SI : CurBB.successors())
    if (!SI->livein_empty() && SI->pred_size() == 1)
      SinkableBBs.insert(SI);

  if (SinkableBBs.empty())
    return false;

  bool Changed = false;

  // Track which registers have been modified and used between the end of the
  // block and the current instruction.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  SeenDbgInstrs.clear();

  for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(CurBB))) {
    // Track the operand indexes of the uses in the COPY.
    SmallVector<unsigned, 2> UsedOpsInCopy;
    // Track the registers defined by the COPY.
    SmallVector<unsigned, 2> DefedRegsInCopy;

    // We must sink this DBG_VALUE if its operand is sunk. To avoid searching
    // for DBG_VALUEs later, record them when they're encountered.
    if (MI.isDebugValue() && !MI.isDebugRef()) {
      SmallDenseMap<MCRegister, SmallVector<unsigned, 2>, 4> MIUnits;
      bool IsValid = true;
      for (MachineOperand &MO : MI.debug_operands()) {
        if (MO.isReg() && MO.getReg().isPhysical()) {
          // Bail if we can already tell the sink would be rejected, rather
          // than needlessly accumulating lots of DBG_VALUEs.
          if (hasRegisterDependency(&MI, UsedOpsInCopy, DefedRegsInCopy,
                                    ModifiedRegUnits, UsedRegUnits)) {
            IsValid = false;
            break;
          }

          // Record debug use of each reg unit.
          for (MCRegUnit Unit : TRI->regunits(MO.getReg()))
            MIUnits[Unit].push_back(MO.getReg());
        }
      }
      if (IsValid) {
        for (auto &RegOps : MIUnits)
          SeenDbgInstrs[RegOps.first].emplace_back(&MI,
                                                   std::move(RegOps.second));
      }
      continue;
    }

    if (MI.isDebugOrPseudoInstr())
      continue;

    // Do not move any instruction across a function call.
    if (MI.isCall())
      return false;

    if (!MI.isCopy() || !MI.getOperand(0).isRenamable()) {
      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);
      continue;
    }

    // Don't sink the COPY if it would violate a register dependency.
    if (hasRegisterDependency(&MI, UsedOpsInCopy, DefedRegsInCopy,
                              ModifiedRegUnits, UsedRegUnits)) {
      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);
      continue;
    }
    assert((!UsedOpsInCopy.empty() && !DefedRegsInCopy.empty()) &&
           "Unexpected SrcReg or DefReg");
    MachineBasicBlock *SuccBB =
        getSingleLiveInSuccBB(CurBB, SinkableBBs, DefedRegsInCopy, TRI);
    // Don't sink if we cannot find a single sinkable successor in which Reg
    // is live-in.
    if (!SuccBB) {
      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);
      continue;
    }
    assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == &CurBB) &&
           "Unexpected predecessor");

    // Collect DBG_VALUEs that must sink with this copy. We've previously
    // recorded which reg units DBG_VALUEs read; if this instruction writes
    // any of those units then the corresponding DBG_VALUEs must sink.
    MapVector<MachineInstr *, MIRegs::second_type> DbgValsToSinkMap;
    for (auto &MO : MI.all_defs()) {
      for (MCRegUnit Unit : TRI->regunits(MO.getReg())) {
        for (const auto &MIRegs : SeenDbgInstrs.lookup(Unit)) {
          auto &Regs = DbgValsToSinkMap[MIRegs.first];
          for (unsigned Reg : MIRegs.second)
            Regs.push_back(Reg);
        }
      }
    }
    auto DbgValsToSink = DbgValsToSinkMap.takeVector();

    LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccBB);

    MachineBasicBlock::iterator InsertPos =
        SuccBB->SkipPHIsAndLabels(SuccBB->begin());
    if (blockPrologueInterferes(SuccBB, InsertPos, MI, TRI, TII, nullptr)) {
      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);
      LLVM_DEBUG(dbgs() << " *** Not sinking: prologue interference\n");
      continue;
    }

    // Clear the kill flag if SrcReg is killed between MI and the end of the
    // block.
    clearKillFlags(&MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
    performSink(MI, *SuccBB, InsertPos, DbgValsToSink);
    updateLiveIn(&MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);

    Changed = true;
    ++NumPostRACopySink;
  }
  return Changed;
}

bool PostRAMachineSinking::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  bool Changed = false;
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();

  ModifiedRegUnits.init(*TRI);
  UsedRegUnits.init(*TRI);
  for (auto &BB : MF)
    Changed |= tryToSinkCopy(BB, MF, TRI, TII);

  return Changed;
}