10b57cec5SDimitry Andric //===-- SIFormMemoryClauses.cpp -------------------------------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 9fe6060f1SDimitry Andric /// \file This pass extends the live ranges of registers used as pointers in 10fe6060f1SDimitry Andric /// sequences of adjacent SMEM and VMEM instructions if XNACK is enabled. A 11fe6060f1SDimitry Andric /// load that would overwrite a pointer would require breaking the soft clause. 12fe6060f1SDimitry Andric /// Artificially extend the live ranges of the pointer operands by adding 13fe6060f1SDimitry Andric /// implicit-def early-clobber operands throughout the soft clause. 140b57cec5SDimitry Andric /// 150b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 160b57cec5SDimitry Andric 170b57cec5SDimitry Andric #include "AMDGPU.h" 180b57cec5SDimitry Andric #include "GCNRegPressure.h" 190b57cec5SDimitry Andric #include "SIMachineFunctionInfo.h" 20480093f4SDimitry Andric #include "llvm/InitializePasses.h" 210b57cec5SDimitry Andric 220b57cec5SDimitry Andric using namespace llvm; 230b57cec5SDimitry Andric 240b57cec5SDimitry Andric #define DEBUG_TYPE "si-form-memory-clauses" 250b57cec5SDimitry Andric 260b57cec5SDimitry Andric // Clauses longer then 15 instructions would overflow one of the counters 270b57cec5SDimitry Andric // and stall. They can stall even earlier if there are outstanding counters. 280b57cec5SDimitry Andric static cl::opt<unsigned> 290b57cec5SDimitry Andric MaxClause("amdgpu-max-memory-clause", cl::Hidden, cl::init(15), 300b57cec5SDimitry Andric cl::desc("Maximum length of a memory clause, instructions")); 310b57cec5SDimitry Andric 320b57cec5SDimitry Andric namespace { 330b57cec5SDimitry Andric 340b57cec5SDimitry Andric class SIFormMemoryClauses : public MachineFunctionPass { 35*0fca6ea1SDimitry Andric using RegUse = DenseMap<unsigned, std::pair<unsigned, LaneBitmask>>; 360b57cec5SDimitry Andric 370b57cec5SDimitry Andric public: 380b57cec5SDimitry Andric static char ID; 390b57cec5SDimitry Andric 400b57cec5SDimitry Andric public: 410b57cec5SDimitry Andric SIFormMemoryClauses() : MachineFunctionPass(ID) { 420b57cec5SDimitry Andric initializeSIFormMemoryClausesPass(*PassRegistry::getPassRegistry()); 430b57cec5SDimitry Andric } 440b57cec5SDimitry Andric 450b57cec5SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 460b57cec5SDimitry Andric 470b57cec5SDimitry Andric StringRef getPassName() const override { 480b57cec5SDimitry Andric return "SI Form memory clauses"; 490b57cec5SDimitry Andric } 500b57cec5SDimitry Andric 510b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 52*0fca6ea1SDimitry Andric AU.addRequired<LiveIntervalsWrapperPass>(); 530b57cec5SDimitry Andric AU.setPreservesAll(); 540b57cec5SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 550b57cec5SDimitry Andric } 560b57cec5SDimitry Andric 57e8d8bef9SDimitry Andric MachineFunctionProperties getClearedProperties() const override { 58e8d8bef9SDimitry Andric return MachineFunctionProperties().set( 59e8d8bef9SDimitry Andric MachineFunctionProperties::Property::IsSSA); 60e8d8bef9SDimitry Andric } 61e8d8bef9SDimitry Andric 620b57cec5SDimitry Andric private: 63fe6060f1SDimitry Andric bool canBundle(const MachineInstr &MI, const RegUse &Defs, 64fe6060f1SDimitry Andric const RegUse &Uses) const; 650b57cec5SDimitry Andric bool checkPressure(const MachineInstr &MI, GCNDownwardRPTracker &RPT); 660b57cec5SDimitry Andric void collectRegUses(const MachineInstr &MI, RegUse &Defs, RegUse &Uses) const; 670b57cec5SDimitry Andric bool processRegUses(const MachineInstr &MI, RegUse &Defs, RegUse &Uses, 680b57cec5SDimitry Andric GCNDownwardRPTracker &RPT); 690b57cec5SDimitry Andric 700b57cec5SDimitry Andric const GCNSubtarget *ST; 710b57cec5SDimitry Andric const SIRegisterInfo *TRI; 720b57cec5SDimitry Andric const MachineRegisterInfo *MRI; 730b57cec5SDimitry Andric SIMachineFunctionInfo *MFI; 740b57cec5SDimitry Andric 750b57cec5SDimitry Andric unsigned LastRecordedOccupancy; 760b57cec5SDimitry Andric unsigned MaxVGPRs; 770b57cec5SDimitry Andric unsigned MaxSGPRs; 780b57cec5SDimitry Andric }; 790b57cec5SDimitry Andric 800b57cec5SDimitry Andric } // End anonymous namespace. 810b57cec5SDimitry Andric 820b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(SIFormMemoryClauses, DEBUG_TYPE, 830b57cec5SDimitry Andric "SI Form memory clauses", false, false) 84*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) 850b57cec5SDimitry Andric INITIALIZE_PASS_END(SIFormMemoryClauses, DEBUG_TYPE, 860b57cec5SDimitry Andric "SI Form memory clauses", false, false) 870b57cec5SDimitry Andric 880b57cec5SDimitry Andric 890b57cec5SDimitry Andric char SIFormMemoryClauses::ID = 0; 900b57cec5SDimitry Andric 910b57cec5SDimitry Andric char &llvm::SIFormMemoryClausesID = SIFormMemoryClauses::ID; 920b57cec5SDimitry Andric 930b57cec5SDimitry Andric FunctionPass *llvm::createSIFormMemoryClausesPass() { 940b57cec5SDimitry Andric return new SIFormMemoryClauses(); 950b57cec5SDimitry Andric } 960b57cec5SDimitry Andric 970b57cec5SDimitry Andric static bool isVMEMClauseInst(const MachineInstr &MI) { 980b57cec5SDimitry Andric return SIInstrInfo::isFLAT(MI) || SIInstrInfo::isVMEM(MI); 990b57cec5SDimitry Andric } 1000b57cec5SDimitry Andric 1010b57cec5SDimitry Andric static bool isSMEMClauseInst(const MachineInstr &MI) { 1020b57cec5SDimitry Andric return SIInstrInfo::isSMRD(MI); 1030b57cec5SDimitry Andric } 1040b57cec5SDimitry Andric 1050b57cec5SDimitry Andric // There no sense to create store clauses, they do not define anything, 1060b57cec5SDimitry Andric // thus there is nothing to set early-clobber. 1070b57cec5SDimitry Andric static bool isValidClauseInst(const MachineInstr &MI, bool IsVMEMClause) { 108fe6060f1SDimitry Andric assert(!MI.isDebugInstr() && "debug instructions should not reach here"); 109fe6060f1SDimitry Andric if (MI.isBundled()) 1100b57cec5SDimitry Andric return false; 1110b57cec5SDimitry Andric if (!MI.mayLoad() || MI.mayStore()) 1120b57cec5SDimitry Andric return false; 113fe6060f1SDimitry Andric if (SIInstrInfo::isAtomic(MI)) 1140b57cec5SDimitry Andric return false; 1150b57cec5SDimitry Andric if (IsVMEMClause && !isVMEMClauseInst(MI)) 1160b57cec5SDimitry Andric return false; 1170b57cec5SDimitry Andric if (!IsVMEMClause && !isSMEMClauseInst(MI)) 1180b57cec5SDimitry Andric return false; 1190b57cec5SDimitry Andric // If this is a load instruction where the result has been coalesced with an operand, then we cannot clause it. 1200b57cec5SDimitry Andric for (const MachineOperand &ResMO : MI.defs()) { 1218bcb0991SDimitry Andric Register ResReg = ResMO.getReg(); 12206c3fb27SDimitry Andric for (const MachineOperand &MO : MI.all_uses()) { 1230b57cec5SDimitry Andric if (MO.getReg() == ResReg) 1240b57cec5SDimitry Andric return false; 1250b57cec5SDimitry Andric } 1260b57cec5SDimitry Andric break; // Only check the first def. 1270b57cec5SDimitry Andric } 1280b57cec5SDimitry Andric return true; 1290b57cec5SDimitry Andric } 1300b57cec5SDimitry Andric 1310b57cec5SDimitry Andric static unsigned getMopState(const MachineOperand &MO) { 1320b57cec5SDimitry Andric unsigned S = 0; 1330b57cec5SDimitry Andric if (MO.isImplicit()) 1340b57cec5SDimitry Andric S |= RegState::Implicit; 1350b57cec5SDimitry Andric if (MO.isDead()) 1360b57cec5SDimitry Andric S |= RegState::Dead; 1370b57cec5SDimitry Andric if (MO.isUndef()) 1380b57cec5SDimitry Andric S |= RegState::Undef; 1390b57cec5SDimitry Andric if (MO.isKill()) 1400b57cec5SDimitry Andric S |= RegState::Kill; 1410b57cec5SDimitry Andric if (MO.isEarlyClobber()) 1420b57cec5SDimitry Andric S |= RegState::EarlyClobber; 143e8d8bef9SDimitry Andric if (MO.getReg().isPhysical() && MO.isRenamable()) 1440b57cec5SDimitry Andric S |= RegState::Renamable; 1450b57cec5SDimitry Andric return S; 1460b57cec5SDimitry Andric } 1470b57cec5SDimitry Andric 1480b57cec5SDimitry Andric // Returns false if there is a use of a def already in the map. 1490b57cec5SDimitry Andric // In this case we must break the clause. 150fe6060f1SDimitry Andric bool SIFormMemoryClauses::canBundle(const MachineInstr &MI, const RegUse &Defs, 151fe6060f1SDimitry Andric const RegUse &Uses) const { 1520b57cec5SDimitry Andric // Check interference with defs. 1530b57cec5SDimitry Andric for (const MachineOperand &MO : MI.operands()) { 1540b57cec5SDimitry Andric // TODO: Prologue/Epilogue Insertion pass does not process bundled 1550b57cec5SDimitry Andric // instructions. 1560b57cec5SDimitry Andric if (MO.isFI()) 1570b57cec5SDimitry Andric return false; 1580b57cec5SDimitry Andric 1590b57cec5SDimitry Andric if (!MO.isReg()) 1600b57cec5SDimitry Andric continue; 1610b57cec5SDimitry Andric 1628bcb0991SDimitry Andric Register Reg = MO.getReg(); 1630b57cec5SDimitry Andric 1640b57cec5SDimitry Andric // If it is tied we will need to write same register as we read. 1650b57cec5SDimitry Andric if (MO.isTied()) 1660b57cec5SDimitry Andric return false; 1670b57cec5SDimitry Andric 168fe6060f1SDimitry Andric const RegUse &Map = MO.isDef() ? Uses : Defs; 1690b57cec5SDimitry Andric auto Conflict = Map.find(Reg); 1700b57cec5SDimitry Andric if (Conflict == Map.end()) 1710b57cec5SDimitry Andric continue; 1720b57cec5SDimitry Andric 173e8d8bef9SDimitry Andric if (Reg.isPhysical()) 1740b57cec5SDimitry Andric return false; 1750b57cec5SDimitry Andric 1760b57cec5SDimitry Andric LaneBitmask Mask = TRI->getSubRegIndexLaneMask(MO.getSubReg()); 1770b57cec5SDimitry Andric if ((Conflict->second.second & Mask).any()) 1780b57cec5SDimitry Andric return false; 1790b57cec5SDimitry Andric } 1800b57cec5SDimitry Andric 1810b57cec5SDimitry Andric return true; 1820b57cec5SDimitry Andric } 1830b57cec5SDimitry Andric 1840b57cec5SDimitry Andric // Since all defs in the clause are early clobber we can run out of registers. 1850b57cec5SDimitry Andric // Function returns false if pressure would hit the limit if instruction is 1860b57cec5SDimitry Andric // bundled into a memory clause. 1870b57cec5SDimitry Andric bool SIFormMemoryClauses::checkPressure(const MachineInstr &MI, 1880b57cec5SDimitry Andric GCNDownwardRPTracker &RPT) { 1890b57cec5SDimitry Andric // NB: skip advanceBeforeNext() call. Since all defs will be marked 1900b57cec5SDimitry Andric // early-clobber they will all stay alive at least to the end of the 1910b57cec5SDimitry Andric // clause. Therefor we should not decrease pressure even if load 1920b57cec5SDimitry Andric // pointer becomes dead and could otherwise be reused for destination. 1930b57cec5SDimitry Andric RPT.advanceToNext(); 1940b57cec5SDimitry Andric GCNRegPressure MaxPressure = RPT.moveMaxPressure(); 1950b57cec5SDimitry Andric unsigned Occupancy = MaxPressure.getOccupancy(*ST); 196fe6060f1SDimitry Andric 197fe6060f1SDimitry Andric // Don't push over half the register budget. We don't want to introduce 198fe6060f1SDimitry Andric // spilling just to form a soft clause. 199fe6060f1SDimitry Andric // 200fe6060f1SDimitry Andric // FIXME: This pressure check is fundamentally broken. First, this is checking 201fe6060f1SDimitry Andric // the global pressure, not the pressure at this specific point in the 202fe6060f1SDimitry Andric // program. Second, it's not accounting for the increased liveness of the use 203fe6060f1SDimitry Andric // operands due to the early clobber we will introduce. Third, the pressure 204fe6060f1SDimitry Andric // tracking does not account for the alignment requirements for SGPRs, or the 205fe6060f1SDimitry Andric // fragmentation of registers the allocator will need to satisfy. 2060b57cec5SDimitry Andric if (Occupancy >= MFI->getMinAllowedOccupancy() && 207fe6060f1SDimitry Andric MaxPressure.getVGPRNum(ST->hasGFX90AInsts()) <= MaxVGPRs / 2 && 208fe6060f1SDimitry Andric MaxPressure.getSGPRNum() <= MaxSGPRs / 2) { 2090b57cec5SDimitry Andric LastRecordedOccupancy = Occupancy; 2100b57cec5SDimitry Andric return true; 2110b57cec5SDimitry Andric } 2120b57cec5SDimitry Andric return false; 2130b57cec5SDimitry Andric } 2140b57cec5SDimitry Andric 2150b57cec5SDimitry Andric // Collect register defs and uses along with their lane masks and states. 2160b57cec5SDimitry Andric void SIFormMemoryClauses::collectRegUses(const MachineInstr &MI, 2170b57cec5SDimitry Andric RegUse &Defs, RegUse &Uses) const { 2180b57cec5SDimitry Andric for (const MachineOperand &MO : MI.operands()) { 2190b57cec5SDimitry Andric if (!MO.isReg()) 2200b57cec5SDimitry Andric continue; 2218bcb0991SDimitry Andric Register Reg = MO.getReg(); 2220b57cec5SDimitry Andric if (!Reg) 2230b57cec5SDimitry Andric continue; 2240b57cec5SDimitry Andric 225e8d8bef9SDimitry Andric LaneBitmask Mask = Reg.isVirtual() 2268bcb0991SDimitry Andric ? TRI->getSubRegIndexLaneMask(MO.getSubReg()) 2278bcb0991SDimitry Andric : LaneBitmask::getAll(); 2280b57cec5SDimitry Andric RegUse &Map = MO.isDef() ? Defs : Uses; 2290b57cec5SDimitry Andric 2300b57cec5SDimitry Andric auto Loc = Map.find(Reg); 2310b57cec5SDimitry Andric unsigned State = getMopState(MO); 2320b57cec5SDimitry Andric if (Loc == Map.end()) { 233bdd1243dSDimitry Andric Map[Reg] = std::pair(State, Mask); 2340b57cec5SDimitry Andric } else { 2350b57cec5SDimitry Andric Loc->second.first |= State; 2360b57cec5SDimitry Andric Loc->second.second |= Mask; 2370b57cec5SDimitry Andric } 2380b57cec5SDimitry Andric } 2390b57cec5SDimitry Andric } 2400b57cec5SDimitry Andric 2410b57cec5SDimitry Andric // Check register def/use conflicts, occupancy limits and collect def/use maps. 24281ad6265SDimitry Andric // Return true if instruction can be bundled with previous. If it cannot 2430b57cec5SDimitry Andric // def/use maps are not updated. 2440b57cec5SDimitry Andric bool SIFormMemoryClauses::processRegUses(const MachineInstr &MI, 2450b57cec5SDimitry Andric RegUse &Defs, RegUse &Uses, 2460b57cec5SDimitry Andric GCNDownwardRPTracker &RPT) { 2470b57cec5SDimitry Andric if (!canBundle(MI, Defs, Uses)) 2480b57cec5SDimitry Andric return false; 2490b57cec5SDimitry Andric 2500b57cec5SDimitry Andric if (!checkPressure(MI, RPT)) 2510b57cec5SDimitry Andric return false; 2520b57cec5SDimitry Andric 2530b57cec5SDimitry Andric collectRegUses(MI, Defs, Uses); 2540b57cec5SDimitry Andric return true; 2550b57cec5SDimitry Andric } 2560b57cec5SDimitry Andric 2570b57cec5SDimitry Andric bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) { 2580b57cec5SDimitry Andric if (skipFunction(MF.getFunction())) 2590b57cec5SDimitry Andric return false; 2600b57cec5SDimitry Andric 2610b57cec5SDimitry Andric ST = &MF.getSubtarget<GCNSubtarget>(); 2620b57cec5SDimitry Andric if (!ST->isXNACKEnabled()) 2630b57cec5SDimitry Andric return false; 2640b57cec5SDimitry Andric 2650b57cec5SDimitry Andric const SIInstrInfo *TII = ST->getInstrInfo(); 2660b57cec5SDimitry Andric TRI = ST->getRegisterInfo(); 2670b57cec5SDimitry Andric MRI = &MF.getRegInfo(); 2680b57cec5SDimitry Andric MFI = MF.getInfo<SIMachineFunctionInfo>(); 269*0fca6ea1SDimitry Andric LiveIntervals *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS(); 2700b57cec5SDimitry Andric SlotIndexes *Ind = LIS->getSlotIndexes(); 2710b57cec5SDimitry Andric bool Changed = false; 2720b57cec5SDimitry Andric 2730b57cec5SDimitry Andric MaxVGPRs = TRI->getAllocatableSet(MF, &AMDGPU::VGPR_32RegClass).count(); 2740b57cec5SDimitry Andric MaxSGPRs = TRI->getAllocatableSet(MF, &AMDGPU::SGPR_32RegClass).count(); 275bdd1243dSDimitry Andric unsigned FuncMaxClause = MF.getFunction().getFnAttributeAsParsedInteger( 276bdd1243dSDimitry Andric "amdgpu-max-memory-clause", MaxClause); 2770b57cec5SDimitry Andric 2780b57cec5SDimitry Andric for (MachineBasicBlock &MBB : MF) { 279e8d8bef9SDimitry Andric GCNDownwardRPTracker RPT(*LIS); 2800b57cec5SDimitry Andric MachineBasicBlock::instr_iterator Next; 2810b57cec5SDimitry Andric for (auto I = MBB.instr_begin(), E = MBB.instr_end(); I != E; I = Next) { 2820b57cec5SDimitry Andric MachineInstr &MI = *I; 2830b57cec5SDimitry Andric Next = std::next(I); 2840b57cec5SDimitry Andric 285fe6060f1SDimitry Andric if (MI.isMetaInstruction()) 286fe6060f1SDimitry Andric continue; 287fe6060f1SDimitry Andric 2880b57cec5SDimitry Andric bool IsVMEM = isVMEMClauseInst(MI); 2890b57cec5SDimitry Andric 2900b57cec5SDimitry Andric if (!isValidClauseInst(MI, IsVMEM)) 2910b57cec5SDimitry Andric continue; 2920b57cec5SDimitry Andric 293e8d8bef9SDimitry Andric if (!RPT.getNext().isValid()) 2940b57cec5SDimitry Andric RPT.reset(MI); 295e8d8bef9SDimitry Andric else { // Advance the state to the current MI. 296e8d8bef9SDimitry Andric RPT.advance(MachineBasicBlock::const_iterator(MI)); 297e8d8bef9SDimitry Andric RPT.advanceBeforeNext(); 298e8d8bef9SDimitry Andric } 2990b57cec5SDimitry Andric 300e8d8bef9SDimitry Andric const GCNRPTracker::LiveRegSet LiveRegsCopy(RPT.getLiveRegs()); 301e8d8bef9SDimitry Andric RegUse Defs, Uses; 302e8d8bef9SDimitry Andric if (!processRegUses(MI, Defs, Uses, RPT)) { 303e8d8bef9SDimitry Andric RPT.reset(MI, &LiveRegsCopy); 3040b57cec5SDimitry Andric continue; 305e8d8bef9SDimitry Andric } 3060b57cec5SDimitry Andric 307fe6060f1SDimitry Andric MachineBasicBlock::iterator LastClauseInst = Next; 3080b57cec5SDimitry Andric unsigned Length = 1; 3090b57cec5SDimitry Andric for ( ; Next != E && Length < FuncMaxClause; ++Next) { 310fe6060f1SDimitry Andric // Debug instructions should not change the kill insertion. 311fe6060f1SDimitry Andric if (Next->isMetaInstruction()) 312fe6060f1SDimitry Andric continue; 313fe6060f1SDimitry Andric 3140b57cec5SDimitry Andric if (!isValidClauseInst(*Next, IsVMEM)) 3150b57cec5SDimitry Andric break; 3160b57cec5SDimitry Andric 3170b57cec5SDimitry Andric // A load from pointer which was loaded inside the same bundle is an 3180b57cec5SDimitry Andric // impossible clause because we will need to write and read the same 3190b57cec5SDimitry Andric // register inside. In this case processRegUses will return false. 3200b57cec5SDimitry Andric if (!processRegUses(*Next, Defs, Uses, RPT)) 3210b57cec5SDimitry Andric break; 3220b57cec5SDimitry Andric 323fe6060f1SDimitry Andric LastClauseInst = Next; 3240b57cec5SDimitry Andric ++Length; 3250b57cec5SDimitry Andric } 326e8d8bef9SDimitry Andric if (Length < 2) { 327e8d8bef9SDimitry Andric RPT.reset(MI, &LiveRegsCopy); 3280b57cec5SDimitry Andric continue; 329e8d8bef9SDimitry Andric } 3300b57cec5SDimitry Andric 3310b57cec5SDimitry Andric Changed = true; 3320b57cec5SDimitry Andric MFI->limitOccupancy(LastRecordedOccupancy); 3330b57cec5SDimitry Andric 334fe6060f1SDimitry Andric assert(!LastClauseInst->isMetaInstruction()); 3350b57cec5SDimitry Andric 336fe6060f1SDimitry Andric SlotIndex ClauseLiveInIdx = LIS->getInstructionIndex(MI); 337fe6060f1SDimitry Andric SlotIndex ClauseLiveOutIdx = 338fe6060f1SDimitry Andric LIS->getInstructionIndex(*LastClauseInst).getNextIndex(); 339e8d8bef9SDimitry Andric 340fe6060f1SDimitry Andric // Track the last inserted kill. 341fe6060f1SDimitry Andric MachineInstrBuilder Kill; 3420b57cec5SDimitry Andric 343fe6060f1SDimitry Andric // Insert one kill per register, with operands covering all necessary 344fe6060f1SDimitry Andric // subregisters. 3450b57cec5SDimitry Andric for (auto &&R : Uses) { 346fe6060f1SDimitry Andric Register Reg = R.first; 347fe6060f1SDimitry Andric if (Reg.isPhysical()) 348fe6060f1SDimitry Andric continue; 349fe6060f1SDimitry Andric 350fe6060f1SDimitry Andric // Collect the register operands we should extend the live ranges of. 351fe6060f1SDimitry Andric SmallVector<std::tuple<unsigned, unsigned>> KillOps; 352fe6060f1SDimitry Andric const LiveInterval &LI = LIS->getInterval(R.first); 353fe6060f1SDimitry Andric 354fe6060f1SDimitry Andric if (!LI.hasSubRanges()) { 355fe6060f1SDimitry Andric if (!LI.liveAt(ClauseLiveOutIdx)) { 356fe6060f1SDimitry Andric KillOps.emplace_back(R.second.first | RegState::Kill, 357fe6060f1SDimitry Andric AMDGPU::NoSubRegister); 3580b57cec5SDimitry Andric } 359fe6060f1SDimitry Andric } else { 360fe6060f1SDimitry Andric LaneBitmask KilledMask; 361fe6060f1SDimitry Andric for (const LiveInterval::SubRange &SR : LI.subranges()) { 362fe6060f1SDimitry Andric if (SR.liveAt(ClauseLiveInIdx) && !SR.liveAt(ClauseLiveOutIdx)) 363fe6060f1SDimitry Andric KilledMask |= SR.LaneMask; 364fe6060f1SDimitry Andric } 365fe6060f1SDimitry Andric 366fe6060f1SDimitry Andric if (KilledMask.none()) 367fe6060f1SDimitry Andric continue; 368fe6060f1SDimitry Andric 369fe6060f1SDimitry Andric SmallVector<unsigned> KilledIndexes; 370fe6060f1SDimitry Andric bool Success = TRI->getCoveringSubRegIndexes( 371fe6060f1SDimitry Andric *MRI, MRI->getRegClass(Reg), KilledMask, KilledIndexes); 372fe6060f1SDimitry Andric (void)Success; 373fe6060f1SDimitry Andric assert(Success && "Failed to find subregister mask to cover lanes"); 374fe6060f1SDimitry Andric for (unsigned SubReg : KilledIndexes) { 375fe6060f1SDimitry Andric KillOps.emplace_back(R.second.first | RegState::Kill, SubReg); 376fe6060f1SDimitry Andric } 377fe6060f1SDimitry Andric } 378fe6060f1SDimitry Andric 379fe6060f1SDimitry Andric if (KillOps.empty()) 380fe6060f1SDimitry Andric continue; 381fe6060f1SDimitry Andric 382fe6060f1SDimitry Andric // We only want to extend the live ranges of used registers. If they 383fe6060f1SDimitry Andric // already have existing uses beyond the bundle, we don't need the kill. 384fe6060f1SDimitry Andric // 385fe6060f1SDimitry Andric // It's possible all of the use registers were already live past the 386fe6060f1SDimitry Andric // bundle. 387fe6060f1SDimitry Andric Kill = BuildMI(*MI.getParent(), std::next(LastClauseInst), 388fe6060f1SDimitry Andric DebugLoc(), TII->get(AMDGPU::KILL)); 389fe6060f1SDimitry Andric for (auto &Op : KillOps) 390fe6060f1SDimitry Andric Kill.addUse(Reg, std::get<0>(Op), std::get<1>(Op)); 391fe6060f1SDimitry Andric Ind->insertMachineInstrInMaps(*Kill); 392fe6060f1SDimitry Andric } 393fe6060f1SDimitry Andric 394fe6060f1SDimitry Andric // Restore the state after processing the end of the bundle. 395bdd1243dSDimitry Andric RPT.reset(MI, &LiveRegsCopy); 396bdd1243dSDimitry Andric 397bdd1243dSDimitry Andric if (!Kill) 398bdd1243dSDimitry Andric continue; 3990b57cec5SDimitry Andric 4000b57cec5SDimitry Andric for (auto &&R : Defs) { 401e8d8bef9SDimitry Andric Register Reg = R.first; 4020b57cec5SDimitry Andric Uses.erase(Reg); 403e8d8bef9SDimitry Andric if (Reg.isPhysical()) 4040b57cec5SDimitry Andric continue; 4050b57cec5SDimitry Andric LIS->removeInterval(Reg); 4060b57cec5SDimitry Andric LIS->createAndComputeVirtRegInterval(Reg); 4070b57cec5SDimitry Andric } 4080b57cec5SDimitry Andric 4090b57cec5SDimitry Andric for (auto &&R : Uses) { 410e8d8bef9SDimitry Andric Register Reg = R.first; 411e8d8bef9SDimitry Andric if (Reg.isPhysical()) 4120b57cec5SDimitry Andric continue; 4130b57cec5SDimitry Andric LIS->removeInterval(Reg); 4140b57cec5SDimitry Andric LIS->createAndComputeVirtRegInterval(Reg); 4150b57cec5SDimitry Andric } 4160b57cec5SDimitry Andric } 4170b57cec5SDimitry Andric } 4180b57cec5SDimitry Andric 4190b57cec5SDimitry Andric return Changed; 4200b57cec5SDimitry Andric } 421