//===-- GCNNSAReassign.cpp - Reassign registers in NSA instructions -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Try to reassign registers on GFX10+ from non-sequential to sequential
/// in NSA image instructions. The SIShrinkInstructions pass will later replace
/// NSA with sequential versions where possible.
///
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/InitializePasses.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-nsa-reassign"

STATISTIC(NumNSAInstructions,
          "Number of NSA instructions with non-sequential address found");
STATISTIC(NumNSAConverted,
          "Number of NSA instructions changed to sequential");

namespace {

class GCNNSAReassign : public MachineFunctionPass {
public:
  static char ID;

  GCNNSAReassign() : MachineFunctionPass(ID) {
    initializeGCNNSAReassignPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "GCN NSA Reassign"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<LiveIntervals>();
    AU.addRequired<VirtRegMap>();
    AU.addRequired<LiveRegMatrix>();
    AU.setPreservesAll();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

private:
  typedef enum {
    NOT_NSA,        // Not an NSA instruction
    FIXED,          // NSA which we cannot modify
    NON_CONTIGUOUS, // NSA with non-sequential address which we can try
                    // to optimize.
    CONTIGUOUS      // NSA with all sequential address registers
  } NSA_Status;

  const GCNSubtarget *ST;

  const MachineRegisterInfo *MRI;

  const SIRegisterInfo *TRI;

  VirtRegMap *VRM;

  LiveRegMatrix *LRM;

  LiveIntervals *LIS;

  unsigned MaxNumVGPRs;

  const MCPhysReg *CSRegs;

  NSA_Status CheckNSA(const MachineInstr &MI, bool Fast = false) const;

  bool tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals,
                          unsigned StartReg) const;

  bool canAssign(unsigned StartReg, unsigned NumRegs) const;

  bool scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const;
};

} // End anonymous namespace.
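
// Illustrative example (hypothetical register numbers): on GFX10 an NSA MIMG
// instruction can take its address operands from scattered VGPRs,
//
//   image_sample v[0:3], [v7, v3, v9], s[0:7], s[8:11]
//
// while the sequential (non-NSA) encoding requires one contiguous tuple,
//
//   image_sample v[0:3], v[3:5], s[0:7], s[8:11]
//
// This pass tries to reassign scattered addresses such as v7, v3 and v9 to
// consecutive physical VGPRs so that SIShrinkInstructions can later switch
// the instruction to the shorter sequential encoding.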

INITIALIZE_PASS_BEGIN(GCNNSAReassign, DEBUG_TYPE, "GCN NSA Reassign",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
INITIALIZE_PASS_END(GCNNSAReassign, DEBUG_TYPE, "GCN NSA Reassign",
                    false, false)


char GCNNSAReassign::ID = 0;

char &llvm::GCNNSAReassignID = GCNNSAReassign::ID;

bool
GCNNSAReassign::tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals,
                                   unsigned StartReg) const {
  unsigned NumRegs = Intervals.size();

  for (unsigned N = 0; N < NumRegs; ++N)
    if (VRM->hasPhys(Intervals[N]->reg()))
      LRM->unassign(*Intervals[N]);

  for (unsigned N = 0; N < NumRegs; ++N)
    if (LRM->checkInterference(*Intervals[N], MCRegister::from(StartReg + N)))
      return false;

  for (unsigned N = 0; N < NumRegs; ++N)
    LRM->assign(*Intervals[N], MCRegister::from(StartReg + N));

  return true;
}

bool GCNNSAReassign::canAssign(unsigned StartReg, unsigned NumRegs) const {
  for (unsigned N = 0; N < NumRegs; ++N) {
    unsigned Reg = StartReg + N;
    if (!MRI->isAllocatable(Reg))
      return false;

    for (unsigned I = 0; CSRegs[I]; ++I)
      if (TRI->isSubRegisterEq(Reg, CSRegs[I]) &&
          !LRM->isPhysRegUsed(CSRegs[I]))
        return false;
  }

  return true;
}

bool
GCNNSAReassign::scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const {
  unsigned NumRegs = Intervals.size();

  if (NumRegs > MaxNumVGPRs)
    return false;
  unsigned MaxReg = MaxNumVGPRs - NumRegs + AMDGPU::VGPR0;

  for (unsigned Reg = AMDGPU::VGPR0; Reg <= MaxReg; ++Reg) {
    if (!canAssign(Reg, NumRegs))
      continue;

    if (tryAssignRegisters(Intervals, Reg))
      return true;
  }

  return false;
}

GCNNSAReassign::NSA_Status
GCNNSAReassign::CheckNSA(const MachineInstr &MI, bool Fast) const {
  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  if (!Info || Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA)
    return NSA_Status::NOT_NSA;

  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);

  unsigned VgprBase = 0;
  bool NSA = false;
  for (unsigned I = 0; I < Info->VAddrDwords; ++I) {
    const MachineOperand &Op = MI.getOperand(VAddr0Idx + I);
    Register Reg = Op.getReg();
    if (Reg.isPhysical() || !VRM->isAssignedReg(Reg))
      return NSA_Status::FIXED;

    Register PhysReg = VRM->getPhys(Reg);

    if (!Fast) {
      if (!PhysReg)
        return NSA_Status::FIXED;

      // Bail if the address is not a VGPR32. It should be possible to extend
      // the optimization to work with subregs of wider register tuples, but
      // the logic to find free registers would be much more complicated, with
      // much lower chances of success. It seems reasonable to assume that in
      // most cases a tuple is used because a vector variable contains
      // different parts of an address, and it is either already consecutive
      // or cannot be reassigned if not. If needed, it is better to rely on
      // the register coalescer to process such address tuples.
      if (MRI->getRegClass(Reg) != &AMDGPU::VGPR_32RegClass || Op.getSubReg())
        return NSA_Status::FIXED;

      // InlineSpiller does not call LRM::assign() after an LI split, leaving
      // it in an inconsistent state, so we cannot call LRM::unassign().
      // See llvm bug #48911.
      // Skip the reassignment if a register has originated from such a split.
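      // (A note on the check below: VRM->getPreSplitReg(Reg) returns the
      // virtual register Reg was split from, or zero when Reg is not a
      // product of live range splitting, so a non-zero result identifies
      // exactly the registers subject to this workaround.)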
      // FIXME: Remove this workaround when bug #48911 is fixed.
      if (VRM->getPreSplitReg(Reg))
        return NSA_Status::FIXED;

      const MachineInstr *Def = MRI->getUniqueVRegDef(Reg);

      if (Def && Def->isCopy() && Def->getOperand(1).getReg() == PhysReg)
        return NSA_Status::FIXED;

      for (auto U : MRI->use_nodbg_operands(Reg)) {
        if (U.isImplicit())
          return NSA_Status::FIXED;
        const MachineInstr *UseInst = U.getParent();
        if (UseInst->isCopy() && UseInst->getOperand(0).getReg() == PhysReg)
          return NSA_Status::FIXED;
      }

      if (!LIS->hasInterval(Reg))
        return NSA_Status::FIXED;
    }

    if (I == 0)
      VgprBase = PhysReg;
    else if (VgprBase + I != PhysReg)
      NSA = true;
  }

  return NSA ? NSA_Status::NON_CONTIGUOUS : NSA_Status::CONTIGUOUS;
}

bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) {
  ST = &MF.getSubtarget<GCNSubtarget>();
  if (ST->getGeneration() < GCNSubtarget::GFX10)
    return false;

  MRI = &MF.getRegInfo();
  TRI = ST->getRegisterInfo();
  VRM = &getAnalysis<VirtRegMap>();
  LRM = &getAnalysis<LiveRegMatrix>();
  LIS = &getAnalysis<LiveIntervals>();

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MaxNumVGPRs = ST->getMaxNumVGPRs(MF);
  MaxNumVGPRs = std::min(ST->getMaxNumVGPRs(MFI->getOccupancy()), MaxNumVGPRs);
  CSRegs = MRI->getCalleeSavedRegs();

  using Candidate = std::pair<const MachineInstr*, bool>;
  SmallVector<Candidate, 32> Candidates;
  for (const MachineBasicBlock &MBB : MF) {
    for (const MachineInstr &MI : MBB) {
      switch (CheckNSA(MI)) {
      default:
        continue;
      case NSA_Status::CONTIGUOUS:
        Candidates.push_back(std::make_pair(&MI, true));
        break;
      case NSA_Status::NON_CONTIGUOUS:
        Candidates.push_back(std::make_pair(&MI, false));
        ++NumNSAInstructions;
        break;
      }
    }
  }

  bool Changed = false;
  for (auto &C : Candidates) {
    if (C.second)
      continue;

    const MachineInstr *MI = C.first;
    if (CheckNSA(*MI, true) == NSA_Status::CONTIGUOUS) {
      // The address already happens to be contiguous.
      C.second = true;
      ++NumNSAConverted;
      continue;
    }

    const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI->getOpcode());
    int VAddr0Idx =
        AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::vaddr0);

    SmallVector<LiveInterval *, 16> Intervals;
    SmallVector<MCRegister, 16> OrigRegs;
    SlotIndex MinInd, MaxInd;
    for (unsigned I = 0; I < Info->VAddrDwords; ++I) {
      const MachineOperand &Op = MI->getOperand(VAddr0Idx + I);
      Register Reg = Op.getReg();
      LiveInterval *LI = &LIS->getInterval(Reg);
      if (llvm::is_contained(Intervals, LI)) {
        // The same register is used twice; we cannot make it sequential.
        Intervals.clear();
        break;
      }
      Intervals.push_back(LI);
      OrigRegs.push_back(VRM->getPhys(Reg));
      if (LI->empty()) {
        // The address input is undef, so it doesn't contribute to the
        // relevant range. Seed a reasonable index range if required.
        if (I == 0)
          MinInd = MaxInd = LIS->getInstructionIndex(*MI);
        continue;
      }
      MinInd = I != 0 ? std::min(MinInd, LI->beginIndex()) : LI->beginIndex();
      MaxInd = I != 0 ? std::max(MaxInd, LI->endIndex()) : LI->endIndex();
    }

    if (Intervals.empty())
      continue;

    LLVM_DEBUG(dbgs() << "Attempting to reassign NSA: " << *MI
                      << "\tOriginal allocation:\t";
               for (auto *LI : Intervals)
                 dbgs() << " "
                        << llvm::printReg((VRM->getPhys(LI->reg())), TRI);
               dbgs() << '\n');

    bool Success = scavengeRegs(Intervals);
    if (!Success) {
      LLVM_DEBUG(dbgs() << "\tCannot reallocate.\n");
      if (VRM->hasPhys(Intervals.back()->reg())) // Did not change allocation.
        continue;
    } else {
      // Check we did not make it worse for other instructions.
      auto I = std::lower_bound(Candidates.begin(), &C, MinInd,
                                [this](const Candidate &C, SlotIndex I) {
                                  return LIS->getInstructionIndex(*C.first) < I;
                                });
      for (auto E = Candidates.end(); Success && I != E &&
           LIS->getInstructionIndex(*I->first) < MaxInd; ++I) {
        if (I->second && CheckNSA(*I->first, true) < NSA_Status::CONTIGUOUS) {
          Success = false;
          LLVM_DEBUG(dbgs() << "\tNSA conversion conflict with " << *I->first);
        }
      }
    }

    if (!Success) {
      for (unsigned I = 0; I < Info->VAddrDwords; ++I)
        if (VRM->hasPhys(Intervals[I]->reg()))
          LRM->unassign(*Intervals[I]);

      for (unsigned I = 0; I < Info->VAddrDwords; ++I)
        LRM->assign(*Intervals[I], OrigRegs[I]);

      continue;
    }

    C.second = true;
    ++NumNSAConverted;
    LLVM_DEBUG(
        dbgs() << "\tNew allocation:\t\t ["
               << llvm::printReg((VRM->getPhys(Intervals.front()->reg())), TRI)
               << " : "
               << llvm::printReg((VRM->getPhys(Intervals.back()->reg())), TRI)
               << "]\n");
    Changed = true;
  }

  return Changed;
}
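
// A minimal sketch, assuming the in-tree GCNPassConfig hooks, of where this
// pass is intended to run: it requires VirtRegMap and LiveRegMatrix, so it
// must be scheduled after the register allocator but before VirtRegRewriter
// deletes those analyses, e.g. from the pass config's addPreRewrite()
// override:
//
//   bool GCNPassConfig::addPreRewrite() {
//     addPass(&GCNNSAReassignID);
//     return true;
//   }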