1 //===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This pass implements instructions packetization for R600. It unsets isLast 11 /// bit of instructions inside a bundle and substitutes src register with 12 /// PreviousVector when applicable. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "AMDGPU.h" 17 #include "AMDGPUSubtarget.h" 18 #include "llvm/CodeGen/DFAPacketizer.h" 19 #include "llvm/CodeGen/MachineDominators.h" 20 #include "llvm/CodeGen/MachineLoopInfo.h" 21 #include "llvm/CodeGen/ScheduleDAG.h" 22 23 using namespace llvm; 24 25 #define DEBUG_TYPE "packets" 26 27 namespace { 28 29 class R600Packetizer : public MachineFunctionPass { 30 31 public: 32 static char ID; 33 R600Packetizer() : MachineFunctionPass(ID) {} 34 35 void getAnalysisUsage(AnalysisUsage &AU) const override { 36 AU.setPreservesCFG(); 37 AU.addRequired<MachineDominatorTree>(); 38 AU.addPreserved<MachineDominatorTree>(); 39 AU.addRequired<MachineLoopInfo>(); 40 AU.addPreserved<MachineLoopInfo>(); 41 MachineFunctionPass::getAnalysisUsage(AU); 42 } 43 44 StringRef getPassName() const override { return "R600 Packetizer"; } 45 46 bool runOnMachineFunction(MachineFunction &Fn) override; 47 }; 48 49 class R600PacketizerList : public VLIWPacketizerList { 50 private: 51 const R600InstrInfo *TII; 52 const R600RegisterInfo &TRI; 53 bool VLIW5; 54 bool ConsideredInstUsesAlreadyWrittenVectorElement; 55 56 unsigned getSlot(const MachineInstr &MI) const { 57 return TRI.getHWRegChan(MI.getOperand(0).getReg()); 58 } 59 60 /// \returns register to PV chan mapping for bundle/single instructions that 61 /// immediately precedes I. 62 DenseMap<unsigned, unsigned> getPreviousVector(MachineBasicBlock::iterator I) 63 const { 64 DenseMap<unsigned, unsigned> Result; 65 I--; 66 if (!TII->isALUInstr(I->getOpcode()) && !I->isBundle()) 67 return Result; 68 MachineBasicBlock::instr_iterator BI = I.getInstrIterator(); 69 if (I->isBundle()) 70 BI++; 71 int LastDstChan = -1; 72 do { 73 bool isTrans = false; 74 int BISlot = getSlot(*BI); 75 if (LastDstChan >= BISlot) 76 isTrans = true; 77 LastDstChan = BISlot; 78 if (TII->isPredicated(*BI)) 79 continue; 80 int OperandIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::write); 81 if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0) 82 continue; 83 int DstIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::dst); 84 if (DstIdx == -1) { 85 continue; 86 } 87 Register Dst = BI->getOperand(DstIdx).getReg(); 88 if (isTrans || TII->isTransOnly(*BI)) { 89 Result[Dst] = R600::PS; 90 continue; 91 } 92 if (BI->getOpcode() == R600::DOT4_r600 || 93 BI->getOpcode() == R600::DOT4_eg) { 94 Result[Dst] = R600::PV_X; 95 continue; 96 } 97 if (Dst == R600::OQAP) { 98 continue; 99 } 100 unsigned PVReg = 0; 101 switch (TRI.getHWRegChan(Dst)) { 102 case 0: 103 PVReg = R600::PV_X; 104 break; 105 case 1: 106 PVReg = R600::PV_Y; 107 break; 108 case 2: 109 PVReg = R600::PV_Z; 110 break; 111 case 3: 112 PVReg = R600::PV_W; 113 break; 114 default: 115 llvm_unreachable("Invalid Chan"); 116 } 117 Result[Dst] = PVReg; 118 } while ((++BI)->isBundledWithPred()); 119 return Result; 120 } 121 122 void substitutePV(MachineInstr &MI, const DenseMap<unsigned, unsigned> &PVs) 123 const { 124 unsigned Ops[] = { 125 R600::OpName::src0, 126 R600::OpName::src1, 127 R600::OpName::src2 128 }; 129 for (unsigned i = 0; i < 3; i++) { 130 int OperandIdx = TII->getOperandIdx(MI.getOpcode(), Ops[i]); 131 if (OperandIdx < 0) 132 continue; 133 Register Src = MI.getOperand(OperandIdx).getReg(); 134 const DenseMap<unsigned, unsigned>::const_iterator It = PVs.find(Src); 135 if (It != PVs.end()) 136 MI.getOperand(OperandIdx).setReg(It->second); 137 } 138 } 139 public: 140 // Ctor. 141 R600PacketizerList(MachineFunction &MF, const R600Subtarget &ST, 142 MachineLoopInfo &MLI) 143 : VLIWPacketizerList(MF, MLI, nullptr), 144 TII(ST.getInstrInfo()), 145 TRI(TII->getRegisterInfo()) { 146 VLIW5 = !ST.hasCaymanISA(); 147 } 148 149 // initPacketizerState - initialize some internal flags. 150 void initPacketizerState() override { 151 ConsideredInstUsesAlreadyWrittenVectorElement = false; 152 } 153 154 // ignorePseudoInstruction - Ignore bundling of pseudo instructions. 155 bool ignorePseudoInstruction(const MachineInstr &MI, 156 const MachineBasicBlock *MBB) override { 157 return false; 158 } 159 160 // isSoloInstruction - return true if instruction MI can not be packetized 161 // with any other instruction, which means that MI itself is a packet. 162 bool isSoloInstruction(const MachineInstr &MI) override { 163 if (TII->isVector(MI)) 164 return true; 165 if (!TII->isALUInstr(MI.getOpcode())) 166 return true; 167 if (MI.getOpcode() == R600::GROUP_BARRIER) 168 return true; 169 // XXX: This can be removed once the packetizer properly handles all the 170 // LDS instruction group restrictions. 171 return TII->isLDSInstr(MI.getOpcode()); 172 } 173 174 // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ 175 // together. 176 bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override { 177 MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr(); 178 if (getSlot(*MII) == getSlot(*MIJ)) 179 ConsideredInstUsesAlreadyWrittenVectorElement = true; 180 // Does MII and MIJ share the same pred_sel ? 181 int OpI = TII->getOperandIdx(MII->getOpcode(), R600::OpName::pred_sel), 182 OpJ = TII->getOperandIdx(MIJ->getOpcode(), R600::OpName::pred_sel); 183 Register PredI = (OpI > -1)?MII->getOperand(OpI).getReg() : Register(), 184 PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg() : Register(); 185 if (PredI != PredJ) 186 return false; 187 if (SUJ->isSucc(SUI)) { 188 for (unsigned i = 0, e = SUJ->Succs.size(); i < e; ++i) { 189 const SDep &Dep = SUJ->Succs[i]; 190 if (Dep.getSUnit() != SUI) 191 continue; 192 if (Dep.getKind() == SDep::Anti) 193 continue; 194 if (Dep.getKind() == SDep::Output) 195 if (MII->getOperand(0).getReg() != MIJ->getOperand(0).getReg()) 196 continue; 197 return false; 198 } 199 } 200 201 bool ARDef = 202 TII->definesAddressRegister(*MII) || TII->definesAddressRegister(*MIJ); 203 bool ARUse = 204 TII->usesAddressRegister(*MII) || TII->usesAddressRegister(*MIJ); 205 206 return !ARDef || !ARUse; 207 } 208 209 // isLegalToPruneDependencies - Is it legal to prune dependece between SUI 210 // and SUJ. 211 bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override { 212 return false; 213 } 214 215 void setIsLastBit(MachineInstr *MI, unsigned Bit) const { 216 unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), R600::OpName::last); 217 MI->getOperand(LastOp).setImm(Bit); 218 } 219 220 bool isBundlableWithCurrentPMI(MachineInstr &MI, 221 const DenseMap<unsigned, unsigned> &PV, 222 std::vector<R600InstrInfo::BankSwizzle> &BS, 223 bool &isTransSlot) { 224 isTransSlot = TII->isTransOnly(MI); 225 assert (!isTransSlot || VLIW5); 226 227 // Is the dst reg sequence legal ? 228 if (!isTransSlot && !CurrentPacketMIs.empty()) { 229 if (getSlot(MI) <= getSlot(*CurrentPacketMIs.back())) { 230 if (ConsideredInstUsesAlreadyWrittenVectorElement && 231 !TII->isVectorOnly(MI) && VLIW5) { 232 isTransSlot = true; 233 LLVM_DEBUG({ 234 dbgs() << "Considering as Trans Inst :"; 235 MI.dump(); 236 }); 237 } 238 else 239 return false; 240 } 241 } 242 243 // Are the Constants limitations met ? 244 CurrentPacketMIs.push_back(&MI); 245 if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) { 246 LLVM_DEBUG({ 247 dbgs() << "Couldn't pack :\n"; 248 MI.dump(); 249 dbgs() << "with the following packets :\n"; 250 for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) { 251 CurrentPacketMIs[i]->dump(); 252 dbgs() << "\n"; 253 } 254 dbgs() << "because of Consts read limitations\n"; 255 }); 256 CurrentPacketMIs.pop_back(); 257 return false; 258 } 259 260 // Is there a BankSwizzle set that meet Read Port limitations ? 261 if (!TII->fitsReadPortLimitations(CurrentPacketMIs, 262 PV, BS, isTransSlot)) { 263 LLVM_DEBUG({ 264 dbgs() << "Couldn't pack :\n"; 265 MI.dump(); 266 dbgs() << "with the following packets :\n"; 267 for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) { 268 CurrentPacketMIs[i]->dump(); 269 dbgs() << "\n"; 270 } 271 dbgs() << "because of Read port limitations\n"; 272 }); 273 CurrentPacketMIs.pop_back(); 274 return false; 275 } 276 277 // We cannot read LDS source registers from the Trans slot. 278 if (isTransSlot && TII->readsLDSSrcReg(MI)) 279 return false; 280 281 CurrentPacketMIs.pop_back(); 282 return true; 283 } 284 285 MachineBasicBlock::iterator addToPacket(MachineInstr &MI) override { 286 MachineBasicBlock::iterator FirstInBundle = 287 CurrentPacketMIs.empty() ? &MI : CurrentPacketMIs.front(); 288 const DenseMap<unsigned, unsigned> &PV = 289 getPreviousVector(FirstInBundle); 290 std::vector<R600InstrInfo::BankSwizzle> BS; 291 bool isTransSlot; 292 293 if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) { 294 for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) { 295 MachineInstr *MI = CurrentPacketMIs[i]; 296 unsigned Op = TII->getOperandIdx(MI->getOpcode(), 297 R600::OpName::bank_swizzle); 298 MI->getOperand(Op).setImm(BS[i]); 299 } 300 unsigned Op = 301 TII->getOperandIdx(MI.getOpcode(), R600::OpName::bank_swizzle); 302 MI.getOperand(Op).setImm(BS.back()); 303 if (!CurrentPacketMIs.empty()) 304 setIsLastBit(CurrentPacketMIs.back(), 0); 305 substitutePV(MI, PV); 306 MachineBasicBlock::iterator It = VLIWPacketizerList::addToPacket(MI); 307 if (isTransSlot) { 308 endPacket(std::next(It)->getParent(), std::next(It)); 309 } 310 return It; 311 } 312 endPacket(MI.getParent(), MI); 313 if (TII->isTransOnly(MI)) 314 return MI; 315 return VLIWPacketizerList::addToPacket(MI); 316 } 317 }; 318 319 bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) { 320 const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>(); 321 const R600InstrInfo *TII = ST.getInstrInfo(); 322 323 MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); 324 325 // Instantiate the packetizer. 326 R600PacketizerList Packetizer(Fn, ST, MLI); 327 328 // DFA state table should not be empty. 329 assert(Packetizer.getResourceTracker() && "Empty DFA table!"); 330 assert(Packetizer.getResourceTracker()->getInstrItins()); 331 332 if (Packetizer.getResourceTracker()->getInstrItins()->isEmpty()) 333 return false; 334 335 // 336 // Loop over all basic blocks and remove KILL pseudo-instructions 337 // These instructions confuse the dependence analysis. Consider: 338 // D0 = ... (Insn 0) 339 // R0 = KILL R0, D0 (Insn 1) 340 // R0 = ... (Insn 2) 341 // Here, Insn 1 will result in the dependence graph not emitting an output 342 // dependence between Insn 0 and Insn 2. This can lead to incorrect 343 // packetization 344 // 345 for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); 346 MBB != MBBe; ++MBB) { 347 MachineBasicBlock::iterator End = MBB->end(); 348 MachineBasicBlock::iterator MI = MBB->begin(); 349 while (MI != End) { 350 if (MI->isKill() || MI->getOpcode() == R600::IMPLICIT_DEF || 351 (MI->getOpcode() == R600::CF_ALU && !MI->getOperand(8).getImm())) { 352 MachineBasicBlock::iterator DeleteMI = MI; 353 ++MI; 354 MBB->erase(DeleteMI); 355 End = MBB->end(); 356 continue; 357 } 358 ++MI; 359 } 360 } 361 362 // Loop over all of the basic blocks. 363 for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); 364 MBB != MBBe; ++MBB) { 365 // Find scheduling regions and schedule / packetize each region. 366 unsigned RemainingCount = MBB->size(); 367 for(MachineBasicBlock::iterator RegionEnd = MBB->end(); 368 RegionEnd != MBB->begin();) { 369 // The next region starts above the previous region. Look backward in the 370 // instruction stream until we find the nearest boundary. 371 MachineBasicBlock::iterator I = RegionEnd; 372 for(;I != MBB->begin(); --I, --RemainingCount) { 373 if (TII->isSchedulingBoundary(*std::prev(I), &*MBB, Fn)) 374 break; 375 } 376 I = MBB->begin(); 377 378 // Skip empty scheduling regions. 379 if (I == RegionEnd) { 380 RegionEnd = std::prev(RegionEnd); 381 --RemainingCount; 382 continue; 383 } 384 // Skip regions with one instruction. 385 if (I == std::prev(RegionEnd)) { 386 RegionEnd = std::prev(RegionEnd); 387 continue; 388 } 389 390 Packetizer.PacketizeMIs(&*MBB, &*I, RegionEnd); 391 RegionEnd = I; 392 } 393 } 394 395 return true; 396 397 } 398 399 } // end anonymous namespace 400 401 INITIALIZE_PASS_BEGIN(R600Packetizer, DEBUG_TYPE, 402 "R600 Packetizer", false, false) 403 INITIALIZE_PASS_END(R600Packetizer, DEBUG_TYPE, 404 "R600 Packetizer", false, false) 405 406 char R600Packetizer::ID = 0; 407 408 char &llvm::R600PacketizerID = R600Packetizer::ID; 409 410 llvm::FunctionPass *llvm::createR600Packetizer() { 411 return new R600Packetizer(); 412 } 413