//===- R600MergeVectorRegisters.cpp ---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass merges the inputs of swizzlable instructions into vectors that
/// share common data and/or have enough undef subregisters, using swizzle
/// abilities.
///
/// For instance, consider the following pseudo code:
///   %5 = REG_SEQ %1, sub0, %2, sub1, %3, sub2, undef, sub3
///   ...
///   %7 = REG_SEQ %1, sub0, %3, sub1, undef, sub2, %4, sub3
///   (swizzable Inst) %7, SwizzleMask : sub0, sub1, sub2, sub3
///
/// is turned into:
///   %5 = REG_SEQ %1, sub0, %2, sub1, %3, sub2, undef, sub3
///   ...
///   %7 = INSERT_SUBREG %4, sub3
///   (swizzable Inst) %7, SwizzleMask : sub0, sub2, sub1, sub3
///
/// This allows regalloc to reduce register pressure for vector registers and
/// to reduce the MOV count.
27 //===----------------------------------------------------------------------===// 28 29 #include "AMDGPU.h" 30 #include "AMDGPUSubtarget.h" 31 #include "R600Defines.h" 32 #include "llvm/CodeGen/MachineDominators.h" 33 #include "llvm/CodeGen/MachineLoopInfo.h" 34 35 using namespace llvm; 36 37 #define DEBUG_TYPE "vec-merger" 38 39 static bool isImplicitlyDef(MachineRegisterInfo &MRI, Register Reg) { 40 if (Reg.isPhysical()) 41 return false; 42 const MachineInstr *MI = MRI.getUniqueVRegDef(Reg); 43 return MI && MI->isImplicitDef(); 44 } 45 46 namespace { 47 48 class RegSeqInfo { 49 public: 50 MachineInstr *Instr; 51 DenseMap<Register, unsigned> RegToChan; 52 std::vector<Register> UndefReg; 53 54 RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) { 55 assert(MI->getOpcode() == R600::REG_SEQUENCE); 56 for (unsigned i = 1, e = Instr->getNumOperands(); i < e; i+=2) { 57 MachineOperand &MO = Instr->getOperand(i); 58 unsigned Chan = Instr->getOperand(i + 1).getImm(); 59 if (isImplicitlyDef(MRI, MO.getReg())) 60 UndefReg.push_back(Chan); 61 else 62 RegToChan[MO.getReg()] = Chan; 63 } 64 } 65 66 RegSeqInfo() = default; 67 68 bool operator==(const RegSeqInfo &RSI) const { 69 return RSI.Instr == Instr; 70 } 71 }; 72 73 class R600VectorRegMerger : public MachineFunctionPass { 74 private: 75 using InstructionSetMap = DenseMap<unsigned, std::vector<MachineInstr *>>; 76 77 MachineRegisterInfo *MRI; 78 const R600InstrInfo *TII = nullptr; 79 DenseMap<MachineInstr *, RegSeqInfo> PreviousRegSeq; 80 InstructionSetMap PreviousRegSeqByReg; 81 InstructionSetMap PreviousRegSeqByUndefCount; 82 83 bool canSwizzle(const MachineInstr &MI) const; 84 bool areAllUsesSwizzeable(Register Reg) const; 85 void SwizzleInput(MachineInstr &, 86 const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const; 87 bool tryMergeVector(const RegSeqInfo *Untouched, RegSeqInfo *ToMerge, 88 std::vector<std::pair<unsigned, unsigned>> &Remap) const; 89 bool tryMergeUsingCommonSlot(RegSeqInfo 
&RSI, RegSeqInfo &CompatibleRSI, 90 std::vector<std::pair<unsigned, unsigned>> &RemapChan); 91 bool tryMergeUsingFreeSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI, 92 std::vector<std::pair<unsigned, unsigned>> &RemapChan); 93 MachineInstr *RebuildVector(RegSeqInfo *MI, const RegSeqInfo *BaseVec, 94 const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const; 95 void RemoveMI(MachineInstr *); 96 void trackRSI(const RegSeqInfo &RSI); 97 98 public: 99 static char ID; 100 101 R600VectorRegMerger() : MachineFunctionPass(ID) {} 102 103 void getAnalysisUsage(AnalysisUsage &AU) const override { 104 AU.setPreservesCFG(); 105 AU.addRequired<MachineDominatorTree>(); 106 AU.addPreserved<MachineDominatorTree>(); 107 AU.addRequired<MachineLoopInfo>(); 108 AU.addPreserved<MachineLoopInfo>(); 109 MachineFunctionPass::getAnalysisUsage(AU); 110 } 111 112 MachineFunctionProperties getRequiredProperties() const override { 113 return MachineFunctionProperties() 114 .set(MachineFunctionProperties::Property::IsSSA); 115 } 116 117 StringRef getPassName() const override { 118 return "R600 Vector Registers Merge Pass"; 119 } 120 121 bool runOnMachineFunction(MachineFunction &Fn) override; 122 }; 123 124 } // end anonymous namespace 125 126 INITIALIZE_PASS_BEGIN(R600VectorRegMerger, DEBUG_TYPE, 127 "R600 Vector Reg Merger", false, false) 128 INITIALIZE_PASS_END(R600VectorRegMerger, DEBUG_TYPE, 129 "R600 Vector Reg Merger", false, false) 130 131 char R600VectorRegMerger::ID = 0; 132 133 char &llvm::R600VectorRegMergerID = R600VectorRegMerger::ID; 134 135 bool R600VectorRegMerger::canSwizzle(const MachineInstr &MI) 136 const { 137 if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) 138 return true; 139 switch (MI.getOpcode()) { 140 case R600::R600_ExportSwz: 141 case R600::EG_ExportSwz: 142 return true; 143 default: 144 return false; 145 } 146 } 147 148 bool R600VectorRegMerger::tryMergeVector(const RegSeqInfo *Untouched, 149 RegSeqInfo *ToMerge, std::vector< 
std::pair<unsigned, unsigned>> &Remap) 150 const { 151 unsigned CurrentUndexIdx = 0; 152 for (DenseMap<Register, unsigned>::iterator It = ToMerge->RegToChan.begin(), 153 E = ToMerge->RegToChan.end(); It != E; ++It) { 154 DenseMap<Register, unsigned>::const_iterator PosInUntouched = 155 Untouched->RegToChan.find((*It).first); 156 if (PosInUntouched != Untouched->RegToChan.end()) { 157 Remap.push_back(std::pair<unsigned, unsigned> 158 ((*It).second, (*PosInUntouched).second)); 159 continue; 160 } 161 if (CurrentUndexIdx >= Untouched->UndefReg.size()) 162 return false; 163 Remap.push_back(std::pair<unsigned, unsigned> 164 ((*It).second, Untouched->UndefReg[CurrentUndexIdx++])); 165 } 166 167 return true; 168 } 169 170 static 171 unsigned getReassignedChan( 172 const std::vector<std::pair<unsigned, unsigned>> &RemapChan, 173 unsigned Chan) { 174 for (unsigned j = 0, je = RemapChan.size(); j < je; j++) { 175 if (RemapChan[j].first == Chan) 176 return RemapChan[j].second; 177 } 178 llvm_unreachable("Chan wasn't reassigned"); 179 } 180 181 MachineInstr *R600VectorRegMerger::RebuildVector( 182 RegSeqInfo *RSI, const RegSeqInfo *BaseRSI, 183 const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const { 184 Register Reg = RSI->Instr->getOperand(0).getReg(); 185 MachineBasicBlock::iterator Pos = RSI->Instr; 186 MachineBasicBlock &MBB = *Pos->getParent(); 187 DebugLoc DL = Pos->getDebugLoc(); 188 189 Register SrcVec = BaseRSI->Instr->getOperand(0).getReg(); 190 DenseMap<Register, unsigned> UpdatedRegToChan = BaseRSI->RegToChan; 191 std::vector<Register> UpdatedUndef = BaseRSI->UndefReg; 192 for (DenseMap<Register, unsigned>::iterator It = RSI->RegToChan.begin(), 193 E = RSI->RegToChan.end(); It != E; ++It) { 194 Register DstReg = MRI->createVirtualRegister(&R600::R600_Reg128RegClass); 195 unsigned SubReg = (*It).first; 196 unsigned Swizzle = (*It).second; 197 unsigned Chan = getReassignedChan(RemapChan, Swizzle); 198 199 MachineInstr *Tmp = BuildMI(MBB, Pos, DL, 
TII->get(R600::INSERT_SUBREG), 200 DstReg) 201 .addReg(SrcVec) 202 .addReg(SubReg) 203 .addImm(Chan); 204 UpdatedRegToChan[SubReg] = Chan; 205 std::vector<Register>::iterator ChanPos = llvm::find(UpdatedUndef, Chan); 206 if (ChanPos != UpdatedUndef.end()) 207 UpdatedUndef.erase(ChanPos); 208 assert(!is_contained(UpdatedUndef, Chan) && 209 "UpdatedUndef shouldn't contain Chan more than once!"); 210 LLVM_DEBUG(dbgs() << " ->"; Tmp->dump();); 211 (void)Tmp; 212 SrcVec = DstReg; 213 } 214 MachineInstr *NewMI = 215 BuildMI(MBB, Pos, DL, TII->get(R600::COPY), Reg).addReg(SrcVec); 216 LLVM_DEBUG(dbgs() << " ->"; NewMI->dump();); 217 218 LLVM_DEBUG(dbgs() << " Updating Swizzle:\n"); 219 for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg), 220 E = MRI->use_instr_end(); It != E; ++It) { 221 LLVM_DEBUG(dbgs() << " "; (*It).dump(); dbgs() << " ->"); 222 SwizzleInput(*It, RemapChan); 223 LLVM_DEBUG((*It).dump()); 224 } 225 RSI->Instr->eraseFromParent(); 226 227 // Update RSI 228 RSI->Instr = NewMI; 229 RSI->RegToChan = UpdatedRegToChan; 230 RSI->UndefReg = UpdatedUndef; 231 232 return NewMI; 233 } 234 235 void R600VectorRegMerger::RemoveMI(MachineInstr *MI) { 236 for (InstructionSetMap::iterator It = PreviousRegSeqByReg.begin(), 237 E = PreviousRegSeqByReg.end(); It != E; ++It) { 238 std::vector<MachineInstr *> &MIs = (*It).second; 239 MIs.erase(llvm::find(MIs, MI), MIs.end()); 240 } 241 for (InstructionSetMap::iterator It = PreviousRegSeqByUndefCount.begin(), 242 E = PreviousRegSeqByUndefCount.end(); It != E; ++It) { 243 std::vector<MachineInstr *> &MIs = (*It).second; 244 MIs.erase(llvm::find(MIs, MI), MIs.end()); 245 } 246 } 247 248 void R600VectorRegMerger::SwizzleInput(MachineInstr &MI, 249 const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const { 250 unsigned Offset; 251 if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) 252 Offset = 2; 253 else 254 Offset = 3; 255 for (unsigned i = 0; i < 4; i++) { 256 unsigned Swizzle = 
MI.getOperand(i + Offset).getImm() + 1; 257 for (unsigned j = 0, e = RemapChan.size(); j < e; j++) { 258 if (RemapChan[j].first == Swizzle) { 259 MI.getOperand(i + Offset).setImm(RemapChan[j].second - 1); 260 break; 261 } 262 } 263 } 264 } 265 266 bool R600VectorRegMerger::areAllUsesSwizzeable(Register Reg) const { 267 for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(Reg), 268 E = MRI->use_instr_end(); It != E; ++It) { 269 if (!canSwizzle(*It)) 270 return false; 271 } 272 return true; 273 } 274 275 bool R600VectorRegMerger::tryMergeUsingCommonSlot(RegSeqInfo &RSI, 276 RegSeqInfo &CompatibleRSI, 277 std::vector<std::pair<unsigned, unsigned>> &RemapChan) { 278 for (MachineInstr::mop_iterator MOp = RSI.Instr->operands_begin(), 279 MOE = RSI.Instr->operands_end(); MOp != MOE; ++MOp) { 280 if (!MOp->isReg()) 281 continue; 282 if (PreviousRegSeqByReg[MOp->getReg()].empty()) 283 continue; 284 for (MachineInstr *MI : PreviousRegSeqByReg[MOp->getReg()]) { 285 CompatibleRSI = PreviousRegSeq[MI]; 286 if (RSI == CompatibleRSI) 287 continue; 288 if (tryMergeVector(&CompatibleRSI, &RSI, RemapChan)) 289 return true; 290 } 291 } 292 return false; 293 } 294 295 bool R600VectorRegMerger::tryMergeUsingFreeSlot(RegSeqInfo &RSI, 296 RegSeqInfo &CompatibleRSI, 297 std::vector<std::pair<unsigned, unsigned>> &RemapChan) { 298 unsigned NeededUndefs = 4 - RSI.UndefReg.size(); 299 if (PreviousRegSeqByUndefCount[NeededUndefs].empty()) 300 return false; 301 std::vector<MachineInstr *> &MIs = 302 PreviousRegSeqByUndefCount[NeededUndefs]; 303 CompatibleRSI = PreviousRegSeq[MIs.back()]; 304 tryMergeVector(&CompatibleRSI, &RSI, RemapChan); 305 return true; 306 } 307 308 void R600VectorRegMerger::trackRSI(const RegSeqInfo &RSI) { 309 for (DenseMap<Register, unsigned>::const_iterator 310 It = RSI.RegToChan.begin(), E = RSI.RegToChan.end(); It != E; ++It) { 311 PreviousRegSeqByReg[(*It).first].push_back(RSI.Instr); 312 } 313 
PreviousRegSeqByUndefCount[RSI.UndefReg.size()].push_back(RSI.Instr); 314 PreviousRegSeq[RSI.Instr] = RSI; 315 } 316 317 bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) { 318 if (skipFunction(Fn.getFunction())) 319 return false; 320 321 const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>(); 322 TII = ST.getInstrInfo(); 323 MRI = &Fn.getRegInfo(); 324 325 for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); 326 MBB != MBBe; ++MBB) { 327 MachineBasicBlock *MB = &*MBB; 328 PreviousRegSeq.clear(); 329 PreviousRegSeqByReg.clear(); 330 PreviousRegSeqByUndefCount.clear(); 331 332 for (MachineBasicBlock::iterator MII = MB->begin(), MIIE = MB->end(); 333 MII != MIIE; ++MII) { 334 MachineInstr &MI = *MII; 335 if (MI.getOpcode() != R600::REG_SEQUENCE) { 336 if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) { 337 Register Reg = MI.getOperand(1).getReg(); 338 for (MachineRegisterInfo::def_instr_iterator 339 It = MRI->def_instr_begin(Reg), E = MRI->def_instr_end(); 340 It != E; ++It) { 341 RemoveMI(&(*It)); 342 } 343 } 344 continue; 345 } 346 347 RegSeqInfo RSI(*MRI, &MI); 348 349 // All uses of MI are swizzeable ? 
350 Register Reg = MI.getOperand(0).getReg(); 351 if (!areAllUsesSwizzeable(Reg)) 352 continue; 353 354 LLVM_DEBUG({ 355 dbgs() << "Trying to optimize "; 356 MI.dump(); 357 }); 358 359 RegSeqInfo CandidateRSI; 360 std::vector<std::pair<unsigned, unsigned>> RemapChan; 361 LLVM_DEBUG(dbgs() << "Using common slots...\n";); 362 if (tryMergeUsingCommonSlot(RSI, CandidateRSI, RemapChan)) { 363 // Remove CandidateRSI mapping 364 RemoveMI(CandidateRSI.Instr); 365 MII = RebuildVector(&RSI, &CandidateRSI, RemapChan); 366 trackRSI(RSI); 367 continue; 368 } 369 LLVM_DEBUG(dbgs() << "Using free slots...\n";); 370 RemapChan.clear(); 371 if (tryMergeUsingFreeSlot(RSI, CandidateRSI, RemapChan)) { 372 RemoveMI(CandidateRSI.Instr); 373 MII = RebuildVector(&RSI, &CandidateRSI, RemapChan); 374 trackRSI(RSI); 375 continue; 376 } 377 //Failed to merge 378 trackRSI(RSI); 379 } 380 } 381 return false; 382 } 383 384 llvm::FunctionPass *llvm::createR600VectorRegMerger() { 385 return new R600VectorRegMerger(); 386 } 387