//===-- SIFoldOperands.cpp - Fold operands -------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
//===----------------------------------------------------------------------===//
//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "si-fold-operands"
using namespace llvm;

namespace {

class SIFoldOperands : public MachineFunctionPass {
public:
  static char ID;

public:
  SIFoldOperands() : MachineFunctionPass(ID) {
    initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  const char *getPassName() const override {
    return "SI Fold Operands";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

struct FoldCandidate {
  MachineInstr *UseMI;
  unsigned UseOpNo;
  MachineOperand *OpToFold;
  uint64_t ImmToFold;

  FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
                UseMI(MI), UseOpNo(OpNo) {

    if (FoldOp->isImm()) {
      OpToFold = nullptr;
      ImmToFold = FoldOp->getImm();
    } else {
      assert(FoldOp->isReg());
      OpToFold = FoldOp;
    }
  }

  bool isImm() const {
    return !OpToFold;
  }
};

} // End anonymous namespace.

INITIALIZE_PASS_BEGIN(SIFoldOperands, DEBUG_TYPE,
                      "SI Fold Operands", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(SIFoldOperands, DEBUG_TYPE,
                    "SI Fold Operands", false, false)

char SIFoldOperands::ID = 0;

char &llvm::SIFoldOperandsID = SIFoldOperands::ID;

FunctionPass *llvm::createSIFoldOperandsPass() {
  return new SIFoldOperands();
}

static bool isSafeToFold(unsigned Opcode) {
  switch(Opcode) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::COPY:
    return true;
  default:
    return false;
  }
}

static bool updateOperand(FoldCandidate &Fold,
                          const TargetRegisterInfo &TRI) {
  MachineInstr *MI = Fold.UseMI;
  MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
  assert(Old.isReg());

  if (Fold.isImm()) {
    Old.ChangeToImmediate(Fold.ImmToFold);
    return true;
  }

  MachineOperand *New = Fold.OpToFold;
  if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
      TargetRegisterInfo::isVirtualRegister(New->getReg())) {
    Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);
    return true;
  }

  // FIXME: Handle physical registers.

  return false;
}

static bool isUseMIInFoldList(const std::vector<FoldCandidate> &FoldList,
                              const MachineInstr *MI) {
  for (auto Candidate : FoldList) {
    if (Candidate.UseMI == MI)
      return true;
  }
  return false;
}

static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
                             MachineInstr *MI, unsigned OpNo,
                             MachineOperand *OpToFold,
                             const SIInstrInfo *TII) {
  if (!TII->isOperandLegal(MI, OpNo, OpToFold)) {

    // Special case for v_mac_f32_e64 if we are trying to fold into src2
    unsigned Opc = MI->getOpcode();
    if (Opc == AMDGPU::V_MAC_F32_e64 &&
        (int)OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)) {
      // Check if changing this to a v_mad_f32 instruction will allow us to
      // fold the operand.
      MI->setDesc(TII->get(AMDGPU::V_MAD_F32));
      bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold, TII);
      if (FoldAsMAD) {
        MI->untieRegOperand(OpNo);
        return true;
      }
      MI->setDesc(TII->get(Opc));
    }

    // If we are already folding into another operand of MI, then
    // we can't commute the instruction, otherwise we risk making the
    // other fold illegal.
    if (isUseMIInFoldList(FoldList, MI))
      return false;

    // Operand is not legal, so try to commute the instruction to
    // see if this makes it possible to fold.
    unsigned CommuteIdx0;
    unsigned CommuteIdx1;
    bool CanCommute = TII->findCommutedOpIndices(MI, CommuteIdx0, CommuteIdx1);

    if (CanCommute) {
      if (CommuteIdx0 == OpNo)
        OpNo = CommuteIdx1;
      else if (CommuteIdx1 == OpNo)
        OpNo = CommuteIdx0;
    }

    if (!CanCommute || !TII->commuteInstruction(MI))
      return false;

    if (!TII->isOperandLegal(MI, OpNo, OpToFold))
      return false;
  }

  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
  return true;
}

static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
                        unsigned UseOpIdx,
                        std::vector<FoldCandidate> &FoldList,
                        SmallVectorImpl<MachineInstr *> &CopiesToReplace,
                        const SIInstrInfo *TII, const SIRegisterInfo &TRI,
                        MachineRegisterInfo &MRI) {
  const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);

  // FIXME: Fold operands with subregs.
  if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) ||
      UseOp.isImplicit())) {
    return;
  }

  bool FoldingImm = OpToFold.isImm();
  APInt Imm;

  if (FoldingImm) {
    unsigned UseReg = UseOp.getReg();
    const TargetRegisterClass *UseRC
      = TargetRegisterInfo::isVirtualRegister(UseReg) ?
        MRI.getRegClass(UseReg) :
        TRI.getPhysRegClass(UseReg);

    Imm = APInt(64, OpToFold.getImm());

    const MCInstrDesc &FoldDesc = TII->get(OpToFold.getParent()->getOpcode());
    const TargetRegisterClass *FoldRC =
        TRI.getRegClass(FoldDesc.OpInfo[0].RegClass);

    // Split 64-bit constants into 32-bits for folding.
    if (FoldRC->getSize() == 8 && UseOp.getSubReg()) {
      if (UseRC->getSize() != 8)
        return;

      if (UseOp.getSubReg() == AMDGPU::sub0) {
        Imm = Imm.getLoBits(32);
      } else {
        assert(UseOp.getSubReg() == AMDGPU::sub1);
        Imm = Imm.getHiBits(32);
      }
    }

    // In order to fold immediates into copies, we need to change the
    // copy to a MOV.
    if (UseMI->getOpcode() == AMDGPU::COPY) {
      unsigned DestReg = UseMI->getOperand(0).getReg();
      const TargetRegisterClass *DestRC
        = TargetRegisterInfo::isVirtualRegister(DestReg) ?
          MRI.getRegClass(DestReg) :
          TRI.getPhysRegClass(DestReg);

      unsigned MovOp = TII->getMovOpcode(DestRC);
      if (MovOp == AMDGPU::COPY)
        return;

      UseMI->setDesc(TII->get(MovOp));
      CopiesToReplace.push_back(UseMI);
    }
  }

  // Special case for REG_SEQUENCE: We can't fold literals into
  // REG_SEQUENCE instructions, so we have to fold them into the
  // uses of REG_SEQUENCE.
  if (UseMI->getOpcode() == AMDGPU::REG_SEQUENCE) {
    unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
    unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();

    for (MachineRegisterInfo::use_iterator
           RSUse = MRI.use_begin(RegSeqDstReg),
           RSE = MRI.use_end(); RSUse != RSE; ++RSUse) {

      MachineInstr *RSUseMI = RSUse->getParent();
      if (RSUse->getSubReg() != RegSeqDstSubReg)
        continue;

      foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
                  CopiesToReplace, TII, TRI, MRI);
    }
    return;
  }

  const MCInstrDesc &UseDesc = UseMI->getDesc();

  // Don't fold into target independent nodes. Target independent opcodes
  // don't have defined register classes.
  if (UseDesc.isVariadic() ||
      UseDesc.OpInfo[UseOpIdx].RegClass == -1)
    return;

  if (FoldingImm) {
    MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
    tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
    return;
  }

  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);

  // FIXME: We could try to change the instruction from 64-bit to 32-bit
  // to enable more folding opportunities. The shrink operands pass
  // already does this.
  return;
}

bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      if (!isSafeToFold(MI.getOpcode()))
        continue;

      unsigned OpSize = TII->getOpSize(MI, 1);
      MachineOperand &OpToFold = MI.getOperand(1);
      bool FoldingImm = OpToFold.isImm();

      // FIXME: We could also be folding things like FrameIndexes and
      // TargetIndexes.
      if (!FoldingImm && !OpToFold.isReg())
        continue;

      // Folding immediates with more than one use will increase program size.
      // FIXME: This will also reduce register usage, which may be better
      // in some cases. A better heuristic is needed.
      if (FoldingImm && !TII->isInlineConstant(OpToFold, OpSize) &&
          !MRI.hasOneUse(MI.getOperand(0).getReg()))
        continue;

      // FIXME: Fold operands with subregs.
      if (OpToFold.isReg() &&
          (!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()) ||
           OpToFold.getSubReg()))
        continue;

      // We need to mutate the operands of new mov instructions to add implicit
      // uses of EXEC, but adding them invalidates the use_iterator, so defer
      // this.
      SmallVector<MachineInstr *, 4> CopiesToReplace;

      std::vector<FoldCandidate> FoldList;
      for (MachineRegisterInfo::use_iterator
             Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end();
           Use != E; ++Use) {

        MachineInstr *UseMI = Use->getParent();

        foldOperand(OpToFold, UseMI, Use.getOperandNo(), FoldList,
                    CopiesToReplace, TII, TRI, MRI);
      }

      // Make sure we add EXEC uses to any new v_mov instructions created.
      for (MachineInstr *Copy : CopiesToReplace)
        Copy->addImplicitDefUseOperands(MF);

      for (FoldCandidate &Fold : FoldList) {
        if (updateOperand(Fold, TRI)) {
          // Clear kill flags.
          if (!Fold.isImm()) {
            assert(Fold.OpToFold && Fold.OpToFold->isReg());
            Fold.OpToFold->setIsKill(false);
          }
          DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
                Fold.UseOpNo << " of " << *Fold.UseMI << '\n');
        }
      }
    }
  }
  return false;
}