//===-- SIFoldOperands.cpp - Fold operands --------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
//===----------------------------------------------------------------------===//
//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "si-fold-operands"
using namespace llvm;

namespace {

class SIFoldOperands : public MachineFunctionPass {
public:
  static char ID;

public:
  SIFoldOperands() : MachineFunctionPass(ID) {
    initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  const char *getPassName() const override {
    return "SI Fold Operands";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineDominatorTree>();
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

struct FoldCandidate {
  MachineInstr *UseMI;
  unsigned UseOpNo;
  MachineOperand *OpToFold;
  uint64_t ImmToFold;

  FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
    UseMI(MI), UseOpNo(OpNo) {

    if (FoldOp->isImm()) {
      OpToFold = nullptr;
      ImmToFold = FoldOp->getImm();
    } else {
      assert(FoldOp->isReg());
      OpToFold = FoldOp;
    }
  }

  bool isImm() const {
    return !OpToFold;
  }
};

} // End anonymous namespace.

INITIALIZE_PASS_BEGIN(SIFoldOperands, DEBUG_TYPE,
                      "SI Fold Operands", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(SIFoldOperands, DEBUG_TYPE,
                    "SI Fold Operands", false, false)

char SIFoldOperands::ID = 0;

char &llvm::SIFoldOperandsID = SIFoldOperands::ID;

FunctionPass *llvm::createSIFoldOperandsPass() {
  return new SIFoldOperands();
}

static bool isSafeToFold(unsigned Opcode) {
  switch(Opcode) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::COPY:
    return true;
  default:
    return false;
  }
}

static bool updateOperand(FoldCandidate &Fold,
                          const TargetRegisterInfo &TRI) {
  MachineInstr *MI = Fold.UseMI;
  MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
  assert(Old.isReg());

  if (Fold.isImm()) {
    Old.ChangeToImmediate(Fold.ImmToFold);
    return true;
  }

  MachineOperand *New = Fold.OpToFold;
  if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
      TargetRegisterInfo::isVirtualRegister(New->getReg())) {
    Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);
    return true;
  }

  // FIXME: Handle physical registers.

  return false;
}

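// Return true if \p MI is already recorded as the use instruction of some
// candidate in \p FoldList.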
static bool isUseMIInFoldList(const std::vector<FoldCandidate> &FoldList,
                              const MachineInstr *MI) {
  for (auto Candidate : FoldList) {
    if (Candidate.UseMI == MI)
      return true;
  }
  return false;
}

static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
                             MachineInstr *MI, unsigned OpNo,
                             MachineOperand *OpToFold,
                             const SIInstrInfo *TII) {
  if (!TII->isOperandLegal(MI, OpNo, OpToFold)) {

    // Special case for v_mac_f32_e64 if we are trying to fold into src2
    unsigned Opc = MI->getOpcode();
    if (Opc == AMDGPU::V_MAC_F32_e64 &&
        (int)OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)) {
      // Check if changing this to a v_mad_f32 instruction will allow us to
      // fold the operand.
      MI->setDesc(TII->get(AMDGPU::V_MAD_F32));
      bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold, TII);
      if (FoldAsMAD) {
        MI->untieRegOperand(OpNo);
        return true;
      }
      MI->setDesc(TII->get(Opc));
    }

    // If we are already folding into another operand of MI, then
    // we can't commute the instruction, otherwise we risk making the
    // other fold illegal.
    if (isUseMIInFoldList(FoldList, MI))
      return false;

    // Operand is not legal, so try to commute the instruction to
    // see if this makes it possible to fold.
    unsigned CommuteIdx0;
    unsigned CommuteIdx1;
    bool CanCommute = TII->findCommutedOpIndices(MI, CommuteIdx0, CommuteIdx1);

    if (CanCommute) {
      if (CommuteIdx0 == OpNo)
        OpNo = CommuteIdx1;
      else if (CommuteIdx1 == OpNo)
        OpNo = CommuteIdx0;
    }

    if (!CanCommute || !TII->commuteInstruction(MI))
      return false;

    if (!TII->isOperandLegal(MI, OpNo, OpToFold))
      return false;
  }

  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
  return true;
}

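// Try to fold \p OpToFold into operand \p UseOpIdx of \p UseMI and record any
// legal fold in \p FoldList. Folds into a REG_SEQUENCE are redirected to the
// uses of its result, and immediate folds into a COPY first rewrite the COPY
// to an equivalent MOV.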
static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
                        unsigned UseOpIdx,
                        std::vector<FoldCandidate> &FoldList,
                        const SIInstrInfo *TII, const SIRegisterInfo &TRI,
                        MachineRegisterInfo &MRI) {
  const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);

  // FIXME: Fold operands with subregs.
  if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) ||
      UseOp.isImplicit())) {
    return;
  }

  bool FoldingImm = OpToFold.isImm();
  APInt Imm;

  if (FoldingImm) {
    unsigned UseReg = UseOp.getReg();
    const TargetRegisterClass *UseRC
      = TargetRegisterInfo::isVirtualRegister(UseReg) ?
        MRI.getRegClass(UseReg) :
        TRI.getPhysRegClass(UseReg);

    Imm = APInt(64, OpToFold.getImm());

    const MCInstrDesc &FoldDesc = TII->get(OpToFold.getParent()->getOpcode());
    const TargetRegisterClass *FoldRC =
        TRI.getRegClass(FoldDesc.OpInfo[0].RegClass);

    // Split 64-bit constants into 32-bits for folding.
    if (FoldRC->getSize() == 8 && UseOp.getSubReg()) {
      if (UseRC->getSize() != 8)
        return;

      if (UseOp.getSubReg() == AMDGPU::sub0) {
        Imm = Imm.getLoBits(32);
      } else {
        assert(UseOp.getSubReg() == AMDGPU::sub1);
        Imm = Imm.getHiBits(32);
      }
    }

    // In order to fold immediates into copies, we need to change the
    // copy to a MOV.
    if (UseMI->getOpcode() == AMDGPU::COPY) {
      unsigned DestReg = UseMI->getOperand(0).getReg();
      const TargetRegisterClass *DestRC
        = TargetRegisterInfo::isVirtualRegister(DestReg) ?
          MRI.getRegClass(DestReg) :
          TRI.getPhysRegClass(DestReg);

      unsigned MovOp = TII->getMovOpcode(DestRC);
      if (MovOp == AMDGPU::COPY)
        return;

      UseMI->setDesc(TII->get(MovOp));
    }
  }

  // Special case for REG_SEQUENCE: We can't fold literals into
  // REG_SEQUENCE instructions, so we have to fold them into the
  // uses of REG_SEQUENCE.
  if (UseMI->getOpcode() == AMDGPU::REG_SEQUENCE) {
    unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
    unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();

    for (MachineRegisterInfo::use_iterator
         RSUse = MRI.use_begin(RegSeqDstReg),
         RSE = MRI.use_end(); RSUse != RSE; ++RSUse) {

      MachineInstr *RSUseMI = RSUse->getParent();
      if (RSUse->getSubReg() != RegSeqDstSubReg)
        continue;

      foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
                  TII, TRI, MRI);
    }
    return;
  }

  const MCInstrDesc &UseDesc = UseMI->getDesc();

  // Don't fold into target independent nodes. Target independent opcodes
  // don't have defined register classes.
  if (UseDesc.isVariadic() ||
      UseDesc.OpInfo[UseOpIdx].RegClass == -1)
    return;

  if (FoldingImm) {
    MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
    tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
    return;
  }

  tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);

  // FIXME: We could try to change the instruction from 64-bit to 32-bit
  // to enable more folding opportunities. The shrink operands pass
  // already does this.
  return;
}

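// Pass entry point: walk every block, pick out foldable MOV/COPY source
// operands, gather fold candidates over the uses of each def, and rewrite the
// uses that updateOperand can handle.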
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      if (!isSafeToFold(MI.getOpcode()))
        continue;

      unsigned OpSize = TII->getOpSize(MI, 1);
      MachineOperand &OpToFold = MI.getOperand(1);
      bool FoldingImm = OpToFold.isImm();

      // FIXME: We could also be folding things like FrameIndexes and
      // TargetIndexes.
      if (!FoldingImm && !OpToFold.isReg())
        continue;

      // Folding immediates with more than one use will increase program size.
      // FIXME: This will also reduce register usage, which may be better
      // in some cases. A better heuristic is needed.
      if (FoldingImm && !TII->isInlineConstant(OpToFold, OpSize) &&
          !MRI.hasOneUse(MI.getOperand(0).getReg()))
        continue;

      // FIXME: Fold operands with subregs.
      if (OpToFold.isReg() &&
          (!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()) ||
           OpToFold.getSubReg()))
        continue;

      std::vector<FoldCandidate> FoldList;
      for (MachineRegisterInfo::use_iterator
           Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end();
           Use != E; ++Use) {

        MachineInstr *UseMI = Use->getParent();

        foldOperand(OpToFold, UseMI, Use.getOperandNo(), FoldList,
                    TII, TRI, MRI);
      }

      for (FoldCandidate &Fold : FoldList) {
        if (updateOperand(Fold, TRI)) {
          // Clear kill flags.
          if (!Fold.isImm()) {
            assert(Fold.OpToFold && Fold.OpToFold->isReg());
            Fold.OpToFold->setIsKill(false);
          }
          DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
                Fold.UseOpNo << " of " << *Fold.UseMI << '\n');
        }
      }
    }
  }
  return false;
}