//===-- SIFoldOperands.cpp - Fold operands ---------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
/// This pass folds the source operand of mov and copy instructions (an
/// immediate or a register) into the instructions that use their results.
//===----------------------------------------------------------------------===//
//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "si-fold-operands"
using namespace llvm;

namespace {

class SIFoldOperands : public MachineFunctionPass {
public:
  static char ID;

public:
  SIFoldOperands() : MachineFunctionPass(ID) {
    initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  const char *getPassName() const override {
    return "SI Fold Operands";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineDominatorTree>();
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

// A single pending fold of either an immediate or a register operand into
// operand UseOpNo of UseMI.
struct FoldCandidate {
  MachineInstr *UseMI;
  unsigned UseOpNo;
  MachineOperand *OpToFold;
  uint64_t ImmToFold;

  FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
                UseMI(MI), UseOpNo(OpNo) {

    if (FoldOp->isImm()) {
      OpToFold = nullptr;
      ImmToFold = FoldOp->getImm();
    } else {
      assert(FoldOp->isReg());
      OpToFold = FoldOp;
    }
  }

  bool isImm() const {
    return !OpToFold;
  }
};

} // End anonymous namespace.

INITIALIZE_PASS_BEGIN(SIFoldOperands, DEBUG_TYPE,
                      "SI Fold Operands", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(SIFoldOperands, DEBUG_TYPE,
                    "SI Fold Operands", false, false)

char SIFoldOperands::ID = 0;

char &llvm::SIFoldOperandsID = SIFoldOperands::ID;

FunctionPass *llvm::createSIFoldOperandsPass() {
  return new SIFoldOperands();
}

// Return true if this is a mov or copy opcode whose source operand can be
// folded into its users.
static bool isSafeToFold(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::COPY:
    return true;
  default:
    return false;
  }
}

// Apply a fold candidate by rewriting the use operand with the folded
// immediate or virtual register. Returns true if the operand was updated.
static bool updateOperand(FoldCandidate &Fold,
                          const TargetRegisterInfo &TRI) {
  MachineInstr *MI = Fold.UseMI;
  MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
  assert(Old.isReg());

  if (Fold.isImm()) {
    Old.ChangeToImmediate(Fold.ImmToFold);
    return true;
  }

  MachineOperand *New = Fold.OpToFold;
  if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
      TargetRegisterInfo::isVirtualRegister(New->getReg())) {
    Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);
    return true;
  }

  // FIXME: Handle physical registers.

  return false;
}

// Return true if MI already has one of its operands queued for folding in
// FoldList.
static bool isUseMIInFoldList(const std::vector<FoldCandidate> &FoldList,
                              const MachineInstr *MI) {
  for (auto Candidate : FoldList) {
    if (Candidate.UseMI == MI)
      return true;
  }
  return false;
}

// Try to queue folding OpToFold into operand OpNo of MI, converting
// v_mac_f32 to v_mad_f32 or commuting MI if that is what makes the
// operand legal.
static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
                             MachineInstr *MI, unsigned OpNo,
                             MachineOperand *OpToFold,
                             const SIInstrInfo *TII) {
  if (!TII->isOperandLegal(MI, OpNo, OpToFold)) {

    // Special case for v_mac_f32_e64 if we are trying to fold into src2.
    unsigned Opc = MI->getOpcode();
    if (Opc == AMDGPU::V_MAC_F32_e64 &&
        (int)OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)) {
      // Check if changing this to a v_mad_f32 instruction will allow us to
      // fold the operand.
      MI->setDesc(TII->get(AMDGPU::V_MAD_F32));
      bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold, TII);
      if (FoldAsMAD) {
        MI->untieRegOperand(OpNo);
        return true;
      }
      MI->setDesc(TII->get(Opc));
    }

    // If we are already folding into another operand of MI, then
    // we can't commute the instruction, otherwise we risk making the
    // other fold illegal.
    if (isUseMIInFoldList(FoldList, MI))
      return false;

    // Operand is not legal, so try to commute the instruction to
    // see if this makes it possible to fold.
    unsigned CommuteIdx0;
    unsigned CommuteIdx1;
    bool CanCommute = TII->findCommutedOpIndices(MI, CommuteIdx0, CommuteIdx1);

    if (CanCommute) {
      if (CommuteIdx0 == OpNo)
        OpNo = CommuteIdx1;
      else if (CommuteIdx1 == OpNo)
        OpNo = CommuteIdx0;
    }

    if (!CanCommute || !TII->commuteInstruction(MI))
      return false;

    if (!TII->isOperandLegal(MI, OpNo, OpToFold))
      return false;
  }

  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
  return true;
}

bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      if (!isSafeToFold(MI.getOpcode()))
        continue;

      unsigned OpSize = TII->getOpSize(MI, 1);
      MachineOperand &OpToFold = MI.getOperand(1);
      bool FoldingImm = OpToFold.isImm();

      // FIXME: We could also be folding things like FrameIndexes and
      // TargetIndexes.
      if (!FoldingImm && !OpToFold.isReg())
        continue;

      // Folding immediates with more than one use will increase program size.
      // FIXME: This will also reduce register usage, which may be better
      // in some cases. A better heuristic is needed.
      if (FoldingImm && !TII->isInlineConstant(OpToFold, OpSize) &&
          !MRI.hasOneUse(MI.getOperand(0).getReg()))
        continue;

      // FIXME: Fold operands with subregs.
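      // Only plain virtual registers are folded for now; sources that are
      // physical registers or that carry a subregister index are skipped.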
      if (OpToFold.isReg() &&
          (!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()) ||
           OpToFold.getSubReg()))
        continue;

      std::vector<FoldCandidate> FoldList;
      for (MachineRegisterInfo::use_iterator
           Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end();
           Use != E; ++Use) {

        MachineInstr *UseMI = Use->getParent();
        const MachineOperand &UseOp = UseMI->getOperand(Use.getOperandNo());

        // FIXME: Fold operands with subregs.
        if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) ||
            UseOp.isImplicit())) {
          continue;
        }

        APInt Imm;

        if (FoldingImm) {
          unsigned UseReg = UseOp.getReg();
          const TargetRegisterClass *UseRC
            = TargetRegisterInfo::isVirtualRegister(UseReg) ?
            MRI.getRegClass(UseReg) :
            TRI.getPhysRegClass(UseReg);

          Imm = APInt(64, OpToFold.getImm());

          // Split 64-bit constants into 32 bits for folding.
          if (UseOp.getSubReg()) {
            if (UseRC->getSize() != 8)
              continue;

            if (UseOp.getSubReg() == AMDGPU::sub0) {
              Imm = Imm.getLoBits(32);
            } else {
              assert(UseOp.getSubReg() == AMDGPU::sub1);
              Imm = Imm.getHiBits(32);
            }
          }

          // In order to fold immediates into copies, we need to change the
          // copy to a MOV.
          if (UseMI->getOpcode() == AMDGPU::COPY) {
            unsigned DestReg = UseMI->getOperand(0).getReg();
            const TargetRegisterClass *DestRC
              = TargetRegisterInfo::isVirtualRegister(DestReg) ?
              MRI.getRegClass(DestReg) :
              TRI.getPhysRegClass(DestReg);

            unsigned MovOp = TII->getMovOpcode(DestRC);
            if (MovOp == AMDGPU::COPY)
              continue;

            UseMI->setDesc(TII->get(MovOp));
          }
        }

        const MCInstrDesc &UseDesc = UseMI->getDesc();

        // Don't fold into target-independent nodes. Target-independent opcodes
        // don't have defined register classes.
        if (UseDesc.isVariadic() ||
            UseDesc.OpInfo[Use.getOperandNo()].RegClass == -1)
          continue;

        if (FoldingImm) {
          MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
          tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &ImmOp, TII);
          continue;
        }

        tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &OpToFold, TII);

        // FIXME: We could try to change the instruction from 64-bit to 32-bit
        // to enable more folding opportunities. The shrink operands pass
        // already does this.
      }

      for (FoldCandidate &Fold : FoldList) {
        if (updateOperand(Fold, TRI)) {
          // Clear kill flags.
          if (!Fold.isImm()) {
            assert(Fold.OpToFold && Fold.OpToFold->isReg());
            Fold.OpToFold->setIsKill(false);
          }
          DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
                Fold.UseOpNo << " of " << *Fold.UseMI << '\n');
        }
      }
    }
  }
  return false;
}