//===-- SILowerI1Copies.cpp - Lower I1 Copies -----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// i1 values are usually inserted by the CFG Structurize pass and they are
/// unique in that they can be copied from VALU to SALU registers.
/// This is not possible for any other value type. Since there are no
/// MOV instructions for i1, we need to use V_CMP_* and V_CNDMASK to move the
/// i1.
///
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "si-i1-copies"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPULaneDominator.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

namespace {

class SILowerI1Copies : public MachineFunctionPass {
public:
  static char ID;

public:
  SILowerI1Copies() : MachineFunctionPass(ID) {
    initializeSILowerI1CopiesPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "SI Lower i1 Copies"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.
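
// Summary of the lowerings performed in runOnMachineFunction below
// (illustrative MIR-like sketch, not emitted verbatim):
//
//   SGPR i1 -> VGPR i1:  %vdst = V_CNDMASK_B32_e64 0, -1, %smask
//                        (each lane reads -1 if its bit is set in the 64-bit
//                         mask, 0 otherwise; an S_MOV_B64 of 0/-1 is instead
//                         folded into a plain V_MOV_B32 immediate)
//
//   VGPR i1 -> SGPR i1:  %sdst = V_CMP_NE_U32_e64 %vsrc, 0
//                        or, when %vsrc is itself a V_CNDMASK of an SGPR mask
//                        defined in a lane-dominating block:
//                        %sdst = S_AND_B64 $exec, %smask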

INITIALIZE_PASS(SILowerI1Copies, DEBUG_TYPE,
                "SI Lower i1 Copies", false, false)

char SILowerI1Copies::ID = 0;

char &llvm::SILowerI1CopiesID = SILowerI1Copies::ID;

FunctionPass *llvm::createSILowerI1CopiesPass() {
  return new SILowerI1Copies();
}

bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const TargetRegisterInfo *TRI = &TII->getRegisterInfo();

  std::vector<unsigned> I1Defs;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      // An IMPLICIT_DEF of a VReg_1 can simply be retyped as a 64-bit SGPR.
      if (MI.getOpcode() == AMDGPU::IMPLICIT_DEF) {
        unsigned Reg = MI.getOperand(0).getReg();
        const TargetRegisterClass *RC = MRI.getRegClass(Reg);
        if (RC == &AMDGPU::VReg_1RegClass)
          MRI.setRegClass(Reg, &AMDGPU::SReg_64RegClass);
        continue;
      }

      if (MI.getOpcode() != AMDGPU::COPY)
        continue;

      const MachineOperand &Dst = MI.getOperand(0);
      const MachineOperand &Src = MI.getOperand(1);

      if (!TargetRegisterInfo::isVirtualRegister(Src.getReg()) ||
          !TargetRegisterInfo::isVirtualRegister(Dst.getReg()))
        continue;

      const TargetRegisterClass *DstRC = MRI.getRegClass(Dst.getReg());
      const TargetRegisterClass *SrcRC = MRI.getRegClass(Src.getReg());

      DebugLoc DL = MI.getDebugLoc();
      MachineInstr *DefInst = MRI.getUniqueVRegDef(Src.getReg());
      if (DstRC == &AMDGPU::VReg_1RegClass &&
          TRI->getCommonSubClass(SrcRC, &AMDGPU::SGPR_64RegClass)) {
        I1Defs.push_back(Dst.getReg());

        // A copy of an S_MOV_B64 all-zeros/all-ones immediate can be lowered
        // to a plain V_MOV_B32 of the same immediate.
        if (DefInst->getOpcode() == AMDGPU::S_MOV_B64) {
          if (DefInst->getOperand(1).isImm()) {
            I1Defs.push_back(Dst.getReg());

            int64_t Val = DefInst->getOperand(1).getImm();
            assert(Val == 0 || Val == -1);

            BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_MOV_B32_e32))
                .add(Dst)
                .addImm(Val);
            MI.eraseFromParent();
            continue;
          }
        }

        // Otherwise materialize the per-lane value by selecting on the mask.
        unsigned TmpSrc =
            MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
        BuildMI(MBB, &MI, DL, TII->get(AMDGPU::COPY), TmpSrc)
            .add(Src);
        BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64))
            .add(Dst)
            .addImm(0)
            .addImm(-1)
            .addReg(TmpSrc);
        MI.eraseFromParent();
      } else if (TRI->getCommonSubClass(DstRC, &AMDGPU::SGPR_64RegClass) &&
                 SrcRC == &AMDGPU::VReg_1RegClass) {
        // If the VGPR source was itself produced by selecting on an SGPR mask
        // in a lane-dominating block, AND that mask with EXEC directly;
        // otherwise rebuild the mask with a per-lane compare against zero.
        if (DefInst->getOpcode() == AMDGPU::V_CNDMASK_B32_e64 &&
            DefInst->getOperand(1).isImm() && DefInst->getOperand(2).isImm() &&
            DefInst->getOperand(1).getImm() == 0 &&
            DefInst->getOperand(2).getImm() != 0 &&
            DefInst->getOperand(3).isReg() &&
            TargetRegisterInfo::isVirtualRegister(
                DefInst->getOperand(3).getReg()) &&
            TRI->getCommonSubClass(
                MRI.getRegClass(DefInst->getOperand(3).getReg()),
                &AMDGPU::SGPR_64RegClass) &&
            AMDGPU::laneDominates(DefInst->getParent(), &MBB)) {
          BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_B64))
              .add(Dst)
              .addReg(AMDGPU::EXEC)
              .add(DefInst->getOperand(3));
        } else {
          BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_NE_U32_e64))
              .add(Dst)
              .add(Src)
              .addImm(0);
        }
        MI.eraseFromParent();
      }
    }
  }

  // Any i1 defs that were lowered to VALU form now live in 32-bit VGPRs.
  for (unsigned Reg : I1Defs)
    MRI.setRegClass(Reg, &AMDGPU::VGPR_32RegClass);

  return false;
}