10b57cec5SDimitry Andric //=======- GCNDPPCombine.cpp - optimization for DPP instructions ---==========// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // The pass combines V_MOV_B32_dpp instruction with its VALU uses as a DPP src0 90b57cec5SDimitry Andric // operand. If any of the use instruction cannot be combined with the mov the 100b57cec5SDimitry Andric // whole sequence is reverted. 110b57cec5SDimitry Andric // 120b57cec5SDimitry Andric // $old = ... 130b57cec5SDimitry Andric // $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane, 140b57cec5SDimitry Andric // dpp_controls..., $row_mask, $bank_mask, $bound_ctrl 150b57cec5SDimitry Andric // $res = VALU $dpp_value [, src1] 160b57cec5SDimitry Andric // 170b57cec5SDimitry Andric // to 180b57cec5SDimitry Andric // 190b57cec5SDimitry Andric // $res = VALU_DPP $combined_old, $vgpr_to_be_read_from_other_lane, [src1,] 200b57cec5SDimitry Andric // dpp_controls..., $row_mask, $bank_mask, $combined_bound_ctrl 210b57cec5SDimitry Andric // 220b57cec5SDimitry Andric // Combining rules : 230b57cec5SDimitry Andric // 240b57cec5SDimitry Andric // if $row_mask and $bank_mask are fully enabled (0xF) and 250b57cec5SDimitry Andric // $bound_ctrl==DPP_BOUND_ZERO or $old==0 260b57cec5SDimitry Andric // -> $combined_old = undef, 270b57cec5SDimitry Andric // $combined_bound_ctrl = DPP_BOUND_ZERO 280b57cec5SDimitry Andric // 290b57cec5SDimitry Andric // if the VALU op is binary and 300b57cec5SDimitry Andric // $bound_ctrl==DPP_BOUND_OFF and 310b57cec5SDimitry Andric // $old==identity value (immediate) for the VALU op 320b57cec5SDimitry Andric // -> $combined_old = src1, 330b57cec5SDimitry Andric // $combined_bound_ctrl = DPP_BOUND_OFF 340b57cec5SDimitry Andric // 350b57cec5SDimitry Andric // Otherwise cancel. 360b57cec5SDimitry Andric // 370b57cec5SDimitry Andric // The mov_dpp instruction should reside in the same BB as all its uses 380b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 390b57cec5SDimitry Andric 400b57cec5SDimitry Andric #include "AMDGPU.h" 41e8d8bef9SDimitry Andric #include "GCNSubtarget.h" 420b57cec5SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 430b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h" 440b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 450b57cec5SDimitry Andric 460b57cec5SDimitry Andric using namespace llvm; 470b57cec5SDimitry Andric 480b57cec5SDimitry Andric #define DEBUG_TYPE "gcn-dpp-combine" 490b57cec5SDimitry Andric 500b57cec5SDimitry Andric STATISTIC(NumDPPMovsCombined, "Number of DPP moves combined."); 510b57cec5SDimitry Andric 520b57cec5SDimitry Andric namespace { 530b57cec5SDimitry Andric 540b57cec5SDimitry Andric class GCNDPPCombine : public MachineFunctionPass { 550b57cec5SDimitry Andric MachineRegisterInfo *MRI; 560b57cec5SDimitry Andric const SIInstrInfo *TII; 57fe6060f1SDimitry Andric const GCNSubtarget *ST; 580b57cec5SDimitry Andric 590b57cec5SDimitry Andric using RegSubRegPair = TargetInstrInfo::RegSubRegPair; 600b57cec5SDimitry Andric 610b57cec5SDimitry Andric MachineOperand *getOldOpndValue(MachineOperand &OldOpnd) const; 620b57cec5SDimitry Andric 63fe6060f1SDimitry Andric MachineInstr *createDPPInst(MachineInstr &OrigMI, MachineInstr &MovMI, 640b57cec5SDimitry Andric RegSubRegPair CombOldVGPR, 65fe6060f1SDimitry Andric MachineOperand *OldOpnd, bool CombBCZ, 66fe6060f1SDimitry Andric bool IsShrinkable) const; 670b57cec5SDimitry Andric 68fe6060f1SDimitry Andric MachineInstr *createDPPInst(MachineInstr &OrigMI, MachineInstr &MovMI, 69fe6060f1SDimitry Andric RegSubRegPair CombOldVGPR, bool CombBCZ, 70fe6060f1SDimitry Andric bool IsShrinkable) const; 710b57cec5SDimitry Andric 720b57cec5SDimitry Andric bool hasNoImmOrEqual(MachineInstr &MI, 730b57cec5SDimitry Andric unsigned OpndName, 740b57cec5SDimitry Andric int64_t Value, 750b57cec5SDimitry Andric int64_t Mask = -1) const; 760b57cec5SDimitry Andric 770b57cec5SDimitry Andric bool combineDPPMov(MachineInstr &MI) const; 780b57cec5SDimitry Andric 790b57cec5SDimitry Andric public: 800b57cec5SDimitry Andric static char ID; 810b57cec5SDimitry Andric 820b57cec5SDimitry Andric GCNDPPCombine() : MachineFunctionPass(ID) { 830b57cec5SDimitry Andric initializeGCNDPPCombinePass(*PassRegistry::getPassRegistry()); 840b57cec5SDimitry Andric } 850b57cec5SDimitry Andric 860b57cec5SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 870b57cec5SDimitry Andric 880b57cec5SDimitry Andric StringRef getPassName() const override { return "GCN DPP Combine"; } 890b57cec5SDimitry Andric 900b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 910b57cec5SDimitry Andric AU.setPreservesCFG(); 920b57cec5SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 930b57cec5SDimitry Andric } 94480093f4SDimitry Andric 955ffd83dbSDimitry Andric MachineFunctionProperties getRequiredProperties() const override { 965ffd83dbSDimitry Andric return MachineFunctionProperties() 975ffd83dbSDimitry Andric .set(MachineFunctionProperties::Property::IsSSA); 985ffd83dbSDimitry Andric } 995ffd83dbSDimitry Andric 100480093f4SDimitry Andric private: 101fe6060f1SDimitry Andric int getDPPOp(unsigned Op, bool IsShrinkable) const; 102fe6060f1SDimitry Andric bool isShrinkable(MachineInstr &MI) const; 1030b57cec5SDimitry Andric }; 1040b57cec5SDimitry Andric 1050b57cec5SDimitry Andric } // end anonymous namespace 1060b57cec5SDimitry Andric 1070b57cec5SDimitry Andric INITIALIZE_PASS(GCNDPPCombine, DEBUG_TYPE, "GCN DPP Combine", false, false) 1080b57cec5SDimitry Andric 1090b57cec5SDimitry Andric char GCNDPPCombine::ID = 0; 1100b57cec5SDimitry Andric 1110b57cec5SDimitry Andric char &llvm::GCNDPPCombineID = GCNDPPCombine::ID; 1120b57cec5SDimitry Andric 1130b57cec5SDimitry Andric FunctionPass *llvm::createGCNDPPCombinePass() { 1140b57cec5SDimitry Andric return new GCNDPPCombine(); 1150b57cec5SDimitry Andric } 1160b57cec5SDimitry Andric 117fe6060f1SDimitry Andric bool GCNDPPCombine::isShrinkable(MachineInstr &MI) const { 118fe6060f1SDimitry Andric unsigned Op = MI.getOpcode(); 119fe6060f1SDimitry Andric if (!TII->isVOP3(Op)) { 120fe6060f1SDimitry Andric return false; 121fe6060f1SDimitry Andric } 122fe6060f1SDimitry Andric if (!TII->hasVALU32BitEncoding(Op)) { 123fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " Inst hasn't e32 equivalent\n"); 124fe6060f1SDimitry Andric return false; 125fe6060f1SDimitry Andric } 126bdd1243dSDimitry Andric // Do not shrink True16 instructions pre-RA to avoid the restriction in 127bdd1243dSDimitry Andric // register allocation from only being able to use 128 VGPRs 128bdd1243dSDimitry Andric if (AMDGPU::isTrue16Inst(Op)) 129bdd1243dSDimitry Andric return false; 130fe6060f1SDimitry Andric if (const auto *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst)) { 131bdd1243dSDimitry Andric // Give up if there are any uses of the sdst in carry-out or VOPC. 132bdd1243dSDimitry Andric // The shrunken form of the instruction would write it to vcc instead of to 133bdd1243dSDimitry Andric // a virtual register. If we rewrote the uses the shrinking would be 134bdd1243dSDimitry Andric // possible. 135fe6060f1SDimitry Andric if (!MRI->use_nodbg_empty(SDst->getReg())) 136fe6060f1SDimitry Andric return false; 137fe6060f1SDimitry Andric } 138fe6060f1SDimitry Andric // check if other than abs|neg modifiers are set (opsel for example) 139fe6060f1SDimitry Andric const int64_t Mask = ~(SISrcMods::ABS | SISrcMods::NEG); 140fe6060f1SDimitry Andric if (!hasNoImmOrEqual(MI, AMDGPU::OpName::src0_modifiers, 0, Mask) || 141fe6060f1SDimitry Andric !hasNoImmOrEqual(MI, AMDGPU::OpName::src1_modifiers, 0, Mask) || 142fe6060f1SDimitry Andric !hasNoImmOrEqual(MI, AMDGPU::OpName::clamp, 0) || 143*0fca6ea1SDimitry Andric !hasNoImmOrEqual(MI, AMDGPU::OpName::omod, 0) || 144*0fca6ea1SDimitry Andric !hasNoImmOrEqual(MI, AMDGPU::OpName::byte_sel, 0)) { 145fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " Inst has non-default modifiers\n"); 146fe6060f1SDimitry Andric return false; 147fe6060f1SDimitry Andric } 148fe6060f1SDimitry Andric return true; 149fe6060f1SDimitry Andric } 150fe6060f1SDimitry Andric 151fe6060f1SDimitry Andric int GCNDPPCombine::getDPPOp(unsigned Op, bool IsShrinkable) const { 152753f127fSDimitry Andric int DPP32 = AMDGPU::getDPPOp32(Op); 153fe6060f1SDimitry Andric if (IsShrinkable) { 154fe6060f1SDimitry Andric assert(DPP32 == -1); 155753f127fSDimitry Andric int E32 = AMDGPU::getVOPe32(Op); 156480093f4SDimitry Andric DPP32 = (E32 == -1) ? -1 : AMDGPU::getDPPOp32(E32); 157480093f4SDimitry Andric } 158753f127fSDimitry Andric if (DPP32 != -1 && TII->pseudoToMCOpcode(DPP32) != -1) 159753f127fSDimitry Andric return DPP32; 160753f127fSDimitry Andric int DPP64 = -1; 161753f127fSDimitry Andric if (ST->hasVOP3DPP()) 162753f127fSDimitry Andric DPP64 = AMDGPU::getDPPOp64(Op); 163753f127fSDimitry Andric if (DPP64 != -1 && TII->pseudoToMCOpcode(DPP64) != -1) 164753f127fSDimitry Andric return DPP64; 165753f127fSDimitry Andric return -1; 1660b57cec5SDimitry Andric } 1670b57cec5SDimitry Andric 1680b57cec5SDimitry Andric // tracks the register operand definition and returns: 1690b57cec5SDimitry Andric // 1. immediate operand used to initialize the register if found 1700b57cec5SDimitry Andric // 2. nullptr if the register operand is undef 1710b57cec5SDimitry Andric // 3. the operand itself otherwise 1720b57cec5SDimitry Andric MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const { 1730b57cec5SDimitry Andric auto *Def = getVRegSubRegDef(getRegSubRegPair(OldOpnd), *MRI); 1740b57cec5SDimitry Andric if (!Def) 1750b57cec5SDimitry Andric return nullptr; 1760b57cec5SDimitry Andric 1770b57cec5SDimitry Andric switch(Def->getOpcode()) { 1780b57cec5SDimitry Andric default: break; 1790b57cec5SDimitry Andric case AMDGPU::IMPLICIT_DEF: 1800b57cec5SDimitry Andric return nullptr; 1810b57cec5SDimitry Andric case AMDGPU::COPY: 182fe6060f1SDimitry Andric case AMDGPU::V_MOV_B32_e32: 18381ad6265SDimitry Andric case AMDGPU::V_MOV_B64_PSEUDO: 18481ad6265SDimitry Andric case AMDGPU::V_MOV_B64_e32: 18581ad6265SDimitry Andric case AMDGPU::V_MOV_B64_e64: { 1860b57cec5SDimitry Andric auto &Op1 = Def->getOperand(1); 1870b57cec5SDimitry Andric if (Op1.isImm()) 1880b57cec5SDimitry Andric return &Op1; 1890b57cec5SDimitry Andric break; 1900b57cec5SDimitry Andric } 1910b57cec5SDimitry Andric } 1920b57cec5SDimitry Andric return &OldOpnd; 1930b57cec5SDimitry Andric } 1940b57cec5SDimitry Andric 1955f757f3fSDimitry Andric [[maybe_unused]] static unsigned getOperandSize(MachineInstr &MI, unsigned Idx, 1965f757f3fSDimitry Andric MachineRegisterInfo &MRI) { 1975f757f3fSDimitry Andric int16_t RegClass = MI.getDesc().operands()[Idx].RegClass; 1985f757f3fSDimitry Andric if (RegClass == -1) 1995f757f3fSDimitry Andric return 0; 2005f757f3fSDimitry Andric 2015f757f3fSDimitry Andric const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); 2025f757f3fSDimitry Andric return TRI->getRegSizeInBits(*TRI->getRegClass(RegClass)); 2035f757f3fSDimitry Andric } 2045f757f3fSDimitry Andric 2050b57cec5SDimitry Andric MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, 2060b57cec5SDimitry Andric MachineInstr &MovMI, 2070b57cec5SDimitry Andric RegSubRegPair CombOldVGPR, 208fe6060f1SDimitry Andric bool CombBCZ, 209fe6060f1SDimitry Andric bool IsShrinkable) const { 210fe6060f1SDimitry Andric assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp || 21181ad6265SDimitry Andric MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp || 212fe6060f1SDimitry Andric MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO); 2130b57cec5SDimitry Andric 214753f127fSDimitry Andric bool HasVOP3DPP = ST->hasVOP3DPP(); 2150b57cec5SDimitry Andric auto OrigOp = OrigMI.getOpcode(); 216fe6060f1SDimitry Andric auto DPPOp = getDPPOp(OrigOp, IsShrinkable); 2170b57cec5SDimitry Andric if (DPPOp == -1) { 2180b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " failed: no DPP opcode\n"); 2190b57cec5SDimitry Andric return nullptr; 2200b57cec5SDimitry Andric } 221fcaf7f86SDimitry Andric int OrigOpE32 = AMDGPU::getVOPe32(OrigOp); 222fcaf7f86SDimitry Andric // Prior checks cover Mask with VOPC condition, but not on purpose 223fcaf7f86SDimitry Andric auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask); 224fcaf7f86SDimitry Andric assert(RowMaskOpnd && RowMaskOpnd->isImm()); 225fcaf7f86SDimitry Andric auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask); 226fcaf7f86SDimitry Andric assert(BankMaskOpnd && BankMaskOpnd->isImm()); 227fcaf7f86SDimitry Andric const bool MaskAllLanes = 228fcaf7f86SDimitry Andric RowMaskOpnd->getImm() == 0xF && BankMaskOpnd->getImm() == 0xF; 229fcaf7f86SDimitry Andric (void)MaskAllLanes; 230bdd1243dSDimitry Andric assert((MaskAllLanes || 231bdd1243dSDimitry Andric !(TII->isVOPC(DPPOp) || (TII->isVOP3(DPPOp) && OrigOpE32 != -1 && 232bdd1243dSDimitry Andric TII->isVOPC(OrigOpE32)))) && 233fcaf7f86SDimitry Andric "VOPC cannot form DPP unless mask is full"); 2340b57cec5SDimitry Andric 2350b57cec5SDimitry Andric auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI, 2365ffd83dbSDimitry Andric OrigMI.getDebugLoc(), TII->get(DPPOp)) 2375ffd83dbSDimitry Andric .setMIFlags(OrigMI.getFlags()); 2385ffd83dbSDimitry Andric 2390b57cec5SDimitry Andric bool Fail = false; 2400b57cec5SDimitry Andric do { 241753f127fSDimitry Andric int NumOperands = 0; 242753f127fSDimitry Andric if (auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst)) { 2430b57cec5SDimitry Andric DPPInst.add(*Dst); 244753f127fSDimitry Andric ++NumOperands; 245753f127fSDimitry Andric } 246753f127fSDimitry Andric if (auto *SDst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::sdst)) { 247753f127fSDimitry Andric if (TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, SDst)) { 248753f127fSDimitry Andric DPPInst.add(*SDst); 249753f127fSDimitry Andric ++NumOperands; 250753f127fSDimitry Andric } 251753f127fSDimitry Andric // If we shrunk a 64bit vop3b to 32bits, just ignore the sdst 252753f127fSDimitry Andric } 2530b57cec5SDimitry Andric 2540b57cec5SDimitry Andric const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old); 2550b57cec5SDimitry Andric if (OldIdx != -1) { 2560b57cec5SDimitry Andric assert(OldIdx == NumOperands); 257fe6060f1SDimitry Andric assert(isOfRegClass( 258fe6060f1SDimitry Andric CombOldVGPR, 259fe6060f1SDimitry Andric *MRI->getRegClass( 260fe6060f1SDimitry Andric TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst)->getReg()), 261fe6060f1SDimitry Andric *MRI)); 2628bcb0991SDimitry Andric auto *Def = getVRegSubRegDef(CombOldVGPR, *MRI); 2638bcb0991SDimitry Andric DPPInst.addReg(CombOldVGPR.Reg, Def ? 0 : RegState::Undef, 2648bcb0991SDimitry Andric CombOldVGPR.SubReg); 2650b57cec5SDimitry Andric ++NumOperands; 266fcaf7f86SDimitry Andric } else if (TII->isVOPC(DPPOp) || (TII->isVOP3(DPPOp) && OrigOpE32 != -1 && 267fcaf7f86SDimitry Andric TII->isVOPC(OrigOpE32))) { 268fcaf7f86SDimitry Andric // VOPC DPP and VOPC promoted to VOP3 DPP do not have an old operand 269fcaf7f86SDimitry Andric // because they write to SGPRs not VGPRs 2700b57cec5SDimitry Andric } else { 2710b57cec5SDimitry Andric // TODO: this discards MAC/FMA instructions for now, let's add it later 2720b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " failed: no old operand in DPP instruction," 2730b57cec5SDimitry Andric " TBD\n"); 2740b57cec5SDimitry Andric Fail = true; 2750b57cec5SDimitry Andric break; 2760b57cec5SDimitry Andric } 2770b57cec5SDimitry Andric 2787a6dacacSDimitry Andric auto *Mod0 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0_modifiers); 2797a6dacacSDimitry Andric if (Mod0) { 2800b57cec5SDimitry Andric assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp, 2810b57cec5SDimitry Andric AMDGPU::OpName::src0_modifiers)); 282753f127fSDimitry Andric assert(HasVOP3DPP || 283753f127fSDimitry Andric (0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)))); 2840b57cec5SDimitry Andric DPPInst.addImm(Mod0->getImm()); 2850b57cec5SDimitry Andric ++NumOperands; 286bdd1243dSDimitry Andric } else if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src0_modifiers)) { 2878bcb0991SDimitry Andric DPPInst.addImm(0); 2888bcb0991SDimitry Andric ++NumOperands; 2890b57cec5SDimitry Andric } 2900b57cec5SDimitry Andric auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0); 2910b57cec5SDimitry Andric assert(Src0); 2925f757f3fSDimitry Andric int Src0Idx = NumOperands; 2930b57cec5SDimitry Andric if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) { 2940b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " failed: src0 is illegal\n"); 2950b57cec5SDimitry Andric Fail = true; 2960b57cec5SDimitry Andric break; 2970b57cec5SDimitry Andric } 2980b57cec5SDimitry Andric DPPInst.add(*Src0); 2990b57cec5SDimitry Andric DPPInst->getOperand(NumOperands).setIsKill(false); 3000b57cec5SDimitry Andric ++NumOperands; 3010b57cec5SDimitry Andric 3027a6dacacSDimitry Andric auto *Mod1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1_modifiers); 3037a6dacacSDimitry Andric if (Mod1) { 3040b57cec5SDimitry Andric assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp, 3050b57cec5SDimitry Andric AMDGPU::OpName::src1_modifiers)); 306753f127fSDimitry Andric assert(HasVOP3DPP || 307753f127fSDimitry Andric (0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)))); 3080b57cec5SDimitry Andric DPPInst.addImm(Mod1->getImm()); 3090b57cec5SDimitry Andric ++NumOperands; 310bdd1243dSDimitry Andric } else if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src1_modifiers)) { 3118bcb0991SDimitry Andric DPPInst.addImm(0); 3128bcb0991SDimitry Andric ++NumOperands; 3130b57cec5SDimitry Andric } 314753f127fSDimitry Andric auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1); 315753f127fSDimitry Andric if (Src1) { 3165f757f3fSDimitry Andric int OpNum = NumOperands; 3175f757f3fSDimitry Andric // If subtarget does not support SGPRs for src1 operand then the 3185f757f3fSDimitry Andric // requirements are the same as for src0. We check src0 instead because 3195f757f3fSDimitry Andric // pseudos are shared between subtargets and allow SGPR for src1 on all. 3205f757f3fSDimitry Andric if (!ST->hasDPPSrc1SGPR()) { 3215f757f3fSDimitry Andric assert(getOperandSize(*DPPInst, Src0Idx, *MRI) == 3225f757f3fSDimitry Andric getOperandSize(*DPPInst, NumOperands, *MRI) && 3235f757f3fSDimitry Andric "Src0 and Src1 operands should have the same size"); 3245f757f3fSDimitry Andric OpNum = Src0Idx; 3255f757f3fSDimitry Andric } 3265f757f3fSDimitry Andric if (!TII->isOperandLegal(*DPPInst.getInstr(), OpNum, Src1)) { 3270b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " failed: src1 is illegal\n"); 3280b57cec5SDimitry Andric Fail = true; 3290b57cec5SDimitry Andric break; 3300b57cec5SDimitry Andric } 3310b57cec5SDimitry Andric DPPInst.add(*Src1); 3320b57cec5SDimitry Andric ++NumOperands; 3330b57cec5SDimitry Andric } 3347a6dacacSDimitry Andric 3357a6dacacSDimitry Andric auto *Mod2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2_modifiers); 3367a6dacacSDimitry Andric if (Mod2) { 337753f127fSDimitry Andric assert(NumOperands == 338753f127fSDimitry Andric AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::src2_modifiers)); 339753f127fSDimitry Andric assert(HasVOP3DPP || 340753f127fSDimitry Andric (0LL == (Mod2->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)))); 341753f127fSDimitry Andric DPPInst.addImm(Mod2->getImm()); 342753f127fSDimitry Andric ++NumOperands; 343753f127fSDimitry Andric } 344753f127fSDimitry Andric auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2); 345753f127fSDimitry Andric if (Src2) { 346480093f4SDimitry Andric if (!TII->getNamedOperand(*DPPInst.getInstr(), AMDGPU::OpName::src2) || 347480093f4SDimitry Andric !TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) { 3480b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " failed: src2 is illegal\n"); 3490b57cec5SDimitry Andric Fail = true; 3500b57cec5SDimitry Andric break; 3510b57cec5SDimitry Andric } 3520b57cec5SDimitry Andric DPPInst.add(*Src2); 353753f127fSDimitry Andric ++NumOperands; 3540b57cec5SDimitry Andric } 3557a6dacacSDimitry Andric 356753f127fSDimitry Andric if (HasVOP3DPP) { 357753f127fSDimitry Andric auto *ClampOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::clamp); 358bdd1243dSDimitry Andric if (ClampOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::clamp)) { 359753f127fSDimitry Andric DPPInst.addImm(ClampOpr->getImm()); 360753f127fSDimitry Andric } 361753f127fSDimitry Andric auto *VdstInOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst_in); 362753f127fSDimitry Andric if (VdstInOpr && 363bdd1243dSDimitry Andric AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::vdst_in)) { 364753f127fSDimitry Andric DPPInst.add(*VdstInOpr); 365753f127fSDimitry Andric } 366753f127fSDimitry Andric auto *OmodOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::omod); 367bdd1243dSDimitry Andric if (OmodOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::omod)) { 368753f127fSDimitry Andric DPPInst.addImm(OmodOpr->getImm()); 369753f127fSDimitry Andric } 370753f127fSDimitry Andric // Validate OP_SEL has to be set to all 0 and OP_SEL_HI has to be set to 371753f127fSDimitry Andric // all 1. 372*0fca6ea1SDimitry Andric if (TII->getNamedOperand(OrigMI, AMDGPU::OpName::op_sel)) { 3737a6dacacSDimitry Andric int64_t OpSel = 0; 3747a6dacacSDimitry Andric OpSel |= (Mod0 ? (!!(Mod0->getImm() & SISrcMods::OP_SEL_0) << 0) : 0); 3757a6dacacSDimitry Andric OpSel |= (Mod1 ? (!!(Mod1->getImm() & SISrcMods::OP_SEL_0) << 1) : 0); 3767a6dacacSDimitry Andric OpSel |= (Mod2 ? (!!(Mod2->getImm() & SISrcMods::OP_SEL_0) << 2) : 0); 3777a6dacacSDimitry Andric if (Mod0 && TII->isVOP3(OrigMI) && !TII->isVOP3P(OrigMI)) 3787a6dacacSDimitry Andric OpSel |= !!(Mod0->getImm() & SISrcMods::DST_OP_SEL) << 3; 3797a6dacacSDimitry Andric 380753f127fSDimitry Andric if (OpSel != 0) { 381753f127fSDimitry Andric LLVM_DEBUG(dbgs() << " failed: op_sel must be zero\n"); 382753f127fSDimitry Andric Fail = true; 383753f127fSDimitry Andric break; 384753f127fSDimitry Andric } 385bdd1243dSDimitry Andric if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::op_sel)) 386753f127fSDimitry Andric DPPInst.addImm(OpSel); 387753f127fSDimitry Andric } 388*0fca6ea1SDimitry Andric if (TII->getNamedOperand(OrigMI, AMDGPU::OpName::op_sel_hi)) { 3897a6dacacSDimitry Andric int64_t OpSelHi = 0; 3907a6dacacSDimitry Andric OpSelHi |= (Mod0 ? (!!(Mod0->getImm() & SISrcMods::OP_SEL_1) << 0) : 0); 3917a6dacacSDimitry Andric OpSelHi |= (Mod1 ? (!!(Mod1->getImm() & SISrcMods::OP_SEL_1) << 1) : 0); 3927a6dacacSDimitry Andric OpSelHi |= (Mod2 ? (!!(Mod2->getImm() & SISrcMods::OP_SEL_1) << 2) : 0); 3937a6dacacSDimitry Andric 394753f127fSDimitry Andric // Only vop3p has op_sel_hi, and all vop3p have 3 operands, so check 395753f127fSDimitry Andric // the bitmask for 3 op_sel_hi bits set 396753f127fSDimitry Andric assert(Src2 && "Expected vop3p with 3 operands"); 397753f127fSDimitry Andric if (OpSelHi != 7) { 398753f127fSDimitry Andric LLVM_DEBUG(dbgs() << " failed: op_sel_hi must be all set to one\n"); 399753f127fSDimitry Andric Fail = true; 400753f127fSDimitry Andric break; 401753f127fSDimitry Andric } 402bdd1243dSDimitry Andric if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::op_sel_hi)) 403753f127fSDimitry Andric DPPInst.addImm(OpSelHi); 404753f127fSDimitry Andric } 405753f127fSDimitry Andric auto *NegOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::neg_lo); 406bdd1243dSDimitry Andric if (NegOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::neg_lo)) { 407753f127fSDimitry Andric DPPInst.addImm(NegOpr->getImm()); 408753f127fSDimitry Andric } 409753f127fSDimitry Andric auto *NegHiOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::neg_hi); 410bdd1243dSDimitry Andric if (NegHiOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::neg_hi)) { 411753f127fSDimitry Andric DPPInst.addImm(NegHiOpr->getImm()); 412753f127fSDimitry Andric } 413*0fca6ea1SDimitry Andric auto *ByteSelOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::byte_sel); 414*0fca6ea1SDimitry Andric if (ByteSelOpr && 415*0fca6ea1SDimitry Andric AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::byte_sel)) { 416*0fca6ea1SDimitry Andric DPPInst.addImm(ByteSelOpr->getImm()); 417*0fca6ea1SDimitry Andric } 418753f127fSDimitry Andric } 4190b57cec5SDimitry Andric DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl)); 4200b57cec5SDimitry Andric DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask)); 4210b57cec5SDimitry Andric DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask)); 4220b57cec5SDimitry Andric DPPInst.addImm(CombBCZ ? 1 : 0); 4230b57cec5SDimitry Andric } while (false); 4240b57cec5SDimitry Andric 4250b57cec5SDimitry Andric if (Fail) { 4260b57cec5SDimitry Andric DPPInst.getInstr()->eraseFromParent(); 4270b57cec5SDimitry Andric return nullptr; 4280b57cec5SDimitry Andric } 4290b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " combined: " << *DPPInst.getInstr()); 4300b57cec5SDimitry Andric return DPPInst.getInstr(); 4310b57cec5SDimitry Andric } 4320b57cec5SDimitry Andric 4330b57cec5SDimitry Andric static bool isIdentityValue(unsigned OrigMIOp, MachineOperand *OldOpnd) { 4340b57cec5SDimitry Andric assert(OldOpnd->isImm()); 4350b57cec5SDimitry Andric switch (OrigMIOp) { 4360b57cec5SDimitry Andric default: break; 4370b57cec5SDimitry Andric case AMDGPU::V_ADD_U32_e32: 4380b57cec5SDimitry Andric case AMDGPU::V_ADD_U32_e64: 439e8d8bef9SDimitry Andric case AMDGPU::V_ADD_CO_U32_e32: 440e8d8bef9SDimitry Andric case AMDGPU::V_ADD_CO_U32_e64: 4410b57cec5SDimitry Andric case AMDGPU::V_OR_B32_e32: 4420b57cec5SDimitry Andric case AMDGPU::V_OR_B32_e64: 4430b57cec5SDimitry Andric case AMDGPU::V_SUBREV_U32_e32: 4440b57cec5SDimitry Andric case AMDGPU::V_SUBREV_U32_e64: 445e8d8bef9SDimitry Andric case AMDGPU::V_SUBREV_CO_U32_e32: 446e8d8bef9SDimitry Andric case AMDGPU::V_SUBREV_CO_U32_e64: 4470b57cec5SDimitry Andric case AMDGPU::V_MAX_U32_e32: 4480b57cec5SDimitry Andric case AMDGPU::V_MAX_U32_e64: 4490b57cec5SDimitry Andric case AMDGPU::V_XOR_B32_e32: 4500b57cec5SDimitry Andric case AMDGPU::V_XOR_B32_e64: 4510b57cec5SDimitry Andric if (OldOpnd->getImm() == 0) 4520b57cec5SDimitry Andric return true; 4530b57cec5SDimitry Andric break; 4540b57cec5SDimitry Andric case AMDGPU::V_AND_B32_e32: 4550b57cec5SDimitry Andric case AMDGPU::V_AND_B32_e64: 4560b57cec5SDimitry Andric case AMDGPU::V_MIN_U32_e32: 4570b57cec5SDimitry Andric case AMDGPU::V_MIN_U32_e64: 4580b57cec5SDimitry Andric if (static_cast<uint32_t>(OldOpnd->getImm()) == 4590b57cec5SDimitry Andric std::numeric_limits<uint32_t>::max()) 4600b57cec5SDimitry Andric return true; 4610b57cec5SDimitry Andric break; 4620b57cec5SDimitry Andric case AMDGPU::V_MIN_I32_e32: 4630b57cec5SDimitry Andric case AMDGPU::V_MIN_I32_e64: 4640b57cec5SDimitry Andric if (static_cast<int32_t>(OldOpnd->getImm()) == 4650b57cec5SDimitry Andric std::numeric_limits<int32_t>::max()) 4660b57cec5SDimitry Andric return true; 4670b57cec5SDimitry Andric break; 4680b57cec5SDimitry Andric case AMDGPU::V_MAX_I32_e32: 4690b57cec5SDimitry Andric case AMDGPU::V_MAX_I32_e64: 4700b57cec5SDimitry Andric if (static_cast<int32_t>(OldOpnd->getImm()) == 4710b57cec5SDimitry Andric std::numeric_limits<int32_t>::min()) 4720b57cec5SDimitry Andric return true; 4730b57cec5SDimitry Andric break; 4740b57cec5SDimitry Andric case AMDGPU::V_MUL_I32_I24_e32: 4750b57cec5SDimitry Andric case AMDGPU::V_MUL_I32_I24_e64: 4760b57cec5SDimitry Andric case AMDGPU::V_MUL_U32_U24_e32: 4770b57cec5SDimitry Andric case AMDGPU::V_MUL_U32_U24_e64: 4780b57cec5SDimitry Andric if (OldOpnd->getImm() == 1) 4790b57cec5SDimitry Andric return true; 4800b57cec5SDimitry Andric break; 4810b57cec5SDimitry Andric } 4820b57cec5SDimitry Andric return false; 4830b57cec5SDimitry Andric } 4840b57cec5SDimitry Andric 485fe6060f1SDimitry Andric MachineInstr *GCNDPPCombine::createDPPInst( 486fe6060f1SDimitry Andric MachineInstr &OrigMI, MachineInstr &MovMI, RegSubRegPair CombOldVGPR, 487fe6060f1SDimitry Andric MachineOperand *OldOpndValue, bool CombBCZ, bool IsShrinkable) const { 4880b57cec5SDimitry Andric assert(CombOldVGPR.Reg); 4890b57cec5SDimitry Andric if (!CombBCZ && OldOpndValue && OldOpndValue->isImm()) { 4900b57cec5SDimitry Andric auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1); 4910b57cec5SDimitry Andric if (!Src1 || !Src1->isReg()) { 4920b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " failed: no src1 or it isn't a register\n"); 4930b57cec5SDimitry Andric return nullptr; 4940b57cec5SDimitry Andric } 4950b57cec5SDimitry Andric if (!isIdentityValue(OrigMI.getOpcode(), OldOpndValue)) { 4960b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " failed: old immediate isn't an identity\n"); 4970b57cec5SDimitry Andric return nullptr; 4980b57cec5SDimitry Andric } 4990b57cec5SDimitry Andric CombOldVGPR = getRegSubRegPair(*Src1); 500fe6060f1SDimitry Andric auto MovDst = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst); 501fe6060f1SDimitry Andric const TargetRegisterClass *RC = MRI->getRegClass(MovDst->getReg()); 502fe6060f1SDimitry Andric if (!isOfRegClass(CombOldVGPR, *RC, *MRI)) { 503fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " failed: src1 has wrong register class\n"); 5040b57cec5SDimitry Andric return nullptr; 5050b57cec5SDimitry Andric } 5060b57cec5SDimitry Andric } 507fe6060f1SDimitry Andric return createDPPInst(OrigMI, MovMI, CombOldVGPR, CombBCZ, IsShrinkable); 5080b57cec5SDimitry Andric } 5090b57cec5SDimitry Andric 5100b57cec5SDimitry Andric // returns true if MI doesn't have OpndName immediate operand or the 5110b57cec5SDimitry Andric // operand has Value 5120b57cec5SDimitry Andric bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName, 5130b57cec5SDimitry Andric int64_t Value, int64_t Mask) const { 5140b57cec5SDimitry Andric auto *Imm = TII->getNamedOperand(MI, OpndName); 5150b57cec5SDimitry Andric if (!Imm) 5160b57cec5SDimitry Andric return true; 5170b57cec5SDimitry Andric 5180b57cec5SDimitry Andric assert(Imm->isImm()); 5190b57cec5SDimitry Andric return (Imm->getImm() & Mask) == Value; 5200b57cec5SDimitry Andric } 5210b57cec5SDimitry Andric 5220b57cec5SDimitry Andric bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const { 523fe6060f1SDimitry Andric assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp || 52481ad6265SDimitry Andric MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp || 525fe6060f1SDimitry Andric MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO); 5260b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI); 5270b57cec5SDimitry Andric 5280b57cec5SDimitry Andric auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst); 5290b57cec5SDimitry Andric assert(DstOpnd && DstOpnd->isReg()); 5300b57cec5SDimitry Andric auto DPPMovReg = DstOpnd->getReg(); 5318bcb0991SDimitry Andric if (DPPMovReg.isPhysical()) { 5328bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << " failed: dpp move writes physreg\n"); 5338bcb0991SDimitry Andric return false; 5348bcb0991SDimitry Andric } 5350b57cec5SDimitry Andric if (execMayBeModifiedBeforeAnyUse(*MRI, DPPMovReg, MovMI)) { 5360b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " failed: EXEC mask should remain the same" 5370b57cec5SDimitry Andric " for all uses\n"); 5380b57cec5SDimitry Andric return false; 5390b57cec5SDimitry Andric } 5400b57cec5SDimitry Andric 54181ad6265SDimitry Andric if (MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO || 54281ad6265SDimitry Andric MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp) { 543fe6060f1SDimitry Andric auto *DppCtrl = TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl); 544fe6060f1SDimitry Andric assert(DppCtrl && DppCtrl->isImm()); 5455f757f3fSDimitry Andric if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl->getImm())) { 546fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " failed: 64 bit dpp move uses unsupported" 547fe6060f1SDimitry Andric " control value\n"); 548fe6060f1SDimitry Andric // Let it split, then control may become legal. 549fe6060f1SDimitry Andric return false; 550fe6060f1SDimitry Andric } 551fe6060f1SDimitry Andric } 552fe6060f1SDimitry Andric 5530b57cec5SDimitry Andric auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask); 5540b57cec5SDimitry Andric assert(RowMaskOpnd && RowMaskOpnd->isImm()); 5550b57cec5SDimitry Andric auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask); 5560b57cec5SDimitry Andric assert(BankMaskOpnd && BankMaskOpnd->isImm()); 5570b57cec5SDimitry Andric const bool MaskAllLanes = RowMaskOpnd->getImm() == 0xF && 5580b57cec5SDimitry Andric BankMaskOpnd->getImm() == 0xF; 5590b57cec5SDimitry Andric 5600b57cec5SDimitry Andric auto *BCZOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bound_ctrl); 5610b57cec5SDimitry Andric assert(BCZOpnd && BCZOpnd->isImm()); 5620b57cec5SDimitry Andric bool BoundCtrlZero = BCZOpnd->getImm(); 5630b57cec5SDimitry Andric 5640b57cec5SDimitry Andric auto *OldOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::old); 5658bcb0991SDimitry Andric auto *SrcOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0); 5660b57cec5SDimitry Andric assert(OldOpnd && OldOpnd->isReg()); 5678bcb0991SDimitry Andric assert(SrcOpnd && SrcOpnd->isReg()); 5688bcb0991SDimitry Andric if (OldOpnd->getReg().isPhysical() || SrcOpnd->getReg().isPhysical()) { 5698bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << " failed: dpp move reads physreg\n"); 5708bcb0991SDimitry Andric return false; 5718bcb0991SDimitry Andric } 5720b57cec5SDimitry Andric 5730b57cec5SDimitry Andric auto * const OldOpndValue = getOldOpndValue(*OldOpnd); 5740b57cec5SDimitry Andric // OldOpndValue is either undef (IMPLICIT_DEF) or immediate or something else 5750b57cec5SDimitry Andric // We could use: assert(!OldOpndValue || OldOpndValue->isImm()) 5760b57cec5SDimitry Andric // but the third option is used to distinguish undef from non-immediate 5770b57cec5SDimitry Andric // to reuse IMPLICIT_DEF instruction later 5780b57cec5SDimitry Andric assert(!OldOpndValue || OldOpndValue->isImm() || OldOpndValue == OldOpnd); 5790b57cec5SDimitry Andric 5800b57cec5SDimitry Andric bool CombBCZ = false; 5810b57cec5SDimitry Andric 5820b57cec5SDimitry Andric if (MaskAllLanes && BoundCtrlZero) { // [1] 5830b57cec5SDimitry Andric CombBCZ = true; 5840b57cec5SDimitry Andric } else { 5850b57cec5SDimitry Andric if (!OldOpndValue || !OldOpndValue->isImm()) { 5860b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " failed: the DPP mov isn't combinable\n"); 5870b57cec5SDimitry Andric return false; 5880b57cec5SDimitry Andric } 5890b57cec5SDimitry Andric 5900b57cec5SDimitry Andric if (OldOpndValue->getImm() == 0) { 5910b57cec5SDimitry Andric if (MaskAllLanes) { 5920b57cec5SDimitry Andric assert(!BoundCtrlZero); // by check [1] 5930b57cec5SDimitry Andric CombBCZ = true; 5940b57cec5SDimitry Andric } 5950b57cec5SDimitry Andric } else if (BoundCtrlZero) { 5960b57cec5SDimitry Andric assert(!MaskAllLanes); // by check [1] 5970b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << 5980b57cec5SDimitry Andric " failed: old!=0 and bctrl:0 and not all lanes isn't combinable\n"); 5990b57cec5SDimitry Andric return false; 6000b57cec5SDimitry Andric } 6010b57cec5SDimitry Andric } 6020b57cec5SDimitry Andric 6030b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " old="; 6040b57cec5SDimitry Andric if (!OldOpndValue) 6050b57cec5SDimitry Andric dbgs() << "undef"; 6060b57cec5SDimitry Andric else 6070b57cec5SDimitry Andric dbgs() << *OldOpndValue; 6080b57cec5SDimitry Andric dbgs() << ", bound_ctrl=" << CombBCZ << '\n'); 6090b57cec5SDimitry Andric 6100b57cec5SDimitry Andric SmallVector<MachineInstr*, 4> OrigMIs, DPPMIs; 6118bcb0991SDimitry Andric DenseMap<MachineInstr*, SmallVector<unsigned, 4>> RegSeqWithOpNos; 6120b57cec5SDimitry Andric auto CombOldVGPR = getRegSubRegPair(*OldOpnd); 6130b57cec5SDimitry Andric // try to reuse previous old reg if its undefined (IMPLICIT_DEF) 6140b57cec5SDimitry Andric if (CombBCZ && OldOpndValue) { // CombOldVGPR should be undef 615fe6060f1SDimitry Andric const TargetRegisterClass *RC = MRI->getRegClass(DPPMovReg); 6160b57cec5SDimitry Andric CombOldVGPR = RegSubRegPair( 617fe6060f1SDimitry Andric MRI->createVirtualRegister(RC)); 6180b57cec5SDimitry Andric auto UndefInst = BuildMI(*MovMI.getParent(), MovMI, MovMI.getDebugLoc(), 6190b57cec5SDimitry Andric TII->get(AMDGPU::IMPLICIT_DEF), CombOldVGPR.Reg); 6200b57cec5SDimitry Andric DPPMIs.push_back(UndefInst.getInstr()); 6210b57cec5SDimitry Andric } 6220b57cec5SDimitry Andric 6230b57cec5SDimitry Andric OrigMIs.push_back(&MovMI); 6240b57cec5SDimitry Andric bool Rollback = true; 6258bcb0991SDimitry Andric SmallVector<MachineOperand*, 16> Uses; 6268bcb0991SDimitry Andric 6270b57cec5SDimitry Andric for (auto &Use : MRI->use_nodbg_operands(DPPMovReg)) { 6288bcb0991SDimitry Andric Uses.push_back(&Use); 6298bcb0991SDimitry Andric } 6308bcb0991SDimitry Andric 6318bcb0991SDimitry Andric while (!Uses.empty()) { 6328bcb0991SDimitry Andric MachineOperand *Use = Uses.pop_back_val(); 6330b57cec5SDimitry Andric Rollback = true; 6340b57cec5SDimitry Andric 6358bcb0991SDimitry Andric auto &OrigMI = *Use->getParent(); 6360b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " try: " << OrigMI); 6370b57cec5SDimitry Andric 6380b57cec5SDimitry Andric auto OrigOp = OrigMI.getOpcode(); 639bdd1243dSDimitry Andric assert((TII->get(OrigOp).getSize() != 4 || !AMDGPU::isTrue16Inst(OrigOp)) && 640bdd1243dSDimitry Andric "There should not be e32 True16 instructions pre-RA"); 6418bcb0991SDimitry Andric if (OrigOp == AMDGPU::REG_SEQUENCE) { 6428bcb0991SDimitry Andric Register FwdReg = OrigMI.getOperand(0).getReg(); 6438bcb0991SDimitry Andric unsigned FwdSubReg = 0; 6448bcb0991SDimitry Andric 6458bcb0991SDimitry Andric if (execMayBeModifiedBeforeAnyUse(*MRI, FwdReg, OrigMI)) { 6468bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << " failed: EXEC mask should remain the same" 6478bcb0991SDimitry Andric " for all uses\n"); 6488bcb0991SDimitry Andric break; 6498bcb0991SDimitry Andric } 6508bcb0991SDimitry Andric 6518bcb0991SDimitry Andric unsigned OpNo, E = OrigMI.getNumOperands(); 6528bcb0991SDimitry Andric for (OpNo = 1; OpNo < E; OpNo += 2) { 6538bcb0991SDimitry Andric if (OrigMI.getOperand(OpNo).getReg() == DPPMovReg) { 6548bcb0991SDimitry Andric FwdSubReg = OrigMI.getOperand(OpNo + 1).getImm(); 6558bcb0991SDimitry Andric break; 6568bcb0991SDimitry Andric } 6578bcb0991SDimitry Andric } 6588bcb0991SDimitry Andric 6598bcb0991SDimitry Andric if (!FwdSubReg) 6608bcb0991SDimitry Andric break; 6618bcb0991SDimitry Andric 6628bcb0991SDimitry Andric for (auto &Op : MRI->use_nodbg_operands(FwdReg)) { 6638bcb0991SDimitry Andric if (Op.getSubReg() == FwdSubReg) 6648bcb0991SDimitry Andric Uses.push_back(&Op); 6658bcb0991SDimitry Andric } 6668bcb0991SDimitry Andric RegSeqWithOpNos[&OrigMI].push_back(OpNo); 6678bcb0991SDimitry Andric continue; 6688bcb0991SDimitry Andric } 6698bcb0991SDimitry Andric 670fe6060f1SDimitry Andric bool IsShrinkable = isShrinkable(OrigMI); 671753f127fSDimitry Andric if (!(IsShrinkable || 672753f127fSDimitry Andric ((TII->isVOP3P(OrigOp) || TII->isVOPC(OrigOp) || 673753f127fSDimitry Andric TII->isVOP3(OrigOp)) && 674753f127fSDimitry Andric ST->hasVOP3DPP()) || 675753f127fSDimitry Andric TII->isVOP1(OrigOp) || TII->isVOP2(OrigOp))) { 676753f127fSDimitry Andric LLVM_DEBUG(dbgs() << " failed: not VOP1/2/3/3P/C\n"); 677753f127fSDimitry Andric break; 678753f127fSDimitry Andric } 679753f127fSDimitry Andric if (OrigMI.modifiesRegister(AMDGPU::EXEC, ST->getRegisterInfo())) { 680753f127fSDimitry Andric LLVM_DEBUG(dbgs() << " failed: can't combine v_cmpx\n"); 6810b57cec5SDimitry Andric break; 6820b57cec5SDimitry Andric } 6830b57cec5SDimitry Andric 6845ffd83dbSDimitry Andric auto *Src0 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0); 6855ffd83dbSDimitry Andric auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1); 6865ffd83dbSDimitry Andric if (Use != Src0 && !(Use == Src1 && OrigMI.isCommutable())) { // [1] 6875ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << " failed: no suitable operands\n"); 6885ffd83dbSDimitry Andric break; 6895ffd83dbSDimitry Andric } 6905ffd83dbSDimitry Andric 691753f127fSDimitry Andric auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2); 6925ffd83dbSDimitry Andric assert(Src0 && "Src1 without Src0?"); 693753f127fSDimitry Andric if ((Use == Src0 && ((Src1 && Src1->isIdenticalTo(*Src0)) || 694753f127fSDimitry Andric (Src2 && Src2->isIdenticalTo(*Src0)))) || 695753f127fSDimitry Andric (Use == Src1 && (Src1->isIdenticalTo(*Src0) || 696753f127fSDimitry Andric (Src2 && Src2->isIdenticalTo(*Src1))))) { 6975ffd83dbSDimitry Andric LLVM_DEBUG( 6985ffd83dbSDimitry Andric dbgs() 6995ffd83dbSDimitry Andric << " " << OrigMI 7005ffd83dbSDimitry Andric << " failed: DPP register is used more than once per instruction\n"); 7015ffd83dbSDimitry Andric break; 7025ffd83dbSDimitry Andric } 7035ffd83dbSDimitry Andric 7040b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " combining: " << OrigMI); 7055ffd83dbSDimitry Andric if (Use == Src0) { 7060b57cec5SDimitry Andric if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR, 707fe6060f1SDimitry Andric OldOpndValue, CombBCZ, IsShrinkable)) { 7080b57cec5SDimitry Andric DPPMIs.push_back(DPPInst); 7090b57cec5SDimitry Andric Rollback = false; 7100b57cec5SDimitry Andric } 7115ffd83dbSDimitry Andric } else { 7125ffd83dbSDimitry Andric assert(Use == Src1 && OrigMI.isCommutable()); // by check [1] 7130b57cec5SDimitry Andric auto *BB = OrigMI.getParent(); 7140b57cec5SDimitry Andric auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI); 7150b57cec5SDimitry Andric BB->insert(OrigMI, NewMI); 7160b57cec5SDimitry Andric if (TII->commuteInstruction(*NewMI)) { 7170b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " commuted: " << *NewMI); 718fe6060f1SDimitry Andric if (auto *DPPInst = 719fe6060f1SDimitry Andric createDPPInst(*NewMI, MovMI, CombOldVGPR, OldOpndValue, CombBCZ, 720fe6060f1SDimitry Andric IsShrinkable)) { 7210b57cec5SDimitry Andric DPPMIs.push_back(DPPInst); 7220b57cec5SDimitry Andric Rollback = false; 7230b57cec5SDimitry Andric } 7240b57cec5SDimitry Andric } else 7250b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " failed: cannot be commuted\n"); 7260b57cec5SDimitry Andric NewMI->eraseFromParent(); 7275ffd83dbSDimitry Andric } 7280b57cec5SDimitry Andric if (Rollback) 7290b57cec5SDimitry Andric break; 7300b57cec5SDimitry Andric OrigMIs.push_back(&OrigMI); 7310b57cec5SDimitry Andric } 7320b57cec5SDimitry Andric 7338bcb0991SDimitry Andric Rollback |= !Uses.empty(); 7348bcb0991SDimitry Andric 7350b57cec5SDimitry Andric for (auto *MI : *(Rollback? &DPPMIs : &OrigMIs)) 7360b57cec5SDimitry Andric MI->eraseFromParent(); 7370b57cec5SDimitry Andric 7388bcb0991SDimitry Andric if (!Rollback) { 7398bcb0991SDimitry Andric for (auto &S : RegSeqWithOpNos) { 7408bcb0991SDimitry Andric if (MRI->use_nodbg_empty(S.first->getOperand(0).getReg())) { 7418bcb0991SDimitry Andric S.first->eraseFromParent(); 7428bcb0991SDimitry Andric continue; 7438bcb0991SDimitry Andric } 7448bcb0991SDimitry Andric while (!S.second.empty()) 745bdd1243dSDimitry Andric S.first->getOperand(S.second.pop_back_val()).setIsUndef(); 7468bcb0991SDimitry Andric } 7478bcb0991SDimitry Andric } 7488bcb0991SDimitry Andric 7490b57cec5SDimitry Andric return !Rollback; 7500b57cec5SDimitry Andric } 7510b57cec5SDimitry Andric 7520b57cec5SDimitry Andric bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) { 753fe6060f1SDimitry Andric ST = &MF.getSubtarget<GCNSubtarget>(); 754fe6060f1SDimitry Andric if (!ST->hasDPP() || skipFunction(MF.getFunction())) 7550b57cec5SDimitry Andric return false; 7560b57cec5SDimitry Andric 7570b57cec5SDimitry Andric MRI = &MF.getRegInfo(); 758fe6060f1SDimitry Andric TII = ST->getInstrInfo(); 7590b57cec5SDimitry Andric 7600b57cec5SDimitry Andric bool Changed = false; 7610b57cec5SDimitry Andric for (auto &MBB : MF) { 762349cc55cSDimitry Andric for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(MBB))) { 7630b57cec5SDimitry Andric if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) { 7640b57cec5SDimitry Andric Changed = true; 7650b57cec5SDimitry Andric ++NumDPPMovsCombined; 76681ad6265SDimitry Andric } else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO || 76781ad6265SDimitry Andric MI.getOpcode() == AMDGPU::V_MOV_B64_dpp) { 7685f757f3fSDimitry Andric if (ST->hasDPALU_DPP() && combineDPPMov(MI)) { 769fe6060f1SDimitry Andric Changed = true; 770fe6060f1SDimitry Andric ++NumDPPMovsCombined; 771fe6060f1SDimitry Andric } else { 7728bcb0991SDimitry Andric auto Split = TII->expandMovDPP64(MI); 773bdd1243dSDimitry Andric for (auto *M : {Split.first, Split.second}) { 774fe6060f1SDimitry Andric if (M && combineDPPMov(*M)) 7758bcb0991SDimitry Andric ++NumDPPMovsCombined; 7768bcb0991SDimitry Andric } 7778bcb0991SDimitry Andric Changed = true; 7780b57cec5SDimitry Andric } 7790b57cec5SDimitry Andric } 7800b57cec5SDimitry Andric } 781fe6060f1SDimitry Andric } 7820b57cec5SDimitry Andric return Changed; 7830b57cec5SDimitry Andric } 784