xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //=======- GCNDPPCombine.cpp - optimization for DPP instructions ---==========//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric // The pass combines V_MOV_B32_dpp instruction with its VALU uses as a DPP src0
90b57cec5SDimitry Andric // operand. If any of the use instructions cannot be combined with the mov, the
100b57cec5SDimitry Andric // whole sequence is reverted.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric // $old = ...
130b57cec5SDimitry Andric // $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane,
140b57cec5SDimitry Andric //                            dpp_controls..., $row_mask, $bank_mask, $bound_ctrl
150b57cec5SDimitry Andric // $res = VALU $dpp_value [, src1]
160b57cec5SDimitry Andric //
170b57cec5SDimitry Andric // to
180b57cec5SDimitry Andric //
190b57cec5SDimitry Andric // $res = VALU_DPP $combined_old, $vgpr_to_be_read_from_other_lane, [src1,]
200b57cec5SDimitry Andric //                 dpp_controls..., $row_mask, $bank_mask, $combined_bound_ctrl
210b57cec5SDimitry Andric //
220b57cec5SDimitry Andric // Combining rules :
230b57cec5SDimitry Andric //
240b57cec5SDimitry Andric // if $row_mask and $bank_mask are fully enabled (0xF) and
250b57cec5SDimitry Andric //    $bound_ctrl==DPP_BOUND_ZERO or $old==0
260b57cec5SDimitry Andric // -> $combined_old = undef,
270b57cec5SDimitry Andric //    $combined_bound_ctrl = DPP_BOUND_ZERO
280b57cec5SDimitry Andric //
290b57cec5SDimitry Andric // if the VALU op is binary and
300b57cec5SDimitry Andric //    $bound_ctrl==DPP_BOUND_OFF and
310b57cec5SDimitry Andric //    $old==identity value (immediate) for the VALU op
320b57cec5SDimitry Andric // -> $combined_old = src1,
330b57cec5SDimitry Andric //    $combined_bound_ctrl = DPP_BOUND_OFF
340b57cec5SDimitry Andric //
350b57cec5SDimitry Andric // Otherwise cancel.
360b57cec5SDimitry Andric //
370b57cec5SDimitry Andric // The mov_dpp instruction should reside in the same BB as all its uses
380b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
390b57cec5SDimitry Andric 
400b57cec5SDimitry Andric #include "AMDGPU.h"
41e8d8bef9SDimitry Andric #include "GCNSubtarget.h"
420b57cec5SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
430b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
440b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
450b57cec5SDimitry Andric 
460b57cec5SDimitry Andric using namespace llvm;
470b57cec5SDimitry Andric 
// Identifier for this pass; it is also handed to INITIALIZE_PASS further down
// in this file as the pass argument.
480b57cec5SDimitry Andric #define DEBUG_TYPE "gcn-dpp-combine"
490b57cec5SDimitry Andric 
// Statistic counter for combined DPP moves.
// NOTE(review): the increment site is outside the visible part of the file.
500b57cec5SDimitry Andric STATISTIC(NumDPPMovsCombined, "Number of DPP moves combined.");
510b57cec5SDimitry Andric 
520b57cec5SDimitry Andric namespace {
530b57cec5SDimitry Andric 
540b57cec5SDimitry Andric class GCNDPPCombine : public MachineFunctionPass {
550b57cec5SDimitry Andric   MachineRegisterInfo *MRI;
560b57cec5SDimitry Andric   const SIInstrInfo *TII;
57fe6060f1SDimitry Andric   const GCNSubtarget *ST;
580b57cec5SDimitry Andric 
590b57cec5SDimitry Andric   using RegSubRegPair = TargetInstrInfo::RegSubRegPair;
600b57cec5SDimitry Andric 
610b57cec5SDimitry Andric   MachineOperand *getOldOpndValue(MachineOperand &OldOpnd) const;
620b57cec5SDimitry Andric 
63fe6060f1SDimitry Andric   MachineInstr *createDPPInst(MachineInstr &OrigMI, MachineInstr &MovMI,
640b57cec5SDimitry Andric                               RegSubRegPair CombOldVGPR,
65fe6060f1SDimitry Andric                               MachineOperand *OldOpnd, bool CombBCZ,
66fe6060f1SDimitry Andric                               bool IsShrinkable) const;
670b57cec5SDimitry Andric 
68fe6060f1SDimitry Andric   MachineInstr *createDPPInst(MachineInstr &OrigMI, MachineInstr &MovMI,
69fe6060f1SDimitry Andric                               RegSubRegPair CombOldVGPR, bool CombBCZ,
70fe6060f1SDimitry Andric                               bool IsShrinkable) const;
710b57cec5SDimitry Andric 
720b57cec5SDimitry Andric   bool hasNoImmOrEqual(MachineInstr &MI,
730b57cec5SDimitry Andric                        unsigned OpndName,
740b57cec5SDimitry Andric                        int64_t Value,
750b57cec5SDimitry Andric                        int64_t Mask = -1) const;
760b57cec5SDimitry Andric 
770b57cec5SDimitry Andric   bool combineDPPMov(MachineInstr &MI) const;
780b57cec5SDimitry Andric 
790b57cec5SDimitry Andric public:
800b57cec5SDimitry Andric   static char ID;
810b57cec5SDimitry Andric 
820b57cec5SDimitry Andric   GCNDPPCombine() : MachineFunctionPass(ID) {
830b57cec5SDimitry Andric     initializeGCNDPPCombinePass(*PassRegistry::getPassRegistry());
840b57cec5SDimitry Andric   }
850b57cec5SDimitry Andric 
860b57cec5SDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
870b57cec5SDimitry Andric 
880b57cec5SDimitry Andric   StringRef getPassName() const override { return "GCN DPP Combine"; }
890b57cec5SDimitry Andric 
900b57cec5SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
910b57cec5SDimitry Andric     AU.setPreservesCFG();
920b57cec5SDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
930b57cec5SDimitry Andric   }
94480093f4SDimitry Andric 
955ffd83dbSDimitry Andric   MachineFunctionProperties getRequiredProperties() const override {
965ffd83dbSDimitry Andric     return MachineFunctionProperties()
975ffd83dbSDimitry Andric       .set(MachineFunctionProperties::Property::IsSSA);
985ffd83dbSDimitry Andric   }
995ffd83dbSDimitry Andric 
100480093f4SDimitry Andric private:
101fe6060f1SDimitry Andric   int getDPPOp(unsigned Op, bool IsShrinkable) const;
102fe6060f1SDimitry Andric   bool isShrinkable(MachineInstr &MI) const;
1030b57cec5SDimitry Andric };
1040b57cec5SDimitry Andric 
1050b57cec5SDimitry Andric } // end anonymous namespace
1060b57cec5SDimitry Andric 
// Pass registration; this provides the initializeGCNDPPCombinePass function
// that the GCNDPPCombine constructor calls.
1070b57cec5SDimitry Andric INITIALIZE_PASS(GCNDPPCombine, DEBUG_TYPE, "GCN DPP Combine", false, false)
1080b57cec5SDimitry Andric 
// Storage for the pass identifier passed to the MachineFunctionPass base.
1090b57cec5SDimitry Andric char GCNDPPCombine::ID = 0;
1100b57cec5SDimitry Andric 
// Externally visible reference to the same identifier.
1110b57cec5SDimitry Andric char &llvm::GCNDPPCombineID = GCNDPPCombine::ID;
1120b57cec5SDimitry Andric 
1130b57cec5SDimitry Andric FunctionPass *llvm::createGCNDPPCombinePass() {
1140b57cec5SDimitry Andric   return new GCNDPPCombine();
1150b57cec5SDimitry Andric }
1160b57cec5SDimitry Andric 
117fe6060f1SDimitry Andric bool GCNDPPCombine::isShrinkable(MachineInstr &MI) const {
118fe6060f1SDimitry Andric   unsigned Op = MI.getOpcode();
119fe6060f1SDimitry Andric   if (!TII->isVOP3(Op)) {
120fe6060f1SDimitry Andric     return false;
121fe6060f1SDimitry Andric   }
122fe6060f1SDimitry Andric   if (!TII->hasVALU32BitEncoding(Op)) {
123fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "  Inst hasn't e32 equivalent\n");
124fe6060f1SDimitry Andric     return false;
125fe6060f1SDimitry Andric   }
126bdd1243dSDimitry Andric   // Do not shrink True16 instructions pre-RA to avoid the restriction in
127bdd1243dSDimitry Andric   // register allocation from only being able to use 128 VGPRs
128bdd1243dSDimitry Andric   if (AMDGPU::isTrue16Inst(Op))
129bdd1243dSDimitry Andric     return false;
130fe6060f1SDimitry Andric   if (const auto *SDst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst)) {
131bdd1243dSDimitry Andric     // Give up if there are any uses of the sdst in carry-out or VOPC.
132bdd1243dSDimitry Andric     // The shrunken form of the instruction would write it to vcc instead of to
133bdd1243dSDimitry Andric     // a virtual register. If we rewrote the uses the shrinking would be
134bdd1243dSDimitry Andric     // possible.
135fe6060f1SDimitry Andric     if (!MRI->use_nodbg_empty(SDst->getReg()))
136fe6060f1SDimitry Andric       return false;
137fe6060f1SDimitry Andric   }
138fe6060f1SDimitry Andric   // check if other than abs|neg modifiers are set (opsel for example)
139fe6060f1SDimitry Andric   const int64_t Mask = ~(SISrcMods::ABS | SISrcMods::NEG);
140fe6060f1SDimitry Andric   if (!hasNoImmOrEqual(MI, AMDGPU::OpName::src0_modifiers, 0, Mask) ||
141fe6060f1SDimitry Andric       !hasNoImmOrEqual(MI, AMDGPU::OpName::src1_modifiers, 0, Mask) ||
142fe6060f1SDimitry Andric       !hasNoImmOrEqual(MI, AMDGPU::OpName::clamp, 0) ||
143*0fca6ea1SDimitry Andric       !hasNoImmOrEqual(MI, AMDGPU::OpName::omod, 0) ||
144*0fca6ea1SDimitry Andric       !hasNoImmOrEqual(MI, AMDGPU::OpName::byte_sel, 0)) {
145fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "  Inst has non-default modifiers\n");
146fe6060f1SDimitry Andric     return false;
147fe6060f1SDimitry Andric   }
148fe6060f1SDimitry Andric   return true;
149fe6060f1SDimitry Andric }
150fe6060f1SDimitry Andric 
151fe6060f1SDimitry Andric int GCNDPPCombine::getDPPOp(unsigned Op, bool IsShrinkable) const {
152753f127fSDimitry Andric   int DPP32 = AMDGPU::getDPPOp32(Op);
153fe6060f1SDimitry Andric   if (IsShrinkable) {
154fe6060f1SDimitry Andric     assert(DPP32 == -1);
155753f127fSDimitry Andric     int E32 = AMDGPU::getVOPe32(Op);
156480093f4SDimitry Andric     DPP32 = (E32 == -1) ? -1 : AMDGPU::getDPPOp32(E32);
157480093f4SDimitry Andric   }
158753f127fSDimitry Andric   if (DPP32 != -1 && TII->pseudoToMCOpcode(DPP32) != -1)
159753f127fSDimitry Andric     return DPP32;
160753f127fSDimitry Andric   int DPP64 = -1;
161753f127fSDimitry Andric   if (ST->hasVOP3DPP())
162753f127fSDimitry Andric     DPP64 = AMDGPU::getDPPOp64(Op);
163753f127fSDimitry Andric   if (DPP64 != -1 && TII->pseudoToMCOpcode(DPP64) != -1)
164753f127fSDimitry Andric     return DPP64;
165753f127fSDimitry Andric   return -1;
1660b57cec5SDimitry Andric }
1670b57cec5SDimitry Andric 
1680b57cec5SDimitry Andric // tracks the register operand definition and returns:
1690b57cec5SDimitry Andric //   1. immediate operand used to initialize the register if found
1700b57cec5SDimitry Andric //   2. nullptr if the register operand is undef
1710b57cec5SDimitry Andric //   3. the operand itself otherwise
1720b57cec5SDimitry Andric MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const {
1730b57cec5SDimitry Andric   auto *Def = getVRegSubRegDef(getRegSubRegPair(OldOpnd), *MRI);
1740b57cec5SDimitry Andric   if (!Def)
1750b57cec5SDimitry Andric     return nullptr;
1760b57cec5SDimitry Andric 
1770b57cec5SDimitry Andric   switch(Def->getOpcode()) {
1780b57cec5SDimitry Andric   default: break;
1790b57cec5SDimitry Andric   case AMDGPU::IMPLICIT_DEF:
1800b57cec5SDimitry Andric     return nullptr;
1810b57cec5SDimitry Andric   case AMDGPU::COPY:
182fe6060f1SDimitry Andric   case AMDGPU::V_MOV_B32_e32:
18381ad6265SDimitry Andric   case AMDGPU::V_MOV_B64_PSEUDO:
18481ad6265SDimitry Andric   case AMDGPU::V_MOV_B64_e32:
18581ad6265SDimitry Andric   case AMDGPU::V_MOV_B64_e64: {
1860b57cec5SDimitry Andric     auto &Op1 = Def->getOperand(1);
1870b57cec5SDimitry Andric     if (Op1.isImm())
1880b57cec5SDimitry Andric       return &Op1;
1890b57cec5SDimitry Andric     break;
1900b57cec5SDimitry Andric   }
1910b57cec5SDimitry Andric   }
1920b57cec5SDimitry Andric   return &OldOpnd;
1930b57cec5SDimitry Andric }
1940b57cec5SDimitry Andric 
1955f757f3fSDimitry Andric [[maybe_unused]] static unsigned getOperandSize(MachineInstr &MI, unsigned Idx,
1965f757f3fSDimitry Andric                                MachineRegisterInfo &MRI) {
1975f757f3fSDimitry Andric   int16_t RegClass = MI.getDesc().operands()[Idx].RegClass;
1985f757f3fSDimitry Andric   if (RegClass == -1)
1995f757f3fSDimitry Andric     return 0;
2005f757f3fSDimitry Andric 
2015f757f3fSDimitry Andric   const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
2025f757f3fSDimitry Andric   return TRI->getRegSizeInBits(*TRI->getRegClass(RegClass));
2035f757f3fSDimitry Andric }
2045f757f3fSDimitry Andric 
// Builds the DPP form of OrigMI directly in front of it, in the same block.
//
// src0 and the dpp_ctrl/row_mask/bank_mask operands are taken from MovMI; the
// remaining operands are taken from OrigMI. CombOldVGPR becomes the `old`
// operand when the DPP opcode has one, and CombBCZ selects the final
// immediate operand (1 or 0). IsShrinkable is forwarded to getDPPOp.
//
// Returns the new instruction, or nullptr when no DPP opcode exists or an
// operand is not legal for it; in the latter case the partially built
// instruction is erased again.
2050b57cec5SDimitry Andric MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
2060b57cec5SDimitry Andric                                            MachineInstr &MovMI,
2070b57cec5SDimitry Andric                                            RegSubRegPair CombOldVGPR,
208fe6060f1SDimitry Andric                                            bool CombBCZ,
209fe6060f1SDimitry Andric                                            bool IsShrinkable) const {
210fe6060f1SDimitry Andric   assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp ||
21181ad6265SDimitry Andric          MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp ||
212fe6060f1SDimitry Andric          MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
2130b57cec5SDimitry Andric 
214753f127fSDimitry Andric   bool HasVOP3DPP = ST->hasVOP3DPP();
2150b57cec5SDimitry Andric   auto OrigOp = OrigMI.getOpcode();
216fe6060f1SDimitry Andric   auto DPPOp = getDPPOp(OrigOp, IsShrinkable);
2170b57cec5SDimitry Andric   if (DPPOp == -1) {
2180b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "  failed: no DPP opcode\n");
2190b57cec5SDimitry Andric     return nullptr;
2200b57cec5SDimitry Andric   }
221fcaf7f86SDimitry Andric   int OrigOpE32 = AMDGPU::getVOPe32(OrigOp);
222fcaf7f86SDimitry Andric   // Prior checks cover Mask with VOPC condition, but not on purpose
223fcaf7f86SDimitry Andric   auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
224fcaf7f86SDimitry Andric   assert(RowMaskOpnd && RowMaskOpnd->isImm());
225fcaf7f86SDimitry Andric   auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
226fcaf7f86SDimitry Andric   assert(BankMaskOpnd && BankMaskOpnd->isImm());
227fcaf7f86SDimitry Andric   const bool MaskAllLanes =
228fcaf7f86SDimitry Andric       RowMaskOpnd->getImm() == 0xF && BankMaskOpnd->getImm() == 0xF;
  // MaskAllLanes is only read by the assert below; the void cast keeps it
  // "used" when asserts are compiled out.
229fcaf7f86SDimitry Andric   (void)MaskAllLanes;
230bdd1243dSDimitry Andric   assert((MaskAllLanes ||
231bdd1243dSDimitry Andric           !(TII->isVOPC(DPPOp) || (TII->isVOP3(DPPOp) && OrigOpE32 != -1 &&
232bdd1243dSDimitry Andric                                    TII->isVOPC(OrigOpE32)))) &&
233fcaf7f86SDimitry Andric          "VOPC cannot form DPP unless mask is full");
2340b57cec5SDimitry Andric 
  // The new instruction is inserted before OrigMI and inherits its debug
  // location and MI flags. Its operands are appended one by one below.
2350b57cec5SDimitry Andric   auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI,
2365ffd83dbSDimitry Andric                          OrigMI.getDebugLoc(), TII->get(DPPOp))
2375ffd83dbSDimitry Andric     .setMIFlags(OrigMI.getFlags());
2385ffd83dbSDimitry Andric 
  // Single-iteration loop: `break` leaves the operand-building sequence and
  // falls through to the shared failure cleanup after the loop.
2390b57cec5SDimitry Andric   bool Fail = false;
2400b57cec5SDimitry Andric   do {
    // NumOperands counts the operands appended so far; it doubles as the
    // index of the next operand for the legality checks and index asserts.
241753f127fSDimitry Andric     int NumOperands = 0;
242753f127fSDimitry Andric     if (auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst)) {
2430b57cec5SDimitry Andric       DPPInst.add(*Dst);
244753f127fSDimitry Andric       ++NumOperands;
245753f127fSDimitry Andric     }
246753f127fSDimitry Andric     if (auto *SDst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::sdst)) {
247753f127fSDimitry Andric       if (TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, SDst)) {
248753f127fSDimitry Andric         DPPInst.add(*SDst);
249753f127fSDimitry Andric         ++NumOperands;
250753f127fSDimitry Andric       }
251753f127fSDimitry Andric       // If we shrunk a 64bit vop3b to 32bits, just ignore the sdst
252753f127fSDimitry Andric     }
2530b57cec5SDimitry Andric 
2540b57cec5SDimitry Andric     const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);
2550b57cec5SDimitry Andric     if (OldIdx != -1) {
2560b57cec5SDimitry Andric       assert(OldIdx == NumOperands);
257fe6060f1SDimitry Andric       assert(isOfRegClass(
258fe6060f1SDimitry Andric           CombOldVGPR,
259fe6060f1SDimitry Andric           *MRI->getRegClass(
260fe6060f1SDimitry Andric               TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst)->getReg()),
261fe6060f1SDimitry Andric           *MRI));
      // The `old` operand is flagged undef when no definition is found for
      // CombOldVGPR.
2628bcb0991SDimitry Andric       auto *Def = getVRegSubRegDef(CombOldVGPR, *MRI);
2638bcb0991SDimitry Andric       DPPInst.addReg(CombOldVGPR.Reg, Def ? 0 : RegState::Undef,
2648bcb0991SDimitry Andric                      CombOldVGPR.SubReg);
2650b57cec5SDimitry Andric       ++NumOperands;
266fcaf7f86SDimitry Andric     } else if (TII->isVOPC(DPPOp) || (TII->isVOP3(DPPOp) && OrigOpE32 != -1 &&
267fcaf7f86SDimitry Andric                                       TII->isVOPC(OrigOpE32))) {
268fcaf7f86SDimitry Andric       // VOPC DPP and VOPC promoted to VOP3 DPP do not have an old operand
269fcaf7f86SDimitry Andric       // because they write to SGPRs not VGPRs
2700b57cec5SDimitry Andric     } else {
2710b57cec5SDimitry Andric       // TODO: this discards MAC/FMA instructions for now, let's add it later
2720b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "  failed: no old operand in DPP instruction,"
2730b57cec5SDimitry Andric                            " TBD\n");
2740b57cec5SDimitry Andric       Fail = true;
2750b57cec5SDimitry Andric       break;
2760b57cec5SDimitry Andric     }
2770b57cec5SDimitry Andric 
    // src0 modifiers come from OrigMI; if OrigMI has none but the DPP opcode
    // expects the operand, a zero immediate is supplied.
2787a6dacacSDimitry Andric     auto *Mod0 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0_modifiers);
2797a6dacacSDimitry Andric     if (Mod0) {
2800b57cec5SDimitry Andric       assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
2810b57cec5SDimitry Andric                                           AMDGPU::OpName::src0_modifiers));
282753f127fSDimitry Andric       assert(HasVOP3DPP ||
283753f127fSDimitry Andric              (0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))));
2840b57cec5SDimitry Andric       DPPInst.addImm(Mod0->getImm());
2850b57cec5SDimitry Andric       ++NumOperands;
286bdd1243dSDimitry Andric     } else if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src0_modifiers)) {
2878bcb0991SDimitry Andric       DPPInst.addImm(0);
2888bcb0991SDimitry Andric       ++NumOperands;
2890b57cec5SDimitry Andric     }
    // src0 of the combined instruction is the src0 of the DPP mov.
2900b57cec5SDimitry Andric     auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
2910b57cec5SDimitry Andric     assert(Src0);
2925f757f3fSDimitry Andric     int Src0Idx = NumOperands;
2930b57cec5SDimitry Andric     if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) {
2940b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "  failed: src0 is illegal\n");
2950b57cec5SDimitry Andric       Fail = true;
2960b57cec5SDimitry Andric       break;
2970b57cec5SDimitry Andric     }
2980b57cec5SDimitry Andric     DPPInst.add(*Src0);
    // Drop the kill flag copied along with MovMI's src0 operand.
    // NOTE(review): presumably because the register may still be read after
    // the new instruction - confirm against the callers.
2990b57cec5SDimitry Andric     DPPInst->getOperand(NumOperands).setIsKill(false);
3000b57cec5SDimitry Andric     ++NumOperands;
3010b57cec5SDimitry Andric 
3027a6dacacSDimitry Andric     auto *Mod1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1_modifiers);
3037a6dacacSDimitry Andric     if (Mod1) {
3040b57cec5SDimitry Andric       assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
3050b57cec5SDimitry Andric                                           AMDGPU::OpName::src1_modifiers));
306753f127fSDimitry Andric       assert(HasVOP3DPP ||
307753f127fSDimitry Andric              (0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))));
3080b57cec5SDimitry Andric       DPPInst.addImm(Mod1->getImm());
3090b57cec5SDimitry Andric       ++NumOperands;
310bdd1243dSDimitry Andric     } else if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src1_modifiers)) {
3118bcb0991SDimitry Andric       DPPInst.addImm(0);
3128bcb0991SDimitry Andric       ++NumOperands;
3130b57cec5SDimitry Andric     }
314753f127fSDimitry Andric     auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
315753f127fSDimitry Andric     if (Src1) {
3165f757f3fSDimitry Andric       int OpNum = NumOperands;
3175f757f3fSDimitry Andric       // If subtarget does not support SGPRs for src1 operand then the
3185f757f3fSDimitry Andric       // requirements are the same as for src0. We check src0 instead because
3195f757f3fSDimitry Andric       // pseudos are shared between subtargets and allow SGPR for src1 on all.
3205f757f3fSDimitry Andric       if (!ST->hasDPPSrc1SGPR()) {
3215f757f3fSDimitry Andric         assert(getOperandSize(*DPPInst, Src0Idx, *MRI) ==
3225f757f3fSDimitry Andric                    getOperandSize(*DPPInst, NumOperands, *MRI) &&
3235f757f3fSDimitry Andric                "Src0 and Src1 operands should have the same size");
3245f757f3fSDimitry Andric         OpNum = Src0Idx;
3255f757f3fSDimitry Andric       }
3265f757f3fSDimitry Andric       if (!TII->isOperandLegal(*DPPInst.getInstr(), OpNum, Src1)) {
3270b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "  failed: src1 is illegal\n");
3280b57cec5SDimitry Andric         Fail = true;
3290b57cec5SDimitry Andric         break;
3300b57cec5SDimitry Andric       }
3310b57cec5SDimitry Andric       DPPInst.add(*Src1);
3320b57cec5SDimitry Andric       ++NumOperands;
3330b57cec5SDimitry Andric     }
3347a6dacacSDimitry Andric 
3357a6dacacSDimitry Andric     auto *Mod2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2_modifiers);
3367a6dacacSDimitry Andric     if (Mod2) {
337753f127fSDimitry Andric       assert(NumOperands ==
338753f127fSDimitry Andric              AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::src2_modifiers));
339753f127fSDimitry Andric       assert(HasVOP3DPP ||
340753f127fSDimitry Andric              (0LL == (Mod2->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG))));
341753f127fSDimitry Andric       DPPInst.addImm(Mod2->getImm());
342753f127fSDimitry Andric       ++NumOperands;
343753f127fSDimitry Andric     }
344753f127fSDimitry Andric     auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2);
345753f127fSDimitry Andric     if (Src2) {
      // Fails when the DPP instruction has no src2 operand at all, or when
      // OrigMI's src2 is not legal in that position.
346480093f4SDimitry Andric       if (!TII->getNamedOperand(*DPPInst.getInstr(), AMDGPU::OpName::src2) ||
347480093f4SDimitry Andric           !TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
3480b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "  failed: src2 is illegal\n");
3490b57cec5SDimitry Andric         Fail = true;
3500b57cec5SDimitry Andric         break;
3510b57cec5SDimitry Andric       }
3520b57cec5SDimitry Andric       DPPInst.add(*Src2);
353753f127fSDimitry Andric       ++NumOperands;
3540b57cec5SDimitry Andric     }
3557a6dacacSDimitry Andric 
    // On subtargets with VOP3 DPP, carry over clamp, vdst_in, omod, op_sel,
    // op_sel_hi, neg_lo, neg_hi and byte_sel. Each is only appended when
    // OrigMI has the operand and the DPP opcode has it as well.
356753f127fSDimitry Andric     if (HasVOP3DPP) {
357753f127fSDimitry Andric       auto *ClampOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::clamp);
358bdd1243dSDimitry Andric       if (ClampOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::clamp)) {
359753f127fSDimitry Andric         DPPInst.addImm(ClampOpr->getImm());
360753f127fSDimitry Andric       }
361753f127fSDimitry Andric       auto *VdstInOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst_in);
362753f127fSDimitry Andric       if (VdstInOpr &&
363bdd1243dSDimitry Andric           AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::vdst_in)) {
364753f127fSDimitry Andric         DPPInst.add(*VdstInOpr);
365753f127fSDimitry Andric       }
366753f127fSDimitry Andric       auto *OmodOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::omod);
367bdd1243dSDimitry Andric       if (OmodOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::omod)) {
368753f127fSDimitry Andric         DPPInst.addImm(OmodOpr->getImm());
369753f127fSDimitry Andric       }
370753f127fSDimitry Andric       // Validate OP_SEL has to be set to all 0 and OP_SEL_HI has to be set to
371753f127fSDimitry Andric       // all 1.
372*0fca6ea1SDimitry Andric       if (TII->getNamedOperand(OrigMI, AMDGPU::OpName::op_sel)) {
        // The op_sel value is rebuilt from the OP_SEL_0 bit of each source
        // modifier (bits 0..2), plus DST_OP_SEL from src0's modifiers (bit 3)
        // for VOP3 instructions that are not VOP3P.
3737a6dacacSDimitry Andric         int64_t OpSel = 0;
3747a6dacacSDimitry Andric         OpSel |= (Mod0 ? (!!(Mod0->getImm() & SISrcMods::OP_SEL_0) << 0) : 0);
3757a6dacacSDimitry Andric         OpSel |= (Mod1 ? (!!(Mod1->getImm() & SISrcMods::OP_SEL_0) << 1) : 0);
3767a6dacacSDimitry Andric         OpSel |= (Mod2 ? (!!(Mod2->getImm() & SISrcMods::OP_SEL_0) << 2) : 0);
3777a6dacacSDimitry Andric         if (Mod0 && TII->isVOP3(OrigMI) && !TII->isVOP3P(OrigMI))
3787a6dacacSDimitry Andric           OpSel |= !!(Mod0->getImm() & SISrcMods::DST_OP_SEL) << 3;
3797a6dacacSDimitry Andric 
380753f127fSDimitry Andric         if (OpSel != 0) {
381753f127fSDimitry Andric           LLVM_DEBUG(dbgs() << "  failed: op_sel must be zero\n");
382753f127fSDimitry Andric           Fail = true;
383753f127fSDimitry Andric           break;
384753f127fSDimitry Andric         }
385bdd1243dSDimitry Andric         if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::op_sel))
386753f127fSDimitry Andric           DPPInst.addImm(OpSel);
387753f127fSDimitry Andric       }
388*0fca6ea1SDimitry Andric       if (TII->getNamedOperand(OrigMI, AMDGPU::OpName::op_sel_hi)) {
        // Likewise, op_sel_hi is rebuilt from the OP_SEL_1 bit of each source
        // modifier.
3897a6dacacSDimitry Andric         int64_t OpSelHi = 0;
3907a6dacacSDimitry Andric         OpSelHi |= (Mod0 ? (!!(Mod0->getImm() & SISrcMods::OP_SEL_1) << 0) : 0);
3917a6dacacSDimitry Andric         OpSelHi |= (Mod1 ? (!!(Mod1->getImm() & SISrcMods::OP_SEL_1) << 1) : 0);
3927a6dacacSDimitry Andric         OpSelHi |= (Mod2 ? (!!(Mod2->getImm() & SISrcMods::OP_SEL_1) << 2) : 0);
3937a6dacacSDimitry Andric 
394753f127fSDimitry Andric         // Only vop3p has op_sel_hi, and all vop3p have 3 operands, so check
395753f127fSDimitry Andric         // the bitmask for 3 op_sel_hi bits set
396753f127fSDimitry Andric         assert(Src2 && "Expected vop3p with 3 operands");
397753f127fSDimitry Andric         if (OpSelHi != 7) {
398753f127fSDimitry Andric           LLVM_DEBUG(dbgs() << "  failed: op_sel_hi must be all set to one\n");
399753f127fSDimitry Andric           Fail = true;
400753f127fSDimitry Andric           break;
401753f127fSDimitry Andric         }
402bdd1243dSDimitry Andric         if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::op_sel_hi))
403753f127fSDimitry Andric           DPPInst.addImm(OpSelHi);
404753f127fSDimitry Andric       }
405753f127fSDimitry Andric       auto *NegOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::neg_lo);
406bdd1243dSDimitry Andric       if (NegOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::neg_lo)) {
407753f127fSDimitry Andric         DPPInst.addImm(NegOpr->getImm());
408753f127fSDimitry Andric       }
409753f127fSDimitry Andric       auto *NegHiOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::neg_hi);
410bdd1243dSDimitry Andric       if (NegHiOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::neg_hi)) {
411753f127fSDimitry Andric         DPPInst.addImm(NegHiOpr->getImm());
412753f127fSDimitry Andric       }
413*0fca6ea1SDimitry Andric       auto *ByteSelOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::byte_sel);
414*0fca6ea1SDimitry Andric       if (ByteSelOpr &&
415*0fca6ea1SDimitry Andric           AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::byte_sel)) {
416*0fca6ea1SDimitry Andric         DPPInst.addImm(ByteSelOpr->getImm());
417*0fca6ea1SDimitry Andric       }
418753f127fSDimitry Andric     }
    // The DPP controls are copied from the mov; the trailing immediate is the
    // combined bound_ctrl value chosen by the caller (CombBCZ).
4190b57cec5SDimitry Andric     DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl));
4200b57cec5SDimitry Andric     DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
4210b57cec5SDimitry Andric     DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
4220b57cec5SDimitry Andric     DPPInst.addImm(CombBCZ ? 1 : 0);
4230b57cec5SDimitry Andric   } while (false);
4240b57cec5SDimitry Andric 
  // On failure the half-built instruction is removed from the block again.
4250b57cec5SDimitry Andric   if (Fail) {
4260b57cec5SDimitry Andric     DPPInst.getInstr()->eraseFromParent();
4270b57cec5SDimitry Andric     return nullptr;
4280b57cec5SDimitry Andric   }
4290b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "  combined:  " << *DPPInst.getInstr());
4300b57cec5SDimitry Andric   return DPPInst.getInstr();
4310b57cec5SDimitry Andric }
4320b57cec5SDimitry Andric 
4330b57cec5SDimitry Andric static bool isIdentityValue(unsigned OrigMIOp, MachineOperand *OldOpnd) {
4340b57cec5SDimitry Andric   assert(OldOpnd->isImm());
4350b57cec5SDimitry Andric   switch (OrigMIOp) {
4360b57cec5SDimitry Andric   default: break;
4370b57cec5SDimitry Andric   case AMDGPU::V_ADD_U32_e32:
4380b57cec5SDimitry Andric   case AMDGPU::V_ADD_U32_e64:
439e8d8bef9SDimitry Andric   case AMDGPU::V_ADD_CO_U32_e32:
440e8d8bef9SDimitry Andric   case AMDGPU::V_ADD_CO_U32_e64:
4410b57cec5SDimitry Andric   case AMDGPU::V_OR_B32_e32:
4420b57cec5SDimitry Andric   case AMDGPU::V_OR_B32_e64:
4430b57cec5SDimitry Andric   case AMDGPU::V_SUBREV_U32_e32:
4440b57cec5SDimitry Andric   case AMDGPU::V_SUBREV_U32_e64:
445e8d8bef9SDimitry Andric   case AMDGPU::V_SUBREV_CO_U32_e32:
446e8d8bef9SDimitry Andric   case AMDGPU::V_SUBREV_CO_U32_e64:
4470b57cec5SDimitry Andric   case AMDGPU::V_MAX_U32_e32:
4480b57cec5SDimitry Andric   case AMDGPU::V_MAX_U32_e64:
4490b57cec5SDimitry Andric   case AMDGPU::V_XOR_B32_e32:
4500b57cec5SDimitry Andric   case AMDGPU::V_XOR_B32_e64:
4510b57cec5SDimitry Andric     if (OldOpnd->getImm() == 0)
4520b57cec5SDimitry Andric       return true;
4530b57cec5SDimitry Andric     break;
4540b57cec5SDimitry Andric   case AMDGPU::V_AND_B32_e32:
4550b57cec5SDimitry Andric   case AMDGPU::V_AND_B32_e64:
4560b57cec5SDimitry Andric   case AMDGPU::V_MIN_U32_e32:
4570b57cec5SDimitry Andric   case AMDGPU::V_MIN_U32_e64:
4580b57cec5SDimitry Andric     if (static_cast<uint32_t>(OldOpnd->getImm()) ==
4590b57cec5SDimitry Andric         std::numeric_limits<uint32_t>::max())
4600b57cec5SDimitry Andric       return true;
4610b57cec5SDimitry Andric     break;
4620b57cec5SDimitry Andric   case AMDGPU::V_MIN_I32_e32:
4630b57cec5SDimitry Andric   case AMDGPU::V_MIN_I32_e64:
4640b57cec5SDimitry Andric     if (static_cast<int32_t>(OldOpnd->getImm()) ==
4650b57cec5SDimitry Andric         std::numeric_limits<int32_t>::max())
4660b57cec5SDimitry Andric       return true;
4670b57cec5SDimitry Andric     break;
4680b57cec5SDimitry Andric   case AMDGPU::V_MAX_I32_e32:
4690b57cec5SDimitry Andric   case AMDGPU::V_MAX_I32_e64:
4700b57cec5SDimitry Andric     if (static_cast<int32_t>(OldOpnd->getImm()) ==
4710b57cec5SDimitry Andric         std::numeric_limits<int32_t>::min())
4720b57cec5SDimitry Andric       return true;
4730b57cec5SDimitry Andric     break;
4740b57cec5SDimitry Andric   case AMDGPU::V_MUL_I32_I24_e32:
4750b57cec5SDimitry Andric   case AMDGPU::V_MUL_I32_I24_e64:
4760b57cec5SDimitry Andric   case AMDGPU::V_MUL_U32_U24_e32:
4770b57cec5SDimitry Andric   case AMDGPU::V_MUL_U32_U24_e64:
4780b57cec5SDimitry Andric     if (OldOpnd->getImm() == 1)
4790b57cec5SDimitry Andric       return true;
4800b57cec5SDimitry Andric     break;
4810b57cec5SDimitry Andric   }
4820b57cec5SDimitry Andric   return false;
4830b57cec5SDimitry Andric }
4840b57cec5SDimitry Andric 
485fe6060f1SDimitry Andric MachineInstr *GCNDPPCombine::createDPPInst(
486fe6060f1SDimitry Andric     MachineInstr &OrigMI, MachineInstr &MovMI, RegSubRegPair CombOldVGPR,
487fe6060f1SDimitry Andric     MachineOperand *OldOpndValue, bool CombBCZ, bool IsShrinkable) const {
4880b57cec5SDimitry Andric   assert(CombOldVGPR.Reg);
4890b57cec5SDimitry Andric   if (!CombBCZ && OldOpndValue && OldOpndValue->isImm()) {
4900b57cec5SDimitry Andric     auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
4910b57cec5SDimitry Andric     if (!Src1 || !Src1->isReg()) {
4920b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "  failed: no src1 or it isn't a register\n");
4930b57cec5SDimitry Andric       return nullptr;
4940b57cec5SDimitry Andric     }
4950b57cec5SDimitry Andric     if (!isIdentityValue(OrigMI.getOpcode(), OldOpndValue)) {
4960b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "  failed: old immediate isn't an identity\n");
4970b57cec5SDimitry Andric       return nullptr;
4980b57cec5SDimitry Andric     }
4990b57cec5SDimitry Andric     CombOldVGPR = getRegSubRegPair(*Src1);
500fe6060f1SDimitry Andric     auto MovDst = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
501fe6060f1SDimitry Andric     const TargetRegisterClass *RC = MRI->getRegClass(MovDst->getReg());
502fe6060f1SDimitry Andric     if (!isOfRegClass(CombOldVGPR, *RC, *MRI)) {
503fe6060f1SDimitry Andric       LLVM_DEBUG(dbgs() << "  failed: src1 has wrong register class\n");
5040b57cec5SDimitry Andric       return nullptr;
5050b57cec5SDimitry Andric     }
5060b57cec5SDimitry Andric   }
507fe6060f1SDimitry Andric   return createDPPInst(OrigMI, MovMI, CombOldVGPR, CombBCZ, IsShrinkable);
5080b57cec5SDimitry Andric }
5090b57cec5SDimitry Andric 
5100b57cec5SDimitry Andric // returns true if MI doesn't have OpndName immediate operand or the
5110b57cec5SDimitry Andric // operand has Value
5120b57cec5SDimitry Andric bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName,
5130b57cec5SDimitry Andric                                     int64_t Value, int64_t Mask) const {
5140b57cec5SDimitry Andric   auto *Imm = TII->getNamedOperand(MI, OpndName);
5150b57cec5SDimitry Andric   if (!Imm)
5160b57cec5SDimitry Andric     return true;
5170b57cec5SDimitry Andric 
5180b57cec5SDimitry Andric   assert(Imm->isImm());
5190b57cec5SDimitry Andric   return (Imm->getImm() & Mask) == Value;
5200b57cec5SDimitry Andric }
5210b57cec5SDimitry Andric 
5220b57cec5SDimitry Andric bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
523fe6060f1SDimitry Andric   assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp ||
52481ad6265SDimitry Andric          MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp ||
525fe6060f1SDimitry Andric          MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
5260b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);
5270b57cec5SDimitry Andric 
5280b57cec5SDimitry Andric   auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
5290b57cec5SDimitry Andric   assert(DstOpnd && DstOpnd->isReg());
5300b57cec5SDimitry Andric   auto DPPMovReg = DstOpnd->getReg();
5318bcb0991SDimitry Andric   if (DPPMovReg.isPhysical()) {
5328bcb0991SDimitry Andric     LLVM_DEBUG(dbgs() << "  failed: dpp move writes physreg\n");
5338bcb0991SDimitry Andric     return false;
5348bcb0991SDimitry Andric   }
5350b57cec5SDimitry Andric   if (execMayBeModifiedBeforeAnyUse(*MRI, DPPMovReg, MovMI)) {
5360b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "  failed: EXEC mask should remain the same"
5370b57cec5SDimitry Andric                          " for all uses\n");
5380b57cec5SDimitry Andric     return false;
5390b57cec5SDimitry Andric   }
5400b57cec5SDimitry Andric 
54181ad6265SDimitry Andric   if (MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO ||
54281ad6265SDimitry Andric       MovMI.getOpcode() == AMDGPU::V_MOV_B64_dpp) {
543fe6060f1SDimitry Andric     auto *DppCtrl = TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl);
544fe6060f1SDimitry Andric     assert(DppCtrl && DppCtrl->isImm());
5455f757f3fSDimitry Andric     if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl->getImm())) {
546fe6060f1SDimitry Andric       LLVM_DEBUG(dbgs() << "  failed: 64 bit dpp move uses unsupported"
547fe6060f1SDimitry Andric                            " control value\n");
548fe6060f1SDimitry Andric       // Let it split, then control may become legal.
549fe6060f1SDimitry Andric       return false;
550fe6060f1SDimitry Andric     }
551fe6060f1SDimitry Andric   }
552fe6060f1SDimitry Andric 
5530b57cec5SDimitry Andric   auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
5540b57cec5SDimitry Andric   assert(RowMaskOpnd && RowMaskOpnd->isImm());
5550b57cec5SDimitry Andric   auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
5560b57cec5SDimitry Andric   assert(BankMaskOpnd && BankMaskOpnd->isImm());
5570b57cec5SDimitry Andric   const bool MaskAllLanes = RowMaskOpnd->getImm() == 0xF &&
5580b57cec5SDimitry Andric                             BankMaskOpnd->getImm() == 0xF;
5590b57cec5SDimitry Andric 
5600b57cec5SDimitry Andric   auto *BCZOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bound_ctrl);
5610b57cec5SDimitry Andric   assert(BCZOpnd && BCZOpnd->isImm());
5620b57cec5SDimitry Andric   bool BoundCtrlZero = BCZOpnd->getImm();
5630b57cec5SDimitry Andric 
5640b57cec5SDimitry Andric   auto *OldOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::old);
5658bcb0991SDimitry Andric   auto *SrcOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
5660b57cec5SDimitry Andric   assert(OldOpnd && OldOpnd->isReg());
5678bcb0991SDimitry Andric   assert(SrcOpnd && SrcOpnd->isReg());
5688bcb0991SDimitry Andric   if (OldOpnd->getReg().isPhysical() || SrcOpnd->getReg().isPhysical()) {
5698bcb0991SDimitry Andric     LLVM_DEBUG(dbgs() << "  failed: dpp move reads physreg\n");
5708bcb0991SDimitry Andric     return false;
5718bcb0991SDimitry Andric   }
5720b57cec5SDimitry Andric 
5730b57cec5SDimitry Andric   auto * const OldOpndValue = getOldOpndValue(*OldOpnd);
5740b57cec5SDimitry Andric   // OldOpndValue is either undef (IMPLICIT_DEF) or immediate or something else
5750b57cec5SDimitry Andric   // We could use: assert(!OldOpndValue || OldOpndValue->isImm())
5760b57cec5SDimitry Andric   // but the third option is used to distinguish undef from non-immediate
5770b57cec5SDimitry Andric   // to reuse IMPLICIT_DEF instruction later
5780b57cec5SDimitry Andric   assert(!OldOpndValue || OldOpndValue->isImm() || OldOpndValue == OldOpnd);
5790b57cec5SDimitry Andric 
5800b57cec5SDimitry Andric   bool CombBCZ = false;
5810b57cec5SDimitry Andric 
5820b57cec5SDimitry Andric   if (MaskAllLanes && BoundCtrlZero) { // [1]
5830b57cec5SDimitry Andric     CombBCZ = true;
5840b57cec5SDimitry Andric   } else {
5850b57cec5SDimitry Andric     if (!OldOpndValue || !OldOpndValue->isImm()) {
5860b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "  failed: the DPP mov isn't combinable\n");
5870b57cec5SDimitry Andric       return false;
5880b57cec5SDimitry Andric     }
5890b57cec5SDimitry Andric 
5900b57cec5SDimitry Andric     if (OldOpndValue->getImm() == 0) {
5910b57cec5SDimitry Andric       if (MaskAllLanes) {
5920b57cec5SDimitry Andric         assert(!BoundCtrlZero); // by check [1]
5930b57cec5SDimitry Andric         CombBCZ = true;
5940b57cec5SDimitry Andric       }
5950b57cec5SDimitry Andric     } else if (BoundCtrlZero) {
5960b57cec5SDimitry Andric       assert(!MaskAllLanes); // by check [1]
5970b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() <<
5980b57cec5SDimitry Andric         "  failed: old!=0 and bctrl:0 and not all lanes isn't combinable\n");
5990b57cec5SDimitry Andric       return false;
6000b57cec5SDimitry Andric     }
6010b57cec5SDimitry Andric   }
6020b57cec5SDimitry Andric 
6030b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "  old=";
6040b57cec5SDimitry Andric     if (!OldOpndValue)
6050b57cec5SDimitry Andric       dbgs() << "undef";
6060b57cec5SDimitry Andric     else
6070b57cec5SDimitry Andric       dbgs() << *OldOpndValue;
6080b57cec5SDimitry Andric     dbgs() << ", bound_ctrl=" << CombBCZ << '\n');
6090b57cec5SDimitry Andric 
6100b57cec5SDimitry Andric   SmallVector<MachineInstr*, 4> OrigMIs, DPPMIs;
6118bcb0991SDimitry Andric   DenseMap<MachineInstr*, SmallVector<unsigned, 4>> RegSeqWithOpNos;
6120b57cec5SDimitry Andric   auto CombOldVGPR = getRegSubRegPair(*OldOpnd);
6130b57cec5SDimitry Andric   // try to reuse previous old reg if its undefined (IMPLICIT_DEF)
6140b57cec5SDimitry Andric   if (CombBCZ && OldOpndValue) { // CombOldVGPR should be undef
615fe6060f1SDimitry Andric     const TargetRegisterClass *RC = MRI->getRegClass(DPPMovReg);
6160b57cec5SDimitry Andric     CombOldVGPR = RegSubRegPair(
617fe6060f1SDimitry Andric       MRI->createVirtualRegister(RC));
6180b57cec5SDimitry Andric     auto UndefInst = BuildMI(*MovMI.getParent(), MovMI, MovMI.getDebugLoc(),
6190b57cec5SDimitry Andric                              TII->get(AMDGPU::IMPLICIT_DEF), CombOldVGPR.Reg);
6200b57cec5SDimitry Andric     DPPMIs.push_back(UndefInst.getInstr());
6210b57cec5SDimitry Andric   }
6220b57cec5SDimitry Andric 
6230b57cec5SDimitry Andric   OrigMIs.push_back(&MovMI);
6240b57cec5SDimitry Andric   bool Rollback = true;
6258bcb0991SDimitry Andric   SmallVector<MachineOperand*, 16> Uses;
6268bcb0991SDimitry Andric 
6270b57cec5SDimitry Andric   for (auto &Use : MRI->use_nodbg_operands(DPPMovReg)) {
6288bcb0991SDimitry Andric     Uses.push_back(&Use);
6298bcb0991SDimitry Andric   }
6308bcb0991SDimitry Andric 
6318bcb0991SDimitry Andric   while (!Uses.empty()) {
6328bcb0991SDimitry Andric     MachineOperand *Use = Uses.pop_back_val();
6330b57cec5SDimitry Andric     Rollback = true;
6340b57cec5SDimitry Andric 
6358bcb0991SDimitry Andric     auto &OrigMI = *Use->getParent();
6360b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "  try: " << OrigMI);
6370b57cec5SDimitry Andric 
6380b57cec5SDimitry Andric     auto OrigOp = OrigMI.getOpcode();
639bdd1243dSDimitry Andric     assert((TII->get(OrigOp).getSize() != 4 || !AMDGPU::isTrue16Inst(OrigOp)) &&
640bdd1243dSDimitry Andric            "There should not be e32 True16 instructions pre-RA");
6418bcb0991SDimitry Andric     if (OrigOp == AMDGPU::REG_SEQUENCE) {
6428bcb0991SDimitry Andric       Register FwdReg = OrigMI.getOperand(0).getReg();
6438bcb0991SDimitry Andric       unsigned FwdSubReg = 0;
6448bcb0991SDimitry Andric 
6458bcb0991SDimitry Andric       if (execMayBeModifiedBeforeAnyUse(*MRI, FwdReg, OrigMI)) {
6468bcb0991SDimitry Andric         LLVM_DEBUG(dbgs() << "  failed: EXEC mask should remain the same"
6478bcb0991SDimitry Andric                              " for all uses\n");
6488bcb0991SDimitry Andric         break;
6498bcb0991SDimitry Andric       }
6508bcb0991SDimitry Andric 
6518bcb0991SDimitry Andric       unsigned OpNo, E = OrigMI.getNumOperands();
6528bcb0991SDimitry Andric       for (OpNo = 1; OpNo < E; OpNo += 2) {
6538bcb0991SDimitry Andric         if (OrigMI.getOperand(OpNo).getReg() == DPPMovReg) {
6548bcb0991SDimitry Andric           FwdSubReg = OrigMI.getOperand(OpNo + 1).getImm();
6558bcb0991SDimitry Andric           break;
6568bcb0991SDimitry Andric         }
6578bcb0991SDimitry Andric       }
6588bcb0991SDimitry Andric 
6598bcb0991SDimitry Andric       if (!FwdSubReg)
6608bcb0991SDimitry Andric         break;
6618bcb0991SDimitry Andric 
6628bcb0991SDimitry Andric       for (auto &Op : MRI->use_nodbg_operands(FwdReg)) {
6638bcb0991SDimitry Andric         if (Op.getSubReg() == FwdSubReg)
6648bcb0991SDimitry Andric           Uses.push_back(&Op);
6658bcb0991SDimitry Andric       }
6668bcb0991SDimitry Andric       RegSeqWithOpNos[&OrigMI].push_back(OpNo);
6678bcb0991SDimitry Andric       continue;
6688bcb0991SDimitry Andric     }
6698bcb0991SDimitry Andric 
670fe6060f1SDimitry Andric     bool IsShrinkable = isShrinkable(OrigMI);
671753f127fSDimitry Andric     if (!(IsShrinkable ||
672753f127fSDimitry Andric           ((TII->isVOP3P(OrigOp) || TII->isVOPC(OrigOp) ||
673753f127fSDimitry Andric             TII->isVOP3(OrigOp)) &&
674753f127fSDimitry Andric            ST->hasVOP3DPP()) ||
675753f127fSDimitry Andric           TII->isVOP1(OrigOp) || TII->isVOP2(OrigOp))) {
676753f127fSDimitry Andric       LLVM_DEBUG(dbgs() << "  failed: not VOP1/2/3/3P/C\n");
677753f127fSDimitry Andric       break;
678753f127fSDimitry Andric     }
679753f127fSDimitry Andric     if (OrigMI.modifiesRegister(AMDGPU::EXEC, ST->getRegisterInfo())) {
680753f127fSDimitry Andric       LLVM_DEBUG(dbgs() << "  failed: can't combine v_cmpx\n");
6810b57cec5SDimitry Andric       break;
6820b57cec5SDimitry Andric     }
6830b57cec5SDimitry Andric 
6845ffd83dbSDimitry Andric     auto *Src0 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0);
6855ffd83dbSDimitry Andric     auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
6865ffd83dbSDimitry Andric     if (Use != Src0 && !(Use == Src1 && OrigMI.isCommutable())) { // [1]
6875ffd83dbSDimitry Andric       LLVM_DEBUG(dbgs() << "  failed: no suitable operands\n");
6885ffd83dbSDimitry Andric       break;
6895ffd83dbSDimitry Andric     }
6905ffd83dbSDimitry Andric 
691753f127fSDimitry Andric     auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2);
6925ffd83dbSDimitry Andric     assert(Src0 && "Src1 without Src0?");
693753f127fSDimitry Andric     if ((Use == Src0 && ((Src1 && Src1->isIdenticalTo(*Src0)) ||
694753f127fSDimitry Andric                          (Src2 && Src2->isIdenticalTo(*Src0)))) ||
695753f127fSDimitry Andric         (Use == Src1 && (Src1->isIdenticalTo(*Src0) ||
696753f127fSDimitry Andric                          (Src2 && Src2->isIdenticalTo(*Src1))))) {
6975ffd83dbSDimitry Andric       LLVM_DEBUG(
6985ffd83dbSDimitry Andric           dbgs()
6995ffd83dbSDimitry Andric           << "  " << OrigMI
7005ffd83dbSDimitry Andric           << "  failed: DPP register is used more than once per instruction\n");
7015ffd83dbSDimitry Andric       break;
7025ffd83dbSDimitry Andric     }
7035ffd83dbSDimitry Andric 
7040b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "  combining: " << OrigMI);
7055ffd83dbSDimitry Andric     if (Use == Src0) {
7060b57cec5SDimitry Andric       if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR,
707fe6060f1SDimitry Andric                                         OldOpndValue, CombBCZ, IsShrinkable)) {
7080b57cec5SDimitry Andric         DPPMIs.push_back(DPPInst);
7090b57cec5SDimitry Andric         Rollback = false;
7100b57cec5SDimitry Andric       }
7115ffd83dbSDimitry Andric     } else {
7125ffd83dbSDimitry Andric       assert(Use == Src1 && OrigMI.isCommutable()); // by check [1]
7130b57cec5SDimitry Andric       auto *BB = OrigMI.getParent();
7140b57cec5SDimitry Andric       auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI);
7150b57cec5SDimitry Andric       BB->insert(OrigMI, NewMI);
7160b57cec5SDimitry Andric       if (TII->commuteInstruction(*NewMI)) {
7170b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "  commuted:  " << *NewMI);
718fe6060f1SDimitry Andric         if (auto *DPPInst =
719fe6060f1SDimitry Andric                 createDPPInst(*NewMI, MovMI, CombOldVGPR, OldOpndValue, CombBCZ,
720fe6060f1SDimitry Andric                               IsShrinkable)) {
7210b57cec5SDimitry Andric           DPPMIs.push_back(DPPInst);
7220b57cec5SDimitry Andric           Rollback = false;
7230b57cec5SDimitry Andric         }
7240b57cec5SDimitry Andric       } else
7250b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "  failed: cannot be commuted\n");
7260b57cec5SDimitry Andric       NewMI->eraseFromParent();
7275ffd83dbSDimitry Andric     }
7280b57cec5SDimitry Andric     if (Rollback)
7290b57cec5SDimitry Andric       break;
7300b57cec5SDimitry Andric     OrigMIs.push_back(&OrigMI);
7310b57cec5SDimitry Andric   }
7320b57cec5SDimitry Andric 
7338bcb0991SDimitry Andric   Rollback |= !Uses.empty();
7348bcb0991SDimitry Andric 
7350b57cec5SDimitry Andric   for (auto *MI : *(Rollback? &DPPMIs : &OrigMIs))
7360b57cec5SDimitry Andric     MI->eraseFromParent();
7370b57cec5SDimitry Andric 
7388bcb0991SDimitry Andric   if (!Rollback) {
7398bcb0991SDimitry Andric     for (auto &S : RegSeqWithOpNos) {
7408bcb0991SDimitry Andric       if (MRI->use_nodbg_empty(S.first->getOperand(0).getReg())) {
7418bcb0991SDimitry Andric         S.first->eraseFromParent();
7428bcb0991SDimitry Andric         continue;
7438bcb0991SDimitry Andric       }
7448bcb0991SDimitry Andric       while (!S.second.empty())
745bdd1243dSDimitry Andric         S.first->getOperand(S.second.pop_back_val()).setIsUndef();
7468bcb0991SDimitry Andric     }
7478bcb0991SDimitry Andric   }
7488bcb0991SDimitry Andric 
7490b57cec5SDimitry Andric   return !Rollback;
7500b57cec5SDimitry Andric }
7510b57cec5SDimitry Andric 
7520b57cec5SDimitry Andric bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
753fe6060f1SDimitry Andric   ST = &MF.getSubtarget<GCNSubtarget>();
754fe6060f1SDimitry Andric   if (!ST->hasDPP() || skipFunction(MF.getFunction()))
7550b57cec5SDimitry Andric     return false;
7560b57cec5SDimitry Andric 
7570b57cec5SDimitry Andric   MRI = &MF.getRegInfo();
758fe6060f1SDimitry Andric   TII = ST->getInstrInfo();
7590b57cec5SDimitry Andric 
7600b57cec5SDimitry Andric   bool Changed = false;
7610b57cec5SDimitry Andric   for (auto &MBB : MF) {
762349cc55cSDimitry Andric     for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(MBB))) {
7630b57cec5SDimitry Andric       if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) {
7640b57cec5SDimitry Andric         Changed = true;
7650b57cec5SDimitry Andric         ++NumDPPMovsCombined;
76681ad6265SDimitry Andric       } else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO ||
76781ad6265SDimitry Andric                  MI.getOpcode() == AMDGPU::V_MOV_B64_dpp) {
7685f757f3fSDimitry Andric         if (ST->hasDPALU_DPP() && combineDPPMov(MI)) {
769fe6060f1SDimitry Andric           Changed = true;
770fe6060f1SDimitry Andric           ++NumDPPMovsCombined;
771fe6060f1SDimitry Andric         } else {
7728bcb0991SDimitry Andric           auto Split = TII->expandMovDPP64(MI);
773bdd1243dSDimitry Andric           for (auto *M : {Split.first, Split.second}) {
774fe6060f1SDimitry Andric             if (M && combineDPPMov(*M))
7758bcb0991SDimitry Andric               ++NumDPPMovsCombined;
7768bcb0991SDimitry Andric           }
7778bcb0991SDimitry Andric           Changed = true;
7780b57cec5SDimitry Andric         }
7790b57cec5SDimitry Andric       }
7800b57cec5SDimitry Andric     }
781fe6060f1SDimitry Andric   }
7820b57cec5SDimitry Andric   return Changed;
7830b57cec5SDimitry Andric }
784