//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AMDGPU.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AMDGPUInstructionSelector.h"
#include "AMDGPU.h"
#include "AMDGPUGlobalISelUtils.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include <optional>

#define DEBUG_TYPE "amdgpu-isel"

using namespace llvm;
using namespace MIPatternMatch;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
#undef AMDGPUSubtarget

AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      STI(STI),
      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }

void AMDGPUInstructionSelector::setupMF(MachineFunction &MF, GISelKnownBits *KB,
                                        CodeGenCoverage *CoverageInfo,
                                        ProfileSummaryInfo *PSI,
                                        BlockFrequencyInfo *BFI) {
  MRI = &MF.getRegInfo();
  Subtarget = &MF.getSubtarget<GCNSubtarget>();
  Subtarget->checkSubtargetFeatures(MF.getFunction());
  InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
}

// Return the wave level SGPR base address if this is a wave address.
static Register getWaveAddress(const MachineInstr *Def) {
  return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
             ? Def->getOperand(1).getReg()
             : Register();
}

bool AMDGPUInstructionSelector::isVCC(Register Reg,
                                      const MachineRegisterInfo &MRI) const {
  // The verifier is oblivious to s1 being a valid value for wavesize registers.
  if (Reg.isPhysical())
    return false;

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
  if (RC) {
    const LLT Ty = MRI.getType(Reg);
    if (!Ty.isValid() || Ty.getSizeInBits() != 1)
      return false;
    // G_TRUNC s1 result is never vcc.
    return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC &&
           RC->hasSuperClassEq(TRI.getBoolRC());
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::VCCRegBankID;
}

bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,
                                                        unsigned NewOpc) const {
  MI.setDesc(TII.get(NewOpc));
  MI.removeOperand(1); // Remove intrinsic ID.
  MI.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));

  MachineOperand &Dst = MI.getOperand(0);
  MachineOperand &Src = MI.getOperand(1);

  // TODO: This should be legalized to s32 if needed
  if (MRI->getType(Dst.getReg()) == LLT::scalar(1))
    return false;

  const TargetRegisterClass *DstRC
    = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
  const TargetRegisterClass *SrcRC
    = TRI.getConstrainedRegClassForOperand(Src, *MRI);
  if (!DstRC || DstRC != SrcRC)
    return false;

  return RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI) &&
         RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI);
}

bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  const DebugLoc &DL = I.getDebugLoc();
  MachineBasicBlock *BB = I.getParent();
  I.setDesc(TII.get(TargetOpcode::COPY));

  const MachineOperand &Src = I.getOperand(1);
  MachineOperand &Dst = I.getOperand(0);
  Register DstReg = Dst.getReg();
  Register SrcReg = Src.getReg();

  if (isVCC(DstReg, *MRI)) {
    if (SrcReg == AMDGPU::SCC) {
      const TargetRegisterClass *RC
        = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
      if (!RC)
        return true;
      return RBI.constrainGenericRegister(DstReg, *RC, *MRI);
    }

    if (!isVCC(SrcReg, *MRI)) {
      // TODO: Should probably leave the copy and let copyPhysReg expand it.
      if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI))
        return false;

      const TargetRegisterClass *SrcRC
        = TRI.getConstrainedRegClassForOperand(Src, *MRI);

      std::optional<ValueAndVReg> ConstVal =
          getIConstantVRegValWithLookThrough(SrcReg, *MRI, true);
      if (ConstVal) {
        unsigned MovOpc =
            STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
        BuildMI(*BB, &I, DL, TII.get(MovOpc), DstReg)
            .addImm(ConstVal->Value.getBoolValue() ? -1 : 0);
      } else {
        Register MaskedReg = MRI->createVirtualRegister(SrcRC);

        // We can't trust the high bits at this point, so clear them.

        // TODO: Skip masking high bits if def is known boolean.

        bool IsSGPR = TRI.isSGPRClass(SrcRC);
        unsigned AndOpc =
            IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
        auto And = BuildMI(*BB, &I, DL, TII.get(AndOpc), MaskedReg)
            .addImm(1)
            .addReg(SrcReg);
        if (IsSGPR)
          And.setOperandDead(3); // Dead scc

        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
            .addImm(0)
            .addReg(MaskedReg);
      }

      if (!MRI->getRegClassOrNull(SrcReg))
        MRI->setRegClass(SrcReg, SrcRC);
      I.eraseFromParent();
      return true;
    }

    const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(Dst, *MRI);
    if (RC && !RBI.constrainGenericRegister(DstReg, *RC, *MRI))
      return false;

    return true;
  }

  for (const MachineOperand &MO : I.operands()) {
    if (MO.getReg().isPhysical())
      continue;

    const TargetRegisterClass *RC =
            TRI.getConstrainedRegClassForOperand(MO, *MRI);
    if (!RC)
      continue;
    RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI);
  }
  return true;
}

bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI->getType(DefReg);

  // S1 G_PHIs should not be selected in instruction-select, instead:
  // - divergent S1 G_PHI should go through lane mask merging algorithm
  //   and be fully inst-selected in AMDGPUGlobalISelDivergenceLowering
  // - uniform S1 G_PHI should be lowered into S32 G_PHI in AMDGPURegBankSelect
  if (DefTy == LLT::scalar(1))
    return false;

  // TODO: Verify this doesn't have insane operands (i.e. VGPR to SGPR copy)

  const RegClassOrRegBank &RegClassOrBank =
    MRI->getRegClassOrRegBank(DefReg);

  const TargetRegisterClass *DefRC
    = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
  if (!DefRC) {
    if (!DefTy.isValid()) {
      LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
      return false;
    }

    const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
    DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB);
    if (!DefRC) {
      LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
      return false;
    }
  }

  // TODO: Verify that all registers have the same bank
  I.setDesc(TII.get(TargetOpcode::PHI));
  return RBI.constrainGenericRegister(DefReg, *DefRC, *MRI);
}

MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           const TargetRegisterClass &SubRC,
                                           unsigned SubIdx) const {

  MachineInstr *MI = MO.getParent();
  MachineBasicBlock *BB = MO.getParent()->getParent();
  Register DstReg = MRI->createVirtualRegister(&SubRC);

  if (MO.isReg()) {
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    Register Reg = MO.getReg();
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
            .addReg(Reg, 0, ComposedSubIdx);

    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
                                     MO.isKill(), MO.isDead(), MO.isUndef(),
                                     MO.isEarlyClobber(), 0, MO.isDebug(),
                                     MO.isInternalRead());
  }

  assert(MO.isImm());

  APInt Imm(64, MO.getImm());

  switch (SubIdx) {
  default:
    llvm_unreachable("do not know how to split immediate with this sub index.");
  case AMDGPU::sub0:
    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
  case AMDGPU::sub1:
    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
  }
}

static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) {
  switch (Opc) {
  case AMDGPU::G_AND:
    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
  case AMDGPU::G_OR:
    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
  case AMDGPU::G_XOR:
    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
  default:
    llvm_unreachable("not a bit op");
  }
}

bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID &&
      DstRB->getID() != AMDGPU::VCCRegBankID)
    return false;

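  // A boolean on the VCC bank is a wave-sized lane mask, so it needs the
  // 64-bit opcode on wave64 even though its IR type is only 1 bit wide.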
  bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID &&
                            STI.isWave64());
  I.setDesc(TII.get(getLogicalBitOpcode(I.getOpcode(), Is64)));

  // Dead implicit-def of scc
  I.addOperand(MachineOperand::CreateReg(AMDGPU::SCC, true, // isDef
                                         true, // isImp
                                         false, // isKill
                                         true)); // isDead
  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  Register DstReg = I.getOperand(0).getReg();
  const DebugLoc &DL = I.getDebugLoc();
  LLT Ty = MRI->getType(DstReg);
  if (Ty.isVector())
    return false;

  unsigned Size = Ty.getSizeInBits();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;

  if (Size == 32) {
    if (IsSALU) {
      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
      MachineInstr *Add =
        BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
        .add(I.getOperand(1))
        .add(I.getOperand(2))
        .setOperandDead(3); // Dead scc
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
    }

    if (STI.hasAddNoCarry()) {
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));
      I.addOperand(*MF, MachineOperand::CreateImm(0));
      I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }

    const unsigned Opc = Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;

    Register UnusedCarry = MRI->createVirtualRegister(TRI.getWaveMaskRegClass());
    MachineInstr *Add
      = BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
      .addDef(UnusedCarry, RegState::Dead)
      .add(I.getOperand(1))
      .add(I.getOperand(2))
      .addImm(0);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
  }

  assert(!Sub && "illegal sub should not reach here");

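  // Expand the 64-bit add into a low half that produces a carry, a high half
  // that consumes it, and a REG_SEQUENCE that rejoins the two 32-bit results.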
  const TargetRegisterClass &RC
    = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
  const TargetRegisterClass &HalfRC
    = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI->createVirtualRegister(&HalfRC);
  Register DstHi = MRI->createVirtualRegister(&HalfRC);

  if (IsSALU) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
      .add(Lo1)
      .add(Lo2);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
      .add(Hi1)
      .add(Hi2)
      .setOperandDead(3); // Dead scc
  } else {
    const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
    Register CarryReg = MRI->createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)
      .addDef(CarryReg)
      .add(Lo1)
      .add(Lo2)
      .addImm(0);
    MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
      .addDef(MRI->createVirtualRegister(CarryRC), RegState::Dead)
      .add(Hi1)
      .add(Hi2)
      .addReg(CarryReg, RegState::Kill)
      .addImm(0);

    if (!constrainSelectedInstRegOperands(*Addc, TII, TRI, RBI))
      return false;
  }

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
    .addReg(DstLo)
    .addImm(AMDGPU::sub0)
    .addReg(DstHi)
    .addImm(AMDGPU::sub1);


  if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
    return false;

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
  MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  const DebugLoc &DL = I.getDebugLoc();
  Register Dst0Reg = I.getOperand(0).getReg();
  Register Dst1Reg = I.getOperand(1).getReg();
  const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO ||
                     I.getOpcode() == AMDGPU::G_UADDE;
  const bool HasCarryIn = I.getOpcode() == AMDGPU::G_UADDE ||
                          I.getOpcode() == AMDGPU::G_USUBE;

  if (isVCC(Dst1Reg, *MRI)) {
    unsigned NoCarryOpc =
        IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
    unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
    I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));
    I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
    I.addOperand(*MF, MachineOperand::CreateImm(0));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  Register Src0Reg = I.getOperand(2).getReg();
  Register Src1Reg = I.getOperand(3).getReg();

  if (HasCarryIn) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
      .addReg(I.getOperand(4).getReg());
  }

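  // SALU path: the carry is modeled through SCC. The carry-in was copied into
  // SCC above; the carry-out, if it has uses, is copied back out of SCC below.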
  unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  auto CarryInst = BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
    .add(I.getOperand(2))
    .add(I.getOperand(3));

  if (MRI->use_nodbg_empty(Dst1Reg)) {
    CarryInst.setOperandDead(3); // Dead scc
  } else {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)
      .addReg(AMDGPU::SCC);
    if (!MRI->getRegClassOrNull(Dst1Reg))
      MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);
  }

  if (!RBI.constrainGenericRegister(Dst0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src1Reg, AMDGPU::SReg_32RegClass, *MRI))
    return false;

  if (HasCarryIn &&
      !RBI.constrainGenericRegister(I.getOperand(4).getReg(),
                                    AMDGPU::SReg_32RegClass, *MRI))
    return false;

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
    MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;

  unsigned Opc;
  if (Subtarget->hasMADIntraFwdBug())
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
                     : AMDGPU::V_MAD_I64_I32_gfx11_e64;
  else
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;
  I.setDesc(TII.get(Opc));
  I.addOperand(*MF, MachineOperand::CreateImm(0));
  I.addImplicitDefUseOperands(*MF);
  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

// TODO: We should probably legalize these to only using 32-bit results.
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);
  const unsigned SrcSize = SrcTy.getSizeInBits();
  unsigned DstSize = DstTy.getSizeInBits();

  // TODO: Should handle any multiple of 32 offset.
  unsigned Offset = I.getOperand(2).getImm();
  if (Offset % 32 != 0 || DstSize > 128)
    return false;

  // 16-bit operations really use 32-bit registers.
  // FIXME: Probably should not allow 16-bit G_EXTRACT results.
  if (DstSize == 16)
    DstSize = 32;

  const TargetRegisterClass *DstRC =
    TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
  if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
    return false;

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);
  if (!SrcRC)
    return false;
  unsigned SubReg = SIRegisterInfo::getSubRegFromChannel(Offset / 32,
                                                         DstSize / 32);
  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);
  if (!SrcRC)
    return false;

  SrcReg = constrainOperandRegClass(*MF, TRI, *MRI, TII, RBI, I,
                                    *SrcRC, I.getOperand(1));
  const DebugLoc &DL = I.getDebugLoc();
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)
    .addReg(SrcReg, 0, SubReg);

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(MI.getOperand(1).getReg());

  const unsigned SrcSize = SrcTy.getSizeInBits();
  if (SrcSize < 32)
    return selectImpl(MI, *CoverageInfo);

  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  const unsigned DstSize = DstTy.getSizeInBits();
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
  if (!DstRC)
    return false;

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
  MachineInstrBuilder MIB =
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
    MachineOperand &Src = MI.getOperand(I + 1);
    MIB.addReg(Src.getReg(), getUndefRegState(Src.isUndef()));
    MIB.addImm(SubRegs[I]);

    const TargetRegisterClass *SrcRC
      = TRI.getConstrainedRegClassForOperand(Src, *MRI);
    if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))
      return false;
  }

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
    return false;

  MI.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  const int NumDst = MI.getNumOperands() - 1;

  MachineOperand &Src = MI.getOperand(NumDst);

  Register SrcReg = Src.getReg();
  Register DstReg0 = MI.getOperand(0).getReg();
  LLT DstTy = MRI->getType(DstReg0);
  LLT SrcTy = MRI->getType(SrcReg);

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();
  const DebugLoc &DL = MI.getDebugLoc();
  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);

  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);
  if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
    return false;

  // Note we could have mixed SGPR and VGPR destination banks for an SGPR
  // source, and this relies on the fact that the same subregister indices are
  // used for both.
  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
  for (int I = 0, E = NumDst; I != E; ++I) {
    MachineOperand &Dst = MI.getOperand(I);
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
      .addReg(SrcReg, 0, SubRegs[I]);

    // Make sure the subregister index is valid for the source register.
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]);
    if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
      return false;

    const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(Dst, *MRI);
    if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI))
      return false;
  }

  MI.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
  assert(MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||
         MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);

  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();
  LLT SrcTy = MRI->getType(Src0);
  const unsigned SrcSize = SrcTy.getSizeInBits();

  // BUILD_VECTOR with >=32 bits source is handled by MERGE_VALUE.
  if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {
    return selectG_MERGE_VALUES(MI);
  }

  // Selection logic below is for V2S16 only.
  // For G_BUILD_VECTOR_TRUNC, additionally check that the operands are s32.
  Register Dst = MI.getOperand(0).getReg();
  if (MRI->getType(Dst) != LLT::fixed_vector(2, 16) ||
      (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&
       SrcTy != LLT::scalar(32)))
    return selectImpl(MI, *CoverageInfo);

  const RegisterBank *DstBank = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstBank->getID() == AMDGPU::AGPRRegBankID)
    return false;

  assert(DstBank->getID() == AMDGPU::SGPRRegBankID ||
         DstBank->getID() == AMDGPU::VGPRRegBankID);
  const bool IsVector = DstBank->getID() == AMDGPU::VGPRRegBankID;

  const DebugLoc &DL = MI.getDebugLoc();
  MachineBasicBlock *BB = MI.getParent();

  // First, before trying TableGen patterns, check if both sources are
  // constants. In those cases, we can trivially compute the final constant
  // and emit a simple move.
  auto ConstSrc1 = getAnyConstantVRegValWithLookThrough(Src1, *MRI, true, true);
  if (ConstSrc1) {
    auto ConstSrc0 =
        getAnyConstantVRegValWithLookThrough(Src0, *MRI, true, true);
    if (ConstSrc0) {
      const int64_t K0 = ConstSrc0->Value.getSExtValue();
      const int64_t K1 = ConstSrc1->Value.getSExtValue();
      uint32_t Lo16 = static_cast<uint32_t>(K0) & 0xffff;
      uint32_t Hi16 = static_cast<uint32_t>(K1) & 0xffff;
      uint32_t Imm = Lo16 | (Hi16 << 16);

      // VALU
      if (IsVector) {
        BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOV_B32_e32), Dst).addImm(Imm);
        MI.eraseFromParent();
        return RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI);
      }

      // SALU
      BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), Dst).addImm(Imm);
      MI.eraseFromParent();
      return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);
    }
  }

  // Now try TableGen patterns.
  if (selectImpl(MI, *CoverageInfo))
    return true;

  // TODO: This should probably be a combine somewhere
  // (build_vector $src0, undef)  -> copy $src0
  MachineInstr *Src1Def = getDefIgnoringCopies(Src1, *MRI);
  if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
    MI.setDesc(TII.get(AMDGPU::COPY));
    MI.removeOperand(2);
    const auto &RC =
        IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
    return RBI.constrainGenericRegister(Dst, RC, *MRI) &&
           RBI.constrainGenericRegister(Src0, RC, *MRI);
  }

  // TODO: Can be improved?
  if (IsVector) {
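    // VALU fallback: pack the halves as Dst = (Src1 << 16) | (Src0 & 0xffff).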
701bdd1243dSDimitry Andric     Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
702bdd1243dSDimitry Andric     auto MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)
703bdd1243dSDimitry Andric                    .addImm(0xFFFF)
704bdd1243dSDimitry Andric                    .addReg(Src0);
705bdd1243dSDimitry Andric     if (!constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI))
706bdd1243dSDimitry Andric       return false;
707bdd1243dSDimitry Andric 
708bdd1243dSDimitry Andric     MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)
709bdd1243dSDimitry Andric               .addReg(Src1)
710bdd1243dSDimitry Andric               .addImm(16)
711bdd1243dSDimitry Andric               .addReg(TmpReg);
712bdd1243dSDimitry Andric     if (!constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI))
713bdd1243dSDimitry Andric       return false;
714bdd1243dSDimitry Andric 
715bdd1243dSDimitry Andric     MI.eraseFromParent();
716bdd1243dSDimitry Andric     return true;
7175ffd83dbSDimitry Andric   }
7185ffd83dbSDimitry Andric 
7195ffd83dbSDimitry Andric   Register ShiftSrc0;
7205ffd83dbSDimitry Andric   Register ShiftSrc1;
7215ffd83dbSDimitry Andric 
7225ffd83dbSDimitry Andric   // With multiple uses of the shift, this will duplicate the shift and
7235ffd83dbSDimitry Andric   // increase register pressure.
7245ffd83dbSDimitry Andric   //
725bdd1243dSDimitry Andric   // (build_vector (lshr_oneuse $src0, 16), (lshr_oneuse $src1, 16)
7265ffd83dbSDimitry Andric   //  => (S_PACK_HH_B32_B16 $src0, $src1)
727bdd1243dSDimitry Andric   // (build_vector (lshr_oneuse SReg_32:$src0, 16), $src1)
72881ad6265SDimitry Andric   //  => (S_PACK_HL_B32_B16 $src0, $src1)
729bdd1243dSDimitry Andric   // (build_vector $src0, (lshr_oneuse SReg_32:$src1, 16))
7305ffd83dbSDimitry Andric   //  => (S_PACK_LH_B32_B16 $src0, $src1)
731bdd1243dSDimitry Andric   // (build_vector $src0, $src1)
7325ffd83dbSDimitry Andric   //  => (S_PACK_LL_B32_B16 $src0, $src1)
7335ffd83dbSDimitry Andric 
7345ffd83dbSDimitry Andric   bool Shift0 = mi_match(
735e8d8bef9SDimitry Andric       Src0, *MRI, m_OneUse(m_GLShr(m_Reg(ShiftSrc0), m_SpecificICst(16))));
7365ffd83dbSDimitry Andric 
7375ffd83dbSDimitry Andric   bool Shift1 = mi_match(
738e8d8bef9SDimitry Andric       Src1, *MRI, m_OneUse(m_GLShr(m_Reg(ShiftSrc1), m_SpecificICst(16))));
7395ffd83dbSDimitry Andric 
7405ffd83dbSDimitry Andric   unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
7415ffd83dbSDimitry Andric   if (Shift0 && Shift1) {
7425ffd83dbSDimitry Andric     Opc = AMDGPU::S_PACK_HH_B32_B16;
7435ffd83dbSDimitry Andric     MI.getOperand(1).setReg(ShiftSrc0);
7445ffd83dbSDimitry Andric     MI.getOperand(2).setReg(ShiftSrc1);
7455ffd83dbSDimitry Andric   } else if (Shift1) {
7465ffd83dbSDimitry Andric     Opc = AMDGPU::S_PACK_LH_B32_B16;
7475ffd83dbSDimitry Andric     MI.getOperand(2).setReg(ShiftSrc1);
74881ad6265SDimitry Andric   } else if (Shift0) {
749bdd1243dSDimitry Andric     auto ConstSrc1 =
750bdd1243dSDimitry Andric         getAnyConstantVRegValWithLookThrough(Src1, *MRI, true, true);
75181ad6265SDimitry Andric     if (ConstSrc1 && ConstSrc1->Value == 0) {
7525ffd83dbSDimitry Andric       // build_vector_trunc (lshr $src0, 16), 0 -> s_lshr_b32 $src0, 16
7535ffd83dbSDimitry Andric       auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
7545ffd83dbSDimitry Andric                      .addReg(ShiftSrc0)
7555f757f3fSDimitry Andric                      .addImm(16)
7565f757f3fSDimitry Andric                      .setOperandDead(3); // Dead scc
7575ffd83dbSDimitry Andric 
7585ffd83dbSDimitry Andric       MI.eraseFromParent();
7595ffd83dbSDimitry Andric       return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
7605ffd83dbSDimitry Andric     }
76181ad6265SDimitry Andric     if (STI.hasSPackHL()) {
76281ad6265SDimitry Andric       Opc = AMDGPU::S_PACK_HL_B32_B16;
76381ad6265SDimitry Andric       MI.getOperand(1).setReg(ShiftSrc0);
76481ad6265SDimitry Andric     }
76581ad6265SDimitry Andric   }
7665ffd83dbSDimitry Andric 
7675ffd83dbSDimitry Andric   MI.setDesc(TII.get(Opc));
7685ffd83dbSDimitry Andric   return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
7695ffd83dbSDimitry Andric }
7705ffd83dbSDimitry Andric 
7710b57cec5SDimitry Andric bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
7720b57cec5SDimitry Andric   const MachineOperand &MO = I.getOperand(0);
7730b57cec5SDimitry Andric 
7740b57cec5SDimitry Andric   // FIXME: Interface for getConstrainedRegClassForOperand needs work. The
7750b57cec5SDimitry Andric   // regbank check here is to know why getConstrainedRegClassForOperand failed.
7768bcb0991SDimitry Andric   const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, *MRI);
7778bcb0991SDimitry Andric   if ((!RC && !MRI->getRegBankOrNull(MO.getReg())) ||
7788bcb0991SDimitry Andric       (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI))) {
7790b57cec5SDimitry Andric     I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
7800b57cec5SDimitry Andric     return true;
7810b57cec5SDimitry Andric   }
7820b57cec5SDimitry Andric 
7830b57cec5SDimitry Andric   return false;
7840b57cec5SDimitry Andric }
7850b57cec5SDimitry Andric 
7860b57cec5SDimitry Andric bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
7870b57cec5SDimitry Andric   MachineBasicBlock *BB = I.getParent();
7888bcb0991SDimitry Andric 
7898bcb0991SDimitry Andric   Register DstReg = I.getOperand(0).getReg();
7908bcb0991SDimitry Andric   Register Src0Reg = I.getOperand(1).getReg();
7918bcb0991SDimitry Andric   Register Src1Reg = I.getOperand(2).getReg();
7928bcb0991SDimitry Andric   LLT Src1Ty = MRI->getType(Src1Reg);
7938bcb0991SDimitry Andric 
7948bcb0991SDimitry Andric   unsigned DstSize = MRI->getType(DstReg).getSizeInBits();
7958bcb0991SDimitry Andric   unsigned InsSize = Src1Ty.getSizeInBits();
7968bcb0991SDimitry Andric 
7978bcb0991SDimitry Andric   int64_t Offset = I.getOperand(3).getImm();
7985ffd83dbSDimitry Andric 
7995ffd83dbSDimitry Andric   // FIXME: These cases should have been illegal and unnecessary to check here.
8005ffd83dbSDimitry Andric   if (Offset % 32 != 0 || InsSize % 32 != 0)
8018bcb0991SDimitry Andric     return false;
8028bcb0991SDimitry Andric 
803e8d8bef9SDimitry Andric   // Currently not handled by getSubRegFromChannel.
804e8d8bef9SDimitry Andric   if (InsSize > 128)
805e8d8bef9SDimitry Andric     return false;
806e8d8bef9SDimitry Andric 
8078bcb0991SDimitry Andric   unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32, InsSize / 32);
8088bcb0991SDimitry Andric   if (SubReg == AMDGPU::NoSubRegister)
8098bcb0991SDimitry Andric     return false;
8108bcb0991SDimitry Andric 
8118bcb0991SDimitry Andric   const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
8128bcb0991SDimitry Andric   const TargetRegisterClass *DstRC =
81381ad6265SDimitry Andric       TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
8148bcb0991SDimitry Andric   if (!DstRC)
8158bcb0991SDimitry Andric     return false;
8168bcb0991SDimitry Andric 
8178bcb0991SDimitry Andric   const RegisterBank *Src0Bank = RBI.getRegBank(Src0Reg, *MRI, TRI);
8188bcb0991SDimitry Andric   const RegisterBank *Src1Bank = RBI.getRegBank(Src1Reg, *MRI, TRI);
8198bcb0991SDimitry Andric   const TargetRegisterClass *Src0RC =
82081ad6265SDimitry Andric       TRI.getRegClassForSizeOnBank(DstSize, *Src0Bank);
8218bcb0991SDimitry Andric   const TargetRegisterClass *Src1RC =
82281ad6265SDimitry Andric       TRI.getRegClassForSizeOnBank(InsSize, *Src1Bank);
8238bcb0991SDimitry Andric 
8248bcb0991SDimitry Andric   // Deal with weird cases where the class only partially supports the subreg
8258bcb0991SDimitry Andric   // index.
8268bcb0991SDimitry Andric   Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
8275ffd83dbSDimitry Andric   if (!Src0RC || !Src1RC)
8288bcb0991SDimitry Andric     return false;
8298bcb0991SDimitry Andric 
8308bcb0991SDimitry Andric   if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
8318bcb0991SDimitry Andric       !RBI.constrainGenericRegister(Src0Reg, *Src0RC, *MRI) ||
8328bcb0991SDimitry Andric       !RBI.constrainGenericRegister(Src1Reg, *Src1RC, *MRI))
8338bcb0991SDimitry Andric     return false;
8348bcb0991SDimitry Andric 
8358bcb0991SDimitry Andric   const DebugLoc &DL = I.getDebugLoc();
8368bcb0991SDimitry Andric   BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
8378bcb0991SDimitry Andric     .addReg(Src0Reg)
8388bcb0991SDimitry Andric     .addReg(Src1Reg)
8390b57cec5SDimitry Andric     .addImm(SubReg);
8400b57cec5SDimitry Andric 
8410b57cec5SDimitry Andric   I.eraseFromParent();
8420b57cec5SDimitry Andric   return true;
8430b57cec5SDimitry Andric }
8440b57cec5SDimitry Andric 
845fe6060f1SDimitry Andric bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(MachineInstr &MI) const {
846fe6060f1SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
847fe6060f1SDimitry Andric   Register SrcReg = MI.getOperand(1).getReg();
848fe6060f1SDimitry Andric   Register OffsetReg = MI.getOperand(2).getReg();
849fe6060f1SDimitry Andric   Register WidthReg = MI.getOperand(3).getReg();
850fe6060f1SDimitry Andric 
851fe6060f1SDimitry Andric   assert(RBI.getRegBank(DstReg, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID &&
852fe6060f1SDimitry Andric          "scalar BFX instructions are expanded in regbankselect");
853fe6060f1SDimitry Andric   assert(MRI->getType(MI.getOperand(0).getReg()).getSizeInBits() == 32 &&
854fe6060f1SDimitry Andric          "64-bit vector BFX instructions are expanded in regbankselect");
855fe6060f1SDimitry Andric 
856fe6060f1SDimitry Andric   const DebugLoc &DL = MI.getDebugLoc();
857fe6060f1SDimitry Andric   MachineBasicBlock *MBB = MI.getParent();
858fe6060f1SDimitry Andric 
859fe6060f1SDimitry Andric   bool IsSigned = MI.getOpcode() == TargetOpcode::G_SBFX;
860fe6060f1SDimitry Andric   unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
861fe6060f1SDimitry Andric   auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc), DstReg)
862fe6060f1SDimitry Andric                  .addReg(SrcReg)
863fe6060f1SDimitry Andric                  .addReg(OffsetReg)
864fe6060f1SDimitry Andric                  .addReg(WidthReg);
865fe6060f1SDimitry Andric   MI.eraseFromParent();
866fe6060f1SDimitry Andric   return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
867fe6060f1SDimitry Andric }
868fe6060f1SDimitry Andric 
8695ffd83dbSDimitry Andric bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const {
8705ffd83dbSDimitry Andric   if (STI.getLDSBankCount() != 16)
8715ffd83dbSDimitry Andric     return selectImpl(MI, *CoverageInfo);
8725ffd83dbSDimitry Andric 
8735ffd83dbSDimitry Andric   Register Dst = MI.getOperand(0).getReg();
8745ffd83dbSDimitry Andric   Register Src0 = MI.getOperand(2).getReg();
8755ffd83dbSDimitry Andric   Register M0Val = MI.getOperand(6).getReg();
8765ffd83dbSDimitry Andric   if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI) ||
8775ffd83dbSDimitry Andric       !RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI) ||
8785ffd83dbSDimitry Andric       !RBI.constrainGenericRegister(Src0, AMDGPU::VGPR_32RegClass, *MRI))
8795ffd83dbSDimitry Andric     return false;
8805ffd83dbSDimitry Andric 
8815ffd83dbSDimitry Andric   // This requires 2 instructions. It is possible to write a pattern to support
8825ffd83dbSDimitry Andric   // this, but the generated isel emitter doesn't correctly deal with multiple
8835ffd83dbSDimitry Andric   // output instructions using the same physical register input. The copy to m0
8845ffd83dbSDimitry Andric   // is incorrectly placed before the second instruction.
8855ffd83dbSDimitry Andric   //
8865ffd83dbSDimitry Andric   // TODO: Match source modifiers.
8875ffd83dbSDimitry Andric 
8885ffd83dbSDimitry Andric   Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8895ffd83dbSDimitry Andric   const DebugLoc &DL = MI.getDebugLoc();
8905ffd83dbSDimitry Andric   MachineBasicBlock *MBB = MI.getParent();
8915ffd83dbSDimitry Andric 
8925ffd83dbSDimitry Andric   BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
8935ffd83dbSDimitry Andric     .addReg(M0Val);
8945ffd83dbSDimitry Andric   BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)
8955ffd83dbSDimitry Andric     .addImm(2)
8965ffd83dbSDimitry Andric     .addImm(MI.getOperand(4).getImm())  // $attr
8975ffd83dbSDimitry Andric     .addImm(MI.getOperand(3).getImm()); // $attrchan
8985ffd83dbSDimitry Andric 
8995ffd83dbSDimitry Andric   BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_P1LV_F16), Dst)
9005ffd83dbSDimitry Andric     .addImm(0)                          // $src0_modifiers
9015ffd83dbSDimitry Andric     .addReg(Src0)                       // $src0
9025ffd83dbSDimitry Andric     .addImm(MI.getOperand(4).getImm())  // $attr
9035ffd83dbSDimitry Andric     .addImm(MI.getOperand(3).getImm())  // $attrchan
9045ffd83dbSDimitry Andric     .addImm(0)                          // $src2_modifiers
9055ffd83dbSDimitry Andric     .addReg(InterpMov)                  // $src2 - 2 f16 values selected by high
9065ffd83dbSDimitry Andric     .addImm(MI.getOperand(5).getImm())  // $high
9075ffd83dbSDimitry Andric     .addImm(0)                          // $clamp
9085ffd83dbSDimitry Andric     .addImm(0);                         // $omod
9095ffd83dbSDimitry Andric 
9105ffd83dbSDimitry Andric   MI.eraseFromParent();
9115ffd83dbSDimitry Andric   return true;
9125ffd83dbSDimitry Andric }
9135ffd83dbSDimitry Andric 
914e8d8bef9SDimitry Andric // Writelane is special in that it can use an SGPR and M0 at the same time
915e8d8bef9SDimitry Andric // (which would normally count as using the constant bus twice, but here the
916e8d8bef9SDimitry Andric // lane selector doesn't count as a constant bus use). However, it is still
917e8d8bef9SDimitry Andric // required to abide by the one-SGPR rule, so fix this up if we might have
918e8d8bef9SDimitry Andric // multiple SGPRs.
919e8d8bef9SDimitry Andric bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const {
920e8d8bef9SDimitry Andric   // With a constant bus limit of at least 2, there's no issue.
921e8d8bef9SDimitry Andric   if (STI.getConstantBusLimit(AMDGPU::V_WRITELANE_B32) > 1)
922e8d8bef9SDimitry Andric     return selectImpl(MI, *CoverageInfo);
923e8d8bef9SDimitry Andric 
924e8d8bef9SDimitry Andric   MachineBasicBlock *MBB = MI.getParent();
925e8d8bef9SDimitry Andric   const DebugLoc &DL = MI.getDebugLoc();
926e8d8bef9SDimitry Andric   Register VDst = MI.getOperand(0).getReg();
927e8d8bef9SDimitry Andric   Register Val = MI.getOperand(2).getReg();
928e8d8bef9SDimitry Andric   Register LaneSelect = MI.getOperand(3).getReg();
929e8d8bef9SDimitry Andric   Register VDstIn = MI.getOperand(4).getReg();
930e8d8bef9SDimitry Andric 
931e8d8bef9SDimitry Andric   auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);
932e8d8bef9SDimitry Andric 
933bdd1243dSDimitry Andric   std::optional<ValueAndVReg> ConstSelect =
934349cc55cSDimitry Andric       getIConstantVRegValWithLookThrough(LaneSelect, *MRI);
935e8d8bef9SDimitry Andric   if (ConstSelect) {
936e8d8bef9SDimitry Andric     // The selector has to be an inline immediate, so we can use whatever for
937e8d8bef9SDimitry Andric     // the other operands.
938e8d8bef9SDimitry Andric     MIB.addReg(Val);
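    // Mask the lane index to the wavefront size (low 5 bits for wave32, low 6
    // for wave64) so it remains a small inline immediate.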
939e8d8bef9SDimitry Andric     MIB.addImm(ConstSelect->Value.getSExtValue() &
940e8d8bef9SDimitry Andric                maskTrailingOnes<uint64_t>(STI.getWavefrontSizeLog2()));
941e8d8bef9SDimitry Andric   } else {
942bdd1243dSDimitry Andric     std::optional<ValueAndVReg> ConstVal =
943349cc55cSDimitry Andric         getIConstantVRegValWithLookThrough(Val, *MRI);
944e8d8bef9SDimitry Andric 
945e8d8bef9SDimitry Andric     // If the value written is an inline immediate, we can get away without a
946e8d8bef9SDimitry Andric     // copy to m0.
947e8d8bef9SDimitry Andric     if (ConstVal && AMDGPU::isInlinableLiteral32(ConstVal->Value.getSExtValue(),
948e8d8bef9SDimitry Andric                                                  STI.hasInv2PiInlineImm())) {
949e8d8bef9SDimitry Andric       MIB.addImm(ConstVal->Value.getSExtValue());
950e8d8bef9SDimitry Andric       MIB.addReg(LaneSelect);
951e8d8bef9SDimitry Andric     } else {
952e8d8bef9SDimitry Andric       MIB.addReg(Val);
953e8d8bef9SDimitry Andric 
954e8d8bef9SDimitry Andric       // If the lane selector was originally in a VGPR and copied with
955e8d8bef9SDimitry Andric       // readfirstlane, there's a hazard to read the same SGPR from the
956e8d8bef9SDimitry Andric       // VALU. Constrain to a different SGPR to help avoid needing a nop later.
957e8d8bef9SDimitry Andric       RBI.constrainGenericRegister(LaneSelect, AMDGPU::SReg_32_XM0RegClass, *MRI);
958e8d8bef9SDimitry Andric 
959e8d8bef9SDimitry Andric       BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
960e8d8bef9SDimitry Andric         .addReg(LaneSelect);
961e8d8bef9SDimitry Andric       MIB.addReg(AMDGPU::M0);
962e8d8bef9SDimitry Andric     }
963e8d8bef9SDimitry Andric   }
964e8d8bef9SDimitry Andric 
965e8d8bef9SDimitry Andric   MIB.addReg(VDstIn);
966e8d8bef9SDimitry Andric 
967e8d8bef9SDimitry Andric   MI.eraseFromParent();
968e8d8bef9SDimitry Andric   return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
969e8d8bef9SDimitry Andric }
970e8d8bef9SDimitry Andric 
9715ffd83dbSDimitry Andric // We need to handle this here because tablegen doesn't support matching
9725ffd83dbSDimitry Andric // instructions with multiple outputs.
9735ffd83dbSDimitry Andric bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {
9745ffd83dbSDimitry Andric   Register Dst0 = MI.getOperand(0).getReg();
9755ffd83dbSDimitry Andric   Register Dst1 = MI.getOperand(1).getReg();
9765ffd83dbSDimitry Andric 
9775ffd83dbSDimitry Andric   LLT Ty = MRI->getType(Dst0);
9785ffd83dbSDimitry Andric   unsigned Opc;
9795ffd83dbSDimitry Andric   if (Ty == LLT::scalar(32))
980e8d8bef9SDimitry Andric     Opc = AMDGPU::V_DIV_SCALE_F32_e64;
9815ffd83dbSDimitry Andric   else if (Ty == LLT::scalar(64))
982e8d8bef9SDimitry Andric     Opc = AMDGPU::V_DIV_SCALE_F64_e64;
9835ffd83dbSDimitry Andric   else
9845ffd83dbSDimitry Andric     return false;
9855ffd83dbSDimitry Andric 
986e8d8bef9SDimitry Andric   // TODO: Match source modifiers.
987e8d8bef9SDimitry Andric 
9885ffd83dbSDimitry Andric   const DebugLoc &DL = MI.getDebugLoc();
9895ffd83dbSDimitry Andric   MachineBasicBlock *MBB = MI.getParent();
9905ffd83dbSDimitry Andric 
9915ffd83dbSDimitry Andric   Register Numer = MI.getOperand(3).getReg();
9925ffd83dbSDimitry Andric   Register Denom = MI.getOperand(4).getReg();
9935ffd83dbSDimitry Andric   unsigned ChooseDenom = MI.getOperand(5).getImm();
9945ffd83dbSDimitry Andric 
9955ffd83dbSDimitry Andric   Register Src0 = ChooseDenom != 0 ? Numer : Denom;
9965ffd83dbSDimitry Andric 
9975ffd83dbSDimitry Andric   auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc), Dst0)
9985ffd83dbSDimitry Andric     .addDef(Dst1)
999e8d8bef9SDimitry Andric     .addImm(0)     // $src0_modifiers
1000e8d8bef9SDimitry Andric     .addUse(Src0)  // $src0
1001e8d8bef9SDimitry Andric     .addImm(0)     // $src1_modifiers
1002e8d8bef9SDimitry Andric     .addUse(Denom) // $src1
1003e8d8bef9SDimitry Andric     .addImm(0)     // $src2_modifiers
1004e8d8bef9SDimitry Andric     .addUse(Numer) // $src2
1005e8d8bef9SDimitry Andric     .addImm(0)     // $clamp
1006e8d8bef9SDimitry Andric     .addImm(0);    // $omod
10075ffd83dbSDimitry Andric 
10085ffd83dbSDimitry Andric   MI.eraseFromParent();
10095ffd83dbSDimitry Andric   return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
10105ffd83dbSDimitry Andric }
10115ffd83dbSDimitry Andric 
10128bcb0991SDimitry Andric bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
1013*0fca6ea1SDimitry Andric   Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
10140b57cec5SDimitry Andric   switch (IntrinsicID) {
10150b57cec5SDimitry Andric   case Intrinsic::amdgcn_if_break: {
10160b57cec5SDimitry Andric     MachineBasicBlock *BB = I.getParent();
10170b57cec5SDimitry Andric 
1018349cc55cSDimitry Andric     // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
10190b57cec5SDimitry Andric     // SelectionDAG uses for wave32 vs wave64.
10200b57cec5SDimitry Andric     BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
10210b57cec5SDimitry Andric       .add(I.getOperand(0))
10220b57cec5SDimitry Andric       .add(I.getOperand(2))
10230b57cec5SDimitry Andric       .add(I.getOperand(3));
10240b57cec5SDimitry Andric 
10250b57cec5SDimitry Andric     Register DstReg = I.getOperand(0).getReg();
10260b57cec5SDimitry Andric     Register Src0Reg = I.getOperand(2).getReg();
10270b57cec5SDimitry Andric     Register Src1Reg = I.getOperand(3).getReg();
10280b57cec5SDimitry Andric 
10290b57cec5SDimitry Andric     I.eraseFromParent();
10300b57cec5SDimitry Andric 
10318bcb0991SDimitry Andric     for (Register Reg : { DstReg, Src0Reg, Src1Reg })
10328bcb0991SDimitry Andric       MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());
10330b57cec5SDimitry Andric 
10340b57cec5SDimitry Andric     return true;
10350b57cec5SDimitry Andric   }
10365ffd83dbSDimitry Andric   case Intrinsic::amdgcn_interp_p1_f16:
10375ffd83dbSDimitry Andric     return selectInterpP1F16(I);
10385ffd83dbSDimitry Andric   case Intrinsic::amdgcn_wqm:
10395ffd83dbSDimitry Andric     return constrainCopyLikeIntrin(I, AMDGPU::WQM);
10405ffd83dbSDimitry Andric   case Intrinsic::amdgcn_softwqm:
10415ffd83dbSDimitry Andric     return constrainCopyLikeIntrin(I, AMDGPU::SOFT_WQM);
1042fe6060f1SDimitry Andric   case Intrinsic::amdgcn_strict_wwm:
10435ffd83dbSDimitry Andric   case Intrinsic::amdgcn_wwm:
1044fe6060f1SDimitry Andric     return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WWM);
1045fe6060f1SDimitry Andric   case Intrinsic::amdgcn_strict_wqm:
1046fe6060f1SDimitry Andric     return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WQM);
1047e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_writelane:
1048e8d8bef9SDimitry Andric     return selectWritelane(I);
10495ffd83dbSDimitry Andric   case Intrinsic::amdgcn_div_scale:
10505ffd83dbSDimitry Andric     return selectDivScale(I);
10515ffd83dbSDimitry Andric   case Intrinsic::amdgcn_icmp:
1052bdd1243dSDimitry Andric   case Intrinsic::amdgcn_fcmp:
1053bdd1243dSDimitry Andric     if (selectImpl(I, *CoverageInfo))
1054bdd1243dSDimitry Andric       return true;
1055bdd1243dSDimitry Andric     return selectIntrinsicCmp(I);
10565ffd83dbSDimitry Andric   case Intrinsic::amdgcn_ballot:
10575ffd83dbSDimitry Andric     return selectBallot(I);
1058e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_reloc_constant:
1059e8d8bef9SDimitry Andric     return selectRelocConstant(I);
1060e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_groupstaticsize:
1061e8d8bef9SDimitry Andric     return selectGroupStaticSize(I);
1062e8d8bef9SDimitry Andric   case Intrinsic::returnaddress:
1063e8d8bef9SDimitry Andric     return selectReturnAddress(I);
106481ad6265SDimitry Andric   case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
106581ad6265SDimitry Andric   case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
106681ad6265SDimitry Andric   case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
106781ad6265SDimitry Andric   case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
106881ad6265SDimitry Andric   case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
106981ad6265SDimitry Andric   case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
1070fcaf7f86SDimitry Andric   case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
1071fcaf7f86SDimitry Andric   case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
1072fcaf7f86SDimitry Andric   case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
1073fcaf7f86SDimitry Andric   case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
1074fcaf7f86SDimitry Andric   case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
1075fcaf7f86SDimitry Andric   case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
1076fcaf7f86SDimitry Andric   case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
1077fcaf7f86SDimitry Andric   case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
107881ad6265SDimitry Andric     return selectSMFMACIntrin(I);
10790b57cec5SDimitry Andric   default:
10808bcb0991SDimitry Andric     return selectImpl(I, *CoverageInfo);
10810b57cec5SDimitry Andric   }
10820b57cec5SDimitry Andric }
10830b57cec5SDimitry Andric 
1084bdd1243dSDimitry Andric static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size,
1085bdd1243dSDimitry Andric                           const GCNSubtarget &ST) {
1086bdd1243dSDimitry Andric   if (Size != 16 && Size != 32 && Size != 64)
10870b57cec5SDimitry Andric     return -1;
1088bdd1243dSDimitry Andric 
1089bdd1243dSDimitry Andric   if (Size == 16 && !ST.has16BitInsts())
1090bdd1243dSDimitry Andric     return -1;
1091bdd1243dSDimitry Andric 
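  // Pick the opcode for the requested compare size; 16-bit compares use the
  // _t16 encodings when the subtarget has true16 instructions.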
1092bdd1243dSDimitry Andric   const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc, unsigned S32Opc,
1093bdd1243dSDimitry Andric                           unsigned S64Opc) {
1094bdd1243dSDimitry Andric     if (Size == 16)
1095bdd1243dSDimitry Andric       return ST.hasTrue16BitInsts() ? TrueS16Opc : S16Opc;
1096bdd1243dSDimitry Andric     if (Size == 32)
1097bdd1243dSDimitry Andric       return S32Opc;
1098bdd1243dSDimitry Andric     return S64Opc;
1099bdd1243dSDimitry Andric   };
1100bdd1243dSDimitry Andric 
11010b57cec5SDimitry Andric   switch (P) {
11020b57cec5SDimitry Andric   default:
11030b57cec5SDimitry Andric     llvm_unreachable("Unknown condition code!");
11040b57cec5SDimitry Andric   case CmpInst::ICMP_NE:
1105bdd1243dSDimitry Andric     return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
1106bdd1243dSDimitry Andric                   AMDGPU::V_CMP_NE_U32_e64, AMDGPU::V_CMP_NE_U64_e64);
11070b57cec5SDimitry Andric   case CmpInst::ICMP_EQ:
1108bdd1243dSDimitry Andric     return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
1109bdd1243dSDimitry Andric                   AMDGPU::V_CMP_EQ_U32_e64, AMDGPU::V_CMP_EQ_U64_e64);
11100b57cec5SDimitry Andric   case CmpInst::ICMP_SGT:
1111bdd1243dSDimitry Andric     return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
1112bdd1243dSDimitry Andric                   AMDGPU::V_CMP_GT_I32_e64, AMDGPU::V_CMP_GT_I64_e64);
11130b57cec5SDimitry Andric   case CmpInst::ICMP_SGE:
1114bdd1243dSDimitry Andric     return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
1115bdd1243dSDimitry Andric                   AMDGPU::V_CMP_GE_I32_e64, AMDGPU::V_CMP_GE_I64_e64);
11160b57cec5SDimitry Andric   case CmpInst::ICMP_SLT:
1117bdd1243dSDimitry Andric     return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
1118bdd1243dSDimitry Andric                   AMDGPU::V_CMP_LT_I32_e64, AMDGPU::V_CMP_LT_I64_e64);
11190b57cec5SDimitry Andric   case CmpInst::ICMP_SLE:
1120bdd1243dSDimitry Andric     return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
1121bdd1243dSDimitry Andric                   AMDGPU::V_CMP_LE_I32_e64, AMDGPU::V_CMP_LE_I64_e64);
11220b57cec5SDimitry Andric   case CmpInst::ICMP_UGT:
1123bdd1243dSDimitry Andric     return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
1124bdd1243dSDimitry Andric                   AMDGPU::V_CMP_GT_U32_e64, AMDGPU::V_CMP_GT_U64_e64);
11250b57cec5SDimitry Andric   case CmpInst::ICMP_UGE:
1126bdd1243dSDimitry Andric     return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
1127bdd1243dSDimitry Andric                   AMDGPU::V_CMP_GE_U32_e64, AMDGPU::V_CMP_GE_U64_e64);
11280b57cec5SDimitry Andric   case CmpInst::ICMP_ULT:
1129bdd1243dSDimitry Andric     return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
1130bdd1243dSDimitry Andric                   AMDGPU::V_CMP_LT_U32_e64, AMDGPU::V_CMP_LT_U64_e64);
11310b57cec5SDimitry Andric   case CmpInst::ICMP_ULE:
1132bdd1243dSDimitry Andric     return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
1133bdd1243dSDimitry Andric                   AMDGPU::V_CMP_LE_U32_e64, AMDGPU::V_CMP_LE_U64_e64);
1134bdd1243dSDimitry Andric 
1135bdd1243dSDimitry Andric   case CmpInst::FCMP_OEQ:
1136bdd1243dSDimitry Andric     return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
1137bdd1243dSDimitry Andric                   AMDGPU::V_CMP_EQ_F32_e64, AMDGPU::V_CMP_EQ_F64_e64);
1138bdd1243dSDimitry Andric   case CmpInst::FCMP_OGT:
1139bdd1243dSDimitry Andric     return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
1140bdd1243dSDimitry Andric                   AMDGPU::V_CMP_GT_F32_e64, AMDGPU::V_CMP_GT_F64_e64);
1141bdd1243dSDimitry Andric   case CmpInst::FCMP_OGE:
1142bdd1243dSDimitry Andric     return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
1143bdd1243dSDimitry Andric                   AMDGPU::V_CMP_GE_F32_e64, AMDGPU::V_CMP_GE_F64_e64);
1144bdd1243dSDimitry Andric   case CmpInst::FCMP_OLT:
1145bdd1243dSDimitry Andric     return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
1146bdd1243dSDimitry Andric                   AMDGPU::V_CMP_LT_F32_e64, AMDGPU::V_CMP_LT_F64_e64);
1147bdd1243dSDimitry Andric   case CmpInst::FCMP_OLE:
1148bdd1243dSDimitry Andric     return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
1149bdd1243dSDimitry Andric                   AMDGPU::V_CMP_LE_F32_e64, AMDGPU::V_CMP_LE_F64_e64);
1150bdd1243dSDimitry Andric   case CmpInst::FCMP_ONE:
1151bdd1243dSDimitry Andric     return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
1152bdd1243dSDimitry Andric                   AMDGPU::V_CMP_NEQ_F32_e64, AMDGPU::V_CMP_NEQ_F64_e64);
1153bdd1243dSDimitry Andric   case CmpInst::FCMP_ORD:
1154bdd1243dSDimitry Andric     return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
1155bdd1243dSDimitry Andric                   AMDGPU::V_CMP_O_F32_e64, AMDGPU::V_CMP_O_F64_e64);
1156bdd1243dSDimitry Andric   case CmpInst::FCMP_UNO:
1157bdd1243dSDimitry Andric     return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
1158bdd1243dSDimitry Andric                   AMDGPU::V_CMP_U_F32_e64, AMDGPU::V_CMP_U_F64_e64);
1159bdd1243dSDimitry Andric   case CmpInst::FCMP_UEQ:
1160bdd1243dSDimitry Andric     return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
1161bdd1243dSDimitry Andric                   AMDGPU::V_CMP_NLG_F32_e64, AMDGPU::V_CMP_NLG_F64_e64);
1162bdd1243dSDimitry Andric   case CmpInst::FCMP_UGT:
1163bdd1243dSDimitry Andric     return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
1164bdd1243dSDimitry Andric                   AMDGPU::V_CMP_NLE_F32_e64, AMDGPU::V_CMP_NLE_F64_e64);
1165bdd1243dSDimitry Andric   case CmpInst::FCMP_UGE:
1166bdd1243dSDimitry Andric     return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
1167bdd1243dSDimitry Andric                   AMDGPU::V_CMP_NLT_F32_e64, AMDGPU::V_CMP_NLT_F64_e64);
1168bdd1243dSDimitry Andric   case CmpInst::FCMP_ULT:
1169bdd1243dSDimitry Andric     return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
1170bdd1243dSDimitry Andric                   AMDGPU::V_CMP_NGE_F32_e64, AMDGPU::V_CMP_NGE_F64_e64);
1171bdd1243dSDimitry Andric   case CmpInst::FCMP_ULE:
1172bdd1243dSDimitry Andric     return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
1173bdd1243dSDimitry Andric                   AMDGPU::V_CMP_NGT_F32_e64, AMDGPU::V_CMP_NGT_F64_e64);
1174bdd1243dSDimitry Andric   case CmpInst::FCMP_UNE:
1175bdd1243dSDimitry Andric     return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
1176bdd1243dSDimitry Andric                   AMDGPU::V_CMP_NEQ_F32_e64, AMDGPU::V_CMP_NEQ_F64_e64);
1177bdd1243dSDimitry Andric   case CmpInst::FCMP_TRUE:
1178bdd1243dSDimitry Andric     return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
1179bdd1243dSDimitry Andric                   AMDGPU::V_CMP_TRU_F32_e64, AMDGPU::V_CMP_TRU_F64_e64);
1180bdd1243dSDimitry Andric   case CmpInst::FCMP_FALSE:
1181bdd1243dSDimitry Andric     return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
1182bdd1243dSDimitry Andric                   AMDGPU::V_CMP_F_F32_e64, AMDGPU::V_CMP_F_F64_e64);
11830b57cec5SDimitry Andric   }
11840b57cec5SDimitry Andric }
11850b57cec5SDimitry Andric 
11860b57cec5SDimitry Andric int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
11870b57cec5SDimitry Andric                                               unsigned Size) const {
11880b57cec5SDimitry Andric   if (Size == 64) {
11890b57cec5SDimitry Andric     if (!STI.hasScalarCompareEq64())
11900b57cec5SDimitry Andric       return -1;
11910b57cec5SDimitry Andric 
11920b57cec5SDimitry Andric     switch (P) {
11930b57cec5SDimitry Andric     case CmpInst::ICMP_NE:
11940b57cec5SDimitry Andric       return AMDGPU::S_CMP_LG_U64;
11950b57cec5SDimitry Andric     case CmpInst::ICMP_EQ:
11960b57cec5SDimitry Andric       return AMDGPU::S_CMP_EQ_U64;
11970b57cec5SDimitry Andric     default:
11980b57cec5SDimitry Andric       return -1;
11990b57cec5SDimitry Andric     }
12000b57cec5SDimitry Andric   }
12010b57cec5SDimitry Andric 
12025f757f3fSDimitry Andric   if (Size == 32) {
12030b57cec5SDimitry Andric     switch (P) {
12040b57cec5SDimitry Andric     case CmpInst::ICMP_NE:
12050b57cec5SDimitry Andric       return AMDGPU::S_CMP_LG_U32;
12060b57cec5SDimitry Andric     case CmpInst::ICMP_EQ:
12070b57cec5SDimitry Andric       return AMDGPU::S_CMP_EQ_U32;
12080b57cec5SDimitry Andric     case CmpInst::ICMP_SGT:
12090b57cec5SDimitry Andric       return AMDGPU::S_CMP_GT_I32;
12100b57cec5SDimitry Andric     case CmpInst::ICMP_SGE:
12110b57cec5SDimitry Andric       return AMDGPU::S_CMP_GE_I32;
12120b57cec5SDimitry Andric     case CmpInst::ICMP_SLT:
12130b57cec5SDimitry Andric       return AMDGPU::S_CMP_LT_I32;
12140b57cec5SDimitry Andric     case CmpInst::ICMP_SLE:
12150b57cec5SDimitry Andric       return AMDGPU::S_CMP_LE_I32;
12160b57cec5SDimitry Andric     case CmpInst::ICMP_UGT:
12170b57cec5SDimitry Andric       return AMDGPU::S_CMP_GT_U32;
12180b57cec5SDimitry Andric     case CmpInst::ICMP_UGE:
12190b57cec5SDimitry Andric       return AMDGPU::S_CMP_GE_U32;
12200b57cec5SDimitry Andric     case CmpInst::ICMP_ULT:
12210b57cec5SDimitry Andric       return AMDGPU::S_CMP_LT_U32;
12220b57cec5SDimitry Andric     case CmpInst::ICMP_ULE:
12230b57cec5SDimitry Andric       return AMDGPU::S_CMP_LE_U32;
12245f757f3fSDimitry Andric     case CmpInst::FCMP_OEQ:
12255f757f3fSDimitry Andric       return AMDGPU::S_CMP_EQ_F32;
12265f757f3fSDimitry Andric     case CmpInst::FCMP_OGT:
12275f757f3fSDimitry Andric       return AMDGPU::S_CMP_GT_F32;
12285f757f3fSDimitry Andric     case CmpInst::FCMP_OGE:
12295f757f3fSDimitry Andric       return AMDGPU::S_CMP_GE_F32;
12305f757f3fSDimitry Andric     case CmpInst::FCMP_OLT:
12315f757f3fSDimitry Andric       return AMDGPU::S_CMP_LT_F32;
12325f757f3fSDimitry Andric     case CmpInst::FCMP_OLE:
12335f757f3fSDimitry Andric       return AMDGPU::S_CMP_LE_F32;
12345f757f3fSDimitry Andric     case CmpInst::FCMP_ONE:
12355f757f3fSDimitry Andric       return AMDGPU::S_CMP_LG_F32;
12365f757f3fSDimitry Andric     case CmpInst::FCMP_ORD:
12375f757f3fSDimitry Andric       return AMDGPU::S_CMP_O_F32;
12385f757f3fSDimitry Andric     case CmpInst::FCMP_UNO:
12395f757f3fSDimitry Andric       return AMDGPU::S_CMP_U_F32;
12405f757f3fSDimitry Andric     case CmpInst::FCMP_UEQ:
12415f757f3fSDimitry Andric       return AMDGPU::S_CMP_NLG_F32;
12425f757f3fSDimitry Andric     case CmpInst::FCMP_UGT:
12435f757f3fSDimitry Andric       return AMDGPU::S_CMP_NLE_F32;
12445f757f3fSDimitry Andric     case CmpInst::FCMP_UGE:
12455f757f3fSDimitry Andric       return AMDGPU::S_CMP_NLT_F32;
12465f757f3fSDimitry Andric     case CmpInst::FCMP_ULT:
12475f757f3fSDimitry Andric       return AMDGPU::S_CMP_NGE_F32;
12485f757f3fSDimitry Andric     case CmpInst::FCMP_ULE:
12495f757f3fSDimitry Andric       return AMDGPU::S_CMP_NGT_F32;
12505f757f3fSDimitry Andric     case CmpInst::FCMP_UNE:
12515f757f3fSDimitry Andric       return AMDGPU::S_CMP_NEQ_F32;
12520b57cec5SDimitry Andric     default:
12530b57cec5SDimitry Andric       llvm_unreachable("Unknown condition code!");
12540b57cec5SDimitry Andric     }
12550b57cec5SDimitry Andric   }
12560b57cec5SDimitry Andric 
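  // Only the floating-point predicates have 16-bit scalar compare forms here,
  // and they require the subtarget's SALU floating-point instructions.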
12575f757f3fSDimitry Andric   if (Size == 16) {
12585f757f3fSDimitry Andric     if (!STI.hasSALUFloatInsts())
12595f757f3fSDimitry Andric       return -1;
12605f757f3fSDimitry Andric 
12615f757f3fSDimitry Andric     switch (P) {
12625f757f3fSDimitry Andric     case CmpInst::FCMP_OEQ:
12635f757f3fSDimitry Andric       return AMDGPU::S_CMP_EQ_F16;
12645f757f3fSDimitry Andric     case CmpInst::FCMP_OGT:
12655f757f3fSDimitry Andric       return AMDGPU::S_CMP_GT_F16;
12665f757f3fSDimitry Andric     case CmpInst::FCMP_OGE:
12675f757f3fSDimitry Andric       return AMDGPU::S_CMP_GE_F16;
12685f757f3fSDimitry Andric     case CmpInst::FCMP_OLT:
12695f757f3fSDimitry Andric       return AMDGPU::S_CMP_LT_F16;
12705f757f3fSDimitry Andric     case CmpInst::FCMP_OLE:
12715f757f3fSDimitry Andric       return AMDGPU::S_CMP_LE_F16;
12725f757f3fSDimitry Andric     case CmpInst::FCMP_ONE:
12735f757f3fSDimitry Andric       return AMDGPU::S_CMP_LG_F16;
12745f757f3fSDimitry Andric     case CmpInst::FCMP_ORD:
12755f757f3fSDimitry Andric       return AMDGPU::S_CMP_O_F16;
12765f757f3fSDimitry Andric     case CmpInst::FCMP_UNO:
12775f757f3fSDimitry Andric       return AMDGPU::S_CMP_U_F16;
12785f757f3fSDimitry Andric     case CmpInst::FCMP_UEQ:
12795f757f3fSDimitry Andric       return AMDGPU::S_CMP_NLG_F16;
12805f757f3fSDimitry Andric     case CmpInst::FCMP_UGT:
12815f757f3fSDimitry Andric       return AMDGPU::S_CMP_NLE_F16;
12825f757f3fSDimitry Andric     case CmpInst::FCMP_UGE:
12835f757f3fSDimitry Andric       return AMDGPU::S_CMP_NLT_F16;
12845f757f3fSDimitry Andric     case CmpInst::FCMP_ULT:
12855f757f3fSDimitry Andric       return AMDGPU::S_CMP_NGE_F16;
12865f757f3fSDimitry Andric     case CmpInst::FCMP_ULE:
12875f757f3fSDimitry Andric       return AMDGPU::S_CMP_NGT_F16;
12885f757f3fSDimitry Andric     case CmpInst::FCMP_UNE:
12895f757f3fSDimitry Andric       return AMDGPU::S_CMP_NEQ_F16;
12905f757f3fSDimitry Andric     default:
12915f757f3fSDimitry Andric       llvm_unreachable("Unknown condition code!");
12925f757f3fSDimitry Andric     }
12935f757f3fSDimitry Andric   }
12945f757f3fSDimitry Andric 
12955f757f3fSDimitry Andric   return -1;
12965f757f3fSDimitry Andric }
12975f757f3fSDimitry Andric 
12985f757f3fSDimitry Andric bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {
13000b57cec5SDimitry Andric   MachineBasicBlock *BB = I.getParent();
13010b57cec5SDimitry Andric   const DebugLoc &DL = I.getDebugLoc();
13020b57cec5SDimitry Andric 
13038bcb0991SDimitry Andric   Register SrcReg = I.getOperand(2).getReg();
13048bcb0991SDimitry Andric   unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);
13050b57cec5SDimitry Andric 
13060b57cec5SDimitry Andric   auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
13070b57cec5SDimitry Andric 
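  // A non-VCC result is selected as a scalar S_CMP, which sets SCC; the SCC
  // bit is then copied into the 32-bit destination. Otherwise fall through to
  // a VALU V_CMP that produces a full wave mask.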
13088bcb0991SDimitry Andric   Register CCReg = I.getOperand(0).getReg();
1309480093f4SDimitry Andric   if (!isVCC(CCReg, *MRI)) {
13100b57cec5SDimitry Andric     int Opcode = getS_CMPOpcode(Pred, Size);
13110b57cec5SDimitry Andric     if (Opcode == -1)
13120b57cec5SDimitry Andric       return false;
13130b57cec5SDimitry Andric     MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
13140b57cec5SDimitry Andric             .add(I.getOperand(2))
13150b57cec5SDimitry Andric             .add(I.getOperand(3));
13160b57cec5SDimitry Andric     BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
13170b57cec5SDimitry Andric       .addReg(AMDGPU::SCC);
13180b57cec5SDimitry Andric     bool Ret =
13190b57cec5SDimitry Andric         constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) &&
13208bcb0991SDimitry Andric         RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, *MRI);
13210b57cec5SDimitry Andric     I.eraseFromParent();
13220b57cec5SDimitry Andric     return Ret;
13230b57cec5SDimitry Andric   }
13240b57cec5SDimitry Andric 
13255f757f3fSDimitry Andric   if (I.getOpcode() == AMDGPU::G_FCMP)
13265f757f3fSDimitry Andric     return false;
13275f757f3fSDimitry Andric 
1328bdd1243dSDimitry Andric   int Opcode = getV_CMPOpcode(Pred, Size, *Subtarget);
13290b57cec5SDimitry Andric   if (Opcode == -1)
13300b57cec5SDimitry Andric     return false;
13310b57cec5SDimitry Andric 
13320b57cec5SDimitry Andric   MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
13330b57cec5SDimitry Andric             I.getOperand(0).getReg())
13340b57cec5SDimitry Andric             .add(I.getOperand(2))
13350b57cec5SDimitry Andric             .add(I.getOperand(3));
13360b57cec5SDimitry Andric   RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
13378bcb0991SDimitry Andric                                *TRI.getBoolRC(), *MRI);
13380b57cec5SDimitry Andric   bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
13390b57cec5SDimitry Andric   I.eraseFromParent();
13400b57cec5SDimitry Andric   return Ret;
13410b57cec5SDimitry Andric }
13420b57cec5SDimitry Andric 
1343bdd1243dSDimitry Andric bool AMDGPUInstructionSelector::selectIntrinsicCmp(MachineInstr &I) const {
13445ffd83dbSDimitry Andric   Register Dst = I.getOperand(0).getReg();
13455ffd83dbSDimitry Andric   if (isVCC(Dst, *MRI))
13465ffd83dbSDimitry Andric     return false;
13475ffd83dbSDimitry Andric 
1348bdd1243dSDimitry Andric   LLT DstTy = MRI->getType(Dst);
1349bdd1243dSDimitry Andric   if (DstTy.getSizeInBits() != STI.getWavefrontSize())
13505ffd83dbSDimitry Andric     return false;
13515ffd83dbSDimitry Andric 
13525ffd83dbSDimitry Andric   MachineBasicBlock *BB = I.getParent();
13535ffd83dbSDimitry Andric   const DebugLoc &DL = I.getDebugLoc();
13545ffd83dbSDimitry Andric   Register SrcReg = I.getOperand(2).getReg();
13555ffd83dbSDimitry Andric   unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);
135604eeddc0SDimitry Andric 
1357bdd1243dSDimitry Andric   // i1 inputs are not supported in GlobalISel.
1358bdd1243dSDimitry Andric   if (Size == 1)
135904eeddc0SDimitry Andric     return false;
1360bdd1243dSDimitry Andric 
1361bdd1243dSDimitry Andric   auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(4).getImm());
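  // An out-of-range predicate immediate produces an undefined result rather
  // than a selection failure.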
1362bdd1243dSDimitry Andric   if (!CmpInst::isIntPredicate(Pred) && !CmpInst::isFPPredicate(Pred)) {
1363bdd1243dSDimitry Andric     BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
136404eeddc0SDimitry Andric     I.eraseFromParent();
1365bdd1243dSDimitry Andric     return RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);
136604eeddc0SDimitry Andric   }
13675ffd83dbSDimitry Andric 
1368bdd1243dSDimitry Andric   const int Opcode = getV_CMPOpcode(Pred, Size, *Subtarget);
13695ffd83dbSDimitry Andric   if (Opcode == -1)
13705ffd83dbSDimitry Andric     return false;
13715ffd83dbSDimitry Andric 
137206c3fb27SDimitry Andric   MachineInstrBuilder SelectedMI;
1373bdd1243dSDimitry Andric   MachineOperand &LHS = I.getOperand(2);
1374bdd1243dSDimitry Andric   MachineOperand &RHS = I.getOperand(3);
1375bdd1243dSDimitry Andric   auto [Src0, Src0Mods] = selectVOP3ModsImpl(LHS);
1376bdd1243dSDimitry Andric   auto [Src1, Src1Mods] = selectVOP3ModsImpl(RHS);
1377bdd1243dSDimitry Andric   Register Src0Reg =
1378bdd1243dSDimitry Andric       copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &I, /*ForceVGPR*/ true);
1379bdd1243dSDimitry Andric   Register Src1Reg =
1380bdd1243dSDimitry Andric       copyToVGPRIfSrcFolded(Src1, Src1Mods, RHS, &I, /*ForceVGPR*/ true);
138106c3fb27SDimitry Andric   SelectedMI = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst);
138206c3fb27SDimitry Andric   if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src0_modifiers))
138306c3fb27SDimitry Andric     SelectedMI.addImm(Src0Mods);
138406c3fb27SDimitry Andric   SelectedMI.addReg(Src0Reg);
138506c3fb27SDimitry Andric   if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src1_modifiers))
138606c3fb27SDimitry Andric     SelectedMI.addImm(Src1Mods);
138706c3fb27SDimitry Andric   SelectedMI.addReg(Src1Reg);
138806c3fb27SDimitry Andric   if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::clamp))
138906c3fb27SDimitry Andric     SelectedMI.addImm(0); // clamp
139006c3fb27SDimitry Andric   if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::op_sel))
139106c3fb27SDimitry Andric     SelectedMI.addImm(0); // op_sel
1392bdd1243dSDimitry Andric 
1393bdd1243dSDimitry Andric   RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);
1394bdd1243dSDimitry Andric   if (!constrainSelectedInstRegOperands(*SelectedMI, TII, TRI, RBI))
1395bdd1243dSDimitry Andric     return false;
1396bdd1243dSDimitry Andric 
13975ffd83dbSDimitry Andric   I.eraseFromParent();
1398bdd1243dSDimitry Andric   return true;
13990b57cec5SDimitry Andric }
14000b57cec5SDimitry Andric 
14015ffd83dbSDimitry Andric bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
14025ffd83dbSDimitry Andric   MachineBasicBlock *BB = I.getParent();
14035ffd83dbSDimitry Andric   const DebugLoc &DL = I.getDebugLoc();
14045ffd83dbSDimitry Andric   Register DstReg = I.getOperand(0).getReg();
14055ffd83dbSDimitry Andric   const unsigned Size = MRI->getType(DstReg).getSizeInBits();
14065ffd83dbSDimitry Andric   const bool Is64 = Size == 64;
140706c3fb27SDimitry Andric   const bool IsWave32 = (STI.getWavefrontSize() == 32);
14080b57cec5SDimitry Andric 
140906c3fb27SDimitry Andric   // In the common case, the return type matches the wave size.
141006c3fb27SDimitry Andric   // However, we also support emitting i64 ballots in wave32 mode.
141106c3fb27SDimitry Andric   if (Size != STI.getWavefrontSize() && (!Is64 || !IsWave32))
14125ffd83dbSDimitry Andric     return false;
14138bcb0991SDimitry Andric 
1414bdd1243dSDimitry Andric   std::optional<ValueAndVReg> Arg =
1415349cc55cSDimitry Andric       getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), *MRI);
14168bcb0991SDimitry Andric 
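  // Copies a wave-mask source into the ballot result, zero-filling the high
  // half when a 64-bit ballot is emitted in wave32 mode.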
141706c3fb27SDimitry Andric   const auto BuildCopy = [&](Register SrcReg) {
141806c3fb27SDimitry Andric     if (Size == STI.getWavefrontSize()) {
141906c3fb27SDimitry Andric       BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg)
142006c3fb27SDimitry Andric           .addReg(SrcReg);
142106c3fb27SDimitry Andric       return;
142206c3fb27SDimitry Andric     }
142306c3fb27SDimitry Andric 
142406c3fb27SDimitry Andric     // If emitting an i64 ballot in wave32, fill the upper bits with zeroes.
142506c3fb27SDimitry Andric     Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
142606c3fb27SDimitry Andric     BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), HiReg).addImm(0);
142706c3fb27SDimitry Andric     BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
142806c3fb27SDimitry Andric         .addReg(SrcReg)
142906c3fb27SDimitry Andric         .addImm(AMDGPU::sub0)
143006c3fb27SDimitry Andric         .addReg(HiReg)
143106c3fb27SDimitry Andric         .addImm(AMDGPU::sub1);
143206c3fb27SDimitry Andric   };
143306c3fb27SDimitry Andric 
143481ad6265SDimitry Andric   if (Arg) {
1435bdd1243dSDimitry Andric     const int64_t Value = Arg->Value.getSExtValue();
14365ffd83dbSDimitry Andric     if (Value == 0) {
14375ffd83dbSDimitry Andric       unsigned Opcode = Is64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
14385ffd83dbSDimitry Andric       BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg).addImm(0);
143906c3fb27SDimitry Andric     } else if (Value == -1) // all ones
144006c3fb27SDimitry Andric       BuildCopy(IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC);
144106c3fb27SDimitry Andric     else
14425ffd83dbSDimitry Andric       return false;
144306c3fb27SDimitry Andric   } else
144406c3fb27SDimitry Andric     BuildCopy(I.getOperand(2).getReg());
144506c3fb27SDimitry Andric 
144606c3fb27SDimitry Andric   I.eraseFromParent();
144706c3fb27SDimitry Andric   return true;
14488bcb0991SDimitry Andric }
14498bcb0991SDimitry Andric 
1450e8d8bef9SDimitry Andric bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
1451e8d8bef9SDimitry Andric   Register DstReg = I.getOperand(0).getReg();
1452e8d8bef9SDimitry Andric   const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
145381ad6265SDimitry Andric   const TargetRegisterClass *DstRC = TRI.getRegClassForSizeOnBank(32, *DstBank);
1454e8d8bef9SDimitry Andric   if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
1455e8d8bef9SDimitry Andric     return false;
1456e8d8bef9SDimitry Andric 
1457e8d8bef9SDimitry Andric   const bool IsVALU = DstBank->getID() == AMDGPU::VGPRRegBankID;
1458e8d8bef9SDimitry Andric 
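  // The relocation is materialized as a mov of the address of a named global;
  // MO_ABS32_LO requests a 32-bit absolute (low) relocation against it.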
1459e8d8bef9SDimitry Andric   Module *M = MF->getFunction().getParent();
1460e8d8bef9SDimitry Andric   const MDNode *Metadata = I.getOperand(2).getMetadata();
1461e8d8bef9SDimitry Andric   auto SymbolName = cast<MDString>(Metadata->getOperand(0))->getString();
1462e8d8bef9SDimitry Andric   auto RelocSymbol = cast<GlobalVariable>(
1463e8d8bef9SDimitry Andric     M->getOrInsertGlobal(SymbolName, Type::getInt32Ty(M->getContext())));
1464e8d8bef9SDimitry Andric 
1465e8d8bef9SDimitry Andric   MachineBasicBlock *BB = I.getParent();
1466e8d8bef9SDimitry Andric   BuildMI(*BB, &I, I.getDebugLoc(),
1467e8d8bef9SDimitry Andric           TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)
1468e8d8bef9SDimitry Andric     .addGlobalAddress(RelocSymbol, 0, SIInstrInfo::MO_ABS32_LO);
1469e8d8bef9SDimitry Andric 
1470e8d8bef9SDimitry Andric   I.eraseFromParent();
1471e8d8bef9SDimitry Andric   return true;
1472e8d8bef9SDimitry Andric }
1473e8d8bef9SDimitry Andric 
1474e8d8bef9SDimitry Andric bool AMDGPUInstructionSelector::selectGroupStaticSize(MachineInstr &I) const {
1475e8d8bef9SDimitry Andric   Triple::OSType OS = MF->getTarget().getTargetTriple().getOS();
1476e8d8bef9SDimitry Andric 
1477e8d8bef9SDimitry Andric   Register DstReg = I.getOperand(0).getReg();
1478e8d8bef9SDimitry Andric   const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
1479e8d8bef9SDimitry Andric   unsigned Mov = DstRB->getID() == AMDGPU::SGPRRegBankID ?
1480e8d8bef9SDimitry Andric     AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
1481e8d8bef9SDimitry Andric 
1482e8d8bef9SDimitry Andric   MachineBasicBlock *MBB = I.getParent();
1483e8d8bef9SDimitry Andric   const DebugLoc &DL = I.getDebugLoc();
1484e8d8bef9SDimitry Andric 
1485e8d8bef9SDimitry Andric   auto MIB = BuildMI(*MBB, &I, DL, TII.get(Mov), DstReg);
1486e8d8bef9SDimitry Andric 
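  // On AMDHSA and AMDPAL the LDS usage is already known, so emit the size
  // directly; otherwise reference the groupstaticsize symbol for later
  // resolution.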
1487e8d8bef9SDimitry Andric   if (OS == Triple::AMDHSA || OS == Triple::AMDPAL) {
1488e8d8bef9SDimitry Andric     const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
1489e8d8bef9SDimitry Andric     MIB.addImm(MFI->getLDSSize());
1490e8d8bef9SDimitry Andric   } else {
1491e8d8bef9SDimitry Andric     Module *M = MF->getFunction().getParent();
1492e8d8bef9SDimitry Andric     const GlobalValue *GV
1493e8d8bef9SDimitry Andric       = Intrinsic::getDeclaration(M, Intrinsic::amdgcn_groupstaticsize);
1494e8d8bef9SDimitry Andric     MIB.addGlobalAddress(GV, 0, SIInstrInfo::MO_ABS32_LO);
1495e8d8bef9SDimitry Andric   }
1496e8d8bef9SDimitry Andric 
1497e8d8bef9SDimitry Andric   I.eraseFromParent();
1498e8d8bef9SDimitry Andric   return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1499e8d8bef9SDimitry Andric }
1500e8d8bef9SDimitry Andric 
1501e8d8bef9SDimitry Andric bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
1502e8d8bef9SDimitry Andric   MachineBasicBlock *MBB = I.getParent();
1503e8d8bef9SDimitry Andric   MachineFunction &MF = *MBB->getParent();
1504e8d8bef9SDimitry Andric   const DebugLoc &DL = I.getDebugLoc();
1505e8d8bef9SDimitry Andric 
1506e8d8bef9SDimitry Andric   MachineOperand &Dst = I.getOperand(0);
1507e8d8bef9SDimitry Andric   Register DstReg = Dst.getReg();
1508e8d8bef9SDimitry Andric   unsigned Depth = I.getOperand(2).getImm();
1509e8d8bef9SDimitry Andric 
1510e8d8bef9SDimitry Andric   const TargetRegisterClass *RC
1511e8d8bef9SDimitry Andric     = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
1512e8d8bef9SDimitry Andric   if (!RC->hasSubClassEq(&AMDGPU::SGPR_64RegClass) ||
1513e8d8bef9SDimitry Andric       !RBI.constrainGenericRegister(DstReg, *RC, *MRI))
1514e8d8bef9SDimitry Andric     return false;
1515e8d8bef9SDimitry Andric 
1516e8d8bef9SDimitry Andric   // Entry functions (kernels and shaders) and nonzero depths have no usable return address; return 0.
1517e8d8bef9SDimitry Andric   if (Depth != 0 ||
1518e8d8bef9SDimitry Andric       MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
1519e8d8bef9SDimitry Andric     BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_MOV_B64), DstReg)
1520e8d8bef9SDimitry Andric       .addImm(0);
1521e8d8bef9SDimitry Andric     I.eraseFromParent();
1522e8d8bef9SDimitry Andric     return true;
1523e8d8bef9SDimitry Andric   }
1524e8d8bef9SDimitry Andric 
1525e8d8bef9SDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
1526e8d8bef9SDimitry Andric   // There is a call to @llvm.returnaddress in this function
1527e8d8bef9SDimitry Andric   MFI.setReturnAddressIsTaken(true);
1528e8d8bef9SDimitry Andric 
1529e8d8bef9SDimitry Andric   // Get the return address reg and mark it as an implicit live-in
1530e8d8bef9SDimitry Andric   Register ReturnAddrReg = TRI.getReturnAddressReg(MF);
1531e8d8bef9SDimitry Andric   Register LiveIn = getFunctionLiveInPhysReg(MF, TII, ReturnAddrReg,
153204eeddc0SDimitry Andric                                              AMDGPU::SReg_64RegClass, DL);
1533e8d8bef9SDimitry Andric   BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), DstReg)
1534e8d8bef9SDimitry Andric     .addReg(LiveIn);
1535e8d8bef9SDimitry Andric   I.eraseFromParent();
1536e8d8bef9SDimitry Andric   return true;
1537e8d8bef9SDimitry Andric }
1538e8d8bef9SDimitry Andric 
15395ffd83dbSDimitry Andric bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
1540349cc55cSDimitry Andric   // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
15415ffd83dbSDimitry Andric   // SelectionDAG uses for wave32 vs wave64.
15425ffd83dbSDimitry Andric   MachineBasicBlock *BB = MI.getParent();
15435ffd83dbSDimitry Andric   BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
15445ffd83dbSDimitry Andric       .add(MI.getOperand(1));
15458bcb0991SDimitry Andric 
15465ffd83dbSDimitry Andric   Register Reg = MI.getOperand(1).getReg();
15478bcb0991SDimitry Andric   MI.eraseFromParent();
15488bcb0991SDimitry Andric 
15495ffd83dbSDimitry Andric   if (!MRI->getRegClassOrNull(Reg))
15505ffd83dbSDimitry Andric     MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());
15515ffd83dbSDimitry Andric   return true;
15528bcb0991SDimitry Andric }
15538bcb0991SDimitry Andric 
1554480093f4SDimitry Andric bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
1555480093f4SDimitry Andric   MachineInstr &MI, Intrinsic::ID IntrID) const {
1556480093f4SDimitry Andric   MachineBasicBlock *MBB = MI.getParent();
1557480093f4SDimitry Andric   MachineFunction *MF = MBB->getParent();
1558480093f4SDimitry Andric   const DebugLoc &DL = MI.getDebugLoc();
1559480093f4SDimitry Andric 
1560480093f4SDimitry Andric   unsigned IndexOperand = MI.getOperand(7).getImm();
1561480093f4SDimitry Andric   bool WaveRelease = MI.getOperand(8).getImm() != 0;
1562480093f4SDimitry Andric   bool WaveDone = MI.getOperand(9).getImm() != 0;
1563480093f4SDimitry Andric 
1564480093f4SDimitry Andric   if (WaveDone && !WaveRelease)
1565480093f4SDimitry Andric     report_fatal_error("ds_ordered_count: wave_done requires wave_release");
1566480093f4SDimitry Andric 
1567480093f4SDimitry Andric   unsigned OrderedCountIndex = IndexOperand & 0x3f;
1568480093f4SDimitry Andric   IndexOperand &= ~0x3f;
1569480093f4SDimitry Andric   unsigned CountDw = 0;
1570480093f4SDimitry Andric 
1571480093f4SDimitry Andric   if (STI.getGeneration() >= AMDGPUSubtarget::GFX10) {
1572480093f4SDimitry Andric     CountDw = (IndexOperand >> 24) & 0xf;
1573480093f4SDimitry Andric     IndexOperand &= ~(0xf << 24);
1574480093f4SDimitry Andric 
1575480093f4SDimitry Andric     if (CountDw < 1 || CountDw > 4) {
1576480093f4SDimitry Andric       report_fatal_error(
1577480093f4SDimitry Andric         "ds_ordered_count: dword count must be between 1 and 4");
1578480093f4SDimitry Andric     }
1579480093f4SDimitry Andric   }
1580480093f4SDimitry Andric 
1581480093f4SDimitry Andric   if (IndexOperand)
1582480093f4SDimitry Andric     report_fatal_error("ds_ordered_count: bad index operand");
1583480093f4SDimitry Andric 
1584480093f4SDimitry Andric   unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
1585e8d8bef9SDimitry Andric   unsigned ShaderType = SIInstrInfo::getDSShaderTypeValue(*MF);
1586480093f4SDimitry Andric 
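  // Pack the DS_ORDERED_COUNT offset field: offset0 holds the ordered-count
  // index as a byte offset; offset1 packs wave_release, wave_done, the
  // add/swap selector, the dword count (GFX10+), and, before GFX11, the
  // shader type.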
1587480093f4SDimitry Andric   unsigned Offset0 = OrderedCountIndex << 2;
158881ad6265SDimitry Andric   unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);
1589480093f4SDimitry Andric 
1590480093f4SDimitry Andric   if (STI.getGeneration() >= AMDGPUSubtarget::GFX10)
1591480093f4SDimitry Andric     Offset1 |= (CountDw - 1) << 6;
1592480093f4SDimitry Andric 
159381ad6265SDimitry Andric   if (STI.getGeneration() < AMDGPUSubtarget::GFX11)
159481ad6265SDimitry Andric     Offset1 |= ShaderType << 2;
159581ad6265SDimitry Andric 
1596480093f4SDimitry Andric   unsigned Offset = Offset0 | (Offset1 << 8);
1597480093f4SDimitry Andric 
1598480093f4SDimitry Andric   Register M0Val = MI.getOperand(2).getReg();
1599480093f4SDimitry Andric   BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
1600480093f4SDimitry Andric     .addReg(M0Val);
1601480093f4SDimitry Andric 
1602480093f4SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
1603480093f4SDimitry Andric   Register ValReg = MI.getOperand(3).getReg();
1604480093f4SDimitry Andric   MachineInstrBuilder DS =
1605480093f4SDimitry Andric     BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)
1606480093f4SDimitry Andric       .addReg(ValReg)
1607480093f4SDimitry Andric       .addImm(Offset)
1608480093f4SDimitry Andric       .cloneMemRefs(MI);
1609480093f4SDimitry Andric 
1610480093f4SDimitry Andric   if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI))
1611480093f4SDimitry Andric     return false;
1612480093f4SDimitry Andric 
1613480093f4SDimitry Andric   bool Ret = constrainSelectedInstRegOperands(*DS, TII, TRI, RBI);
1614480093f4SDimitry Andric   MI.eraseFromParent();
1615480093f4SDimitry Andric   return Ret;
1616480093f4SDimitry Andric }
1617480093f4SDimitry Andric 
16185ffd83dbSDimitry Andric static unsigned gwsIntrinToOpcode(unsigned IntrID) {
16195ffd83dbSDimitry Andric   switch (IntrID) {
16205ffd83dbSDimitry Andric   case Intrinsic::amdgcn_ds_gws_init:
16215ffd83dbSDimitry Andric     return AMDGPU::DS_GWS_INIT;
16225ffd83dbSDimitry Andric   case Intrinsic::amdgcn_ds_gws_barrier:
16235ffd83dbSDimitry Andric     return AMDGPU::DS_GWS_BARRIER;
16245ffd83dbSDimitry Andric   case Intrinsic::amdgcn_ds_gws_sema_v:
16255ffd83dbSDimitry Andric     return AMDGPU::DS_GWS_SEMA_V;
16265ffd83dbSDimitry Andric   case Intrinsic::amdgcn_ds_gws_sema_br:
16275ffd83dbSDimitry Andric     return AMDGPU::DS_GWS_SEMA_BR;
16285ffd83dbSDimitry Andric   case Intrinsic::amdgcn_ds_gws_sema_p:
16295ffd83dbSDimitry Andric     return AMDGPU::DS_GWS_SEMA_P;
16305ffd83dbSDimitry Andric   case Intrinsic::amdgcn_ds_gws_sema_release_all:
16315ffd83dbSDimitry Andric     return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
16325ffd83dbSDimitry Andric   default:
16335ffd83dbSDimitry Andric     llvm_unreachable("not a gws intrinsic");
16340b57cec5SDimitry Andric   }
16350b57cec5SDimitry Andric }
16360b57cec5SDimitry Andric 
16375ffd83dbSDimitry Andric bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
16385ffd83dbSDimitry Andric                                                      Intrinsic::ID IID) const {
16395f757f3fSDimitry Andric   if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
16405f757f3fSDimitry Andric                         !STI.hasGWSSemaReleaseAll()))
16415ffd83dbSDimitry Andric     return false;
16420b57cec5SDimitry Andric 
16435ffd83dbSDimitry Andric   // intrinsic ID, vsrc, offset
16445ffd83dbSDimitry Andric   const bool HasVSrc = MI.getNumOperands() == 3;
16455ffd83dbSDimitry Andric   assert(HasVSrc || MI.getNumOperands() == 2);
16465ffd83dbSDimitry Andric 
16475ffd83dbSDimitry Andric   Register BaseOffset = MI.getOperand(HasVSrc ? 2 : 1).getReg();
16485ffd83dbSDimitry Andric   const RegisterBank *OffsetRB = RBI.getRegBank(BaseOffset, *MRI, TRI);
16495ffd83dbSDimitry Andric   if (OffsetRB->getID() != AMDGPU::SGPRRegBankID)
16505ffd83dbSDimitry Andric     return false;
16515ffd83dbSDimitry Andric 
16525ffd83dbSDimitry Andric   MachineInstr *OffsetDef = getDefIgnoringCopies(BaseOffset, *MRI);
16535ffd83dbSDimitry Andric   unsigned ImmOffset;
16545ffd83dbSDimitry Andric 
16555ffd83dbSDimitry Andric   MachineBasicBlock *MBB = MI.getParent();
16565ffd83dbSDimitry Andric   const DebugLoc &DL = MI.getDebugLoc();
16575ffd83dbSDimitry Andric 
16585ffd83dbSDimitry Andric   MachineInstr *Readfirstlane = nullptr;
16595ffd83dbSDimitry Andric 
16605ffd83dbSDimitry Andric   // If we legalized the VGPR input, strip out the readfirstlane to analyze the
16615ffd83dbSDimitry Andric   // incoming offset, in case there's an add of a constant. We'll have to put it
16625ffd83dbSDimitry Andric   // back later.
16635ffd83dbSDimitry Andric   if (OffsetDef->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
16645ffd83dbSDimitry Andric     Readfirstlane = OffsetDef;
16655ffd83dbSDimitry Andric     BaseOffset = OffsetDef->getOperand(1).getReg();
16665ffd83dbSDimitry Andric     OffsetDef = getDefIgnoringCopies(BaseOffset, *MRI);
16675ffd83dbSDimitry Andric   }
16685ffd83dbSDimitry Andric 
16695ffd83dbSDimitry Andric   if (OffsetDef->getOpcode() == AMDGPU::G_CONSTANT) {
16705ffd83dbSDimitry Andric     // If we have a constant offset, try to use the 0 in m0 as the base.
16715ffd83dbSDimitry Andric     // TODO: Look into changing the default m0 initialization value. If the
16725ffd83dbSDimitry Andric     // default -1 only set the low 16-bits, we could leave it as-is and add 1 to
16735ffd83dbSDimitry Andric     // the immediate offset.
16745ffd83dbSDimitry Andric 
16755ffd83dbSDimitry Andric     ImmOffset = OffsetDef->getOperand(1).getCImm()->getZExtValue();
16765ffd83dbSDimitry Andric     BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
16775ffd83dbSDimitry Andric       .addImm(0);
16785ffd83dbSDimitry Andric   } else {
1679e8d8bef9SDimitry Andric     std::tie(BaseOffset, ImmOffset) =
168006c3fb27SDimitry Andric         AMDGPU::getBaseWithConstantOffset(*MRI, BaseOffset, KB);
16815ffd83dbSDimitry Andric 
16825ffd83dbSDimitry Andric     if (Readfirstlane) {
16835ffd83dbSDimitry Andric       // We have the constant offset now, so put the readfirstlane back on the
16845ffd83dbSDimitry Andric       // variable component.
16855ffd83dbSDimitry Andric       if (!RBI.constrainGenericRegister(BaseOffset, AMDGPU::VGPR_32RegClass, *MRI))
16865ffd83dbSDimitry Andric         return false;
16875ffd83dbSDimitry Andric 
16885ffd83dbSDimitry Andric       Readfirstlane->getOperand(1).setReg(BaseOffset);
16895ffd83dbSDimitry Andric       BaseOffset = Readfirstlane->getOperand(0).getReg();
16905ffd83dbSDimitry Andric     } else {
16915ffd83dbSDimitry Andric       if (!RBI.constrainGenericRegister(BaseOffset,
16925ffd83dbSDimitry Andric                                         AMDGPU::SReg_32RegClass, *MRI))
16935ffd83dbSDimitry Andric         return false;
16945ffd83dbSDimitry Andric     }
16955ffd83dbSDimitry Andric 
16965ffd83dbSDimitry Andric     Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
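    // GWS reads its resource base from M0[21:16], so shift the scalar base
    // offset into that position before copying it into m0.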
16975ffd83dbSDimitry Andric     BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_LSHL_B32), M0Base)
16985ffd83dbSDimitry Andric       .addReg(BaseOffset)
16995f757f3fSDimitry Andric       .addImm(16)
17005f757f3fSDimitry Andric       .setOperandDead(3); // Dead scc
17015ffd83dbSDimitry Andric 
17025ffd83dbSDimitry Andric     BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
17035ffd83dbSDimitry Andric       .addReg(M0Base);
17045ffd83dbSDimitry Andric   }
17055ffd83dbSDimitry Andric 
17065ffd83dbSDimitry Andric   // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
17075ffd83dbSDimitry Andric   // offset field) % 64. Some versions of the programming guide omit the m0
17085ffd83dbSDimitry Andric   // part, or claim it's from offset 0.
17095ffd83dbSDimitry Andric   auto MIB = BuildMI(*MBB, &MI, DL, TII.get(gwsIntrinToOpcode(IID)));
17105ffd83dbSDimitry Andric 
17115ffd83dbSDimitry Andric   if (HasVSrc) {
17125ffd83dbSDimitry Andric     Register VSrc = MI.getOperand(1).getReg();
17135ffd83dbSDimitry Andric     MIB.addReg(VSrc);
1714fe6060f1SDimitry Andric 
17155ffd83dbSDimitry Andric     if (!RBI.constrainGenericRegister(VSrc, AMDGPU::VGPR_32RegClass, *MRI))
17165ffd83dbSDimitry Andric       return false;
17175ffd83dbSDimitry Andric   }
17185ffd83dbSDimitry Andric 
17195ffd83dbSDimitry Andric   MIB.addImm(ImmOffset)
17205ffd83dbSDimitry Andric      .cloneMemRefs(MI);
17215ffd83dbSDimitry Andric 
172281ad6265SDimitry Andric   TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::data0);
172381ad6265SDimitry Andric 
17245ffd83dbSDimitry Andric   MI.eraseFromParent();
17250b57cec5SDimitry Andric   return true;
17260b57cec5SDimitry Andric }
17275ffd83dbSDimitry Andric 
17285ffd83dbSDimitry Andric bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,
17295ffd83dbSDimitry Andric                                                       bool IsAppend) const {
17305ffd83dbSDimitry Andric   Register PtrBase = MI.getOperand(2).getReg();
17315ffd83dbSDimitry Andric   LLT PtrTy = MRI->getType(PtrBase);
17325ffd83dbSDimitry Andric   bool IsGDS = PtrTy.getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
17335ffd83dbSDimitry Andric 
17345ffd83dbSDimitry Andric   unsigned Offset;
17355ffd83dbSDimitry Andric   std::tie(PtrBase, Offset) = selectDS1Addr1OffsetImpl(MI.getOperand(2));
17365ffd83dbSDimitry Andric 
17375ffd83dbSDimitry Andric   // TODO: Should this try to look through readfirstlane like GWS?
1738e8d8bef9SDimitry Andric   if (!isDSOffsetLegal(PtrBase, Offset)) {
17395ffd83dbSDimitry Andric     PtrBase = MI.getOperand(2).getReg();
17405ffd83dbSDimitry Andric     Offset = 0;
17415ffd83dbSDimitry Andric   }
17425ffd83dbSDimitry Andric 
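  // DS_APPEND/DS_CONSUME take their base address in m0 plus an immediate
  // offset; the trailing immediate selects GDS (-1) or LDS (0).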
17435ffd83dbSDimitry Andric   MachineBasicBlock *MBB = MI.getParent();
17445ffd83dbSDimitry Andric   const DebugLoc &DL = MI.getDebugLoc();
17455ffd83dbSDimitry Andric   const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
17465ffd83dbSDimitry Andric 
17475ffd83dbSDimitry Andric   BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
17485ffd83dbSDimitry Andric     .addReg(PtrBase);
1749e8d8bef9SDimitry Andric   if (!RBI.constrainGenericRegister(PtrBase, AMDGPU::SReg_32RegClass, *MRI))
1750e8d8bef9SDimitry Andric     return false;
1751e8d8bef9SDimitry Andric 
1752e8d8bef9SDimitry Andric   auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc), MI.getOperand(0).getReg())
17535ffd83dbSDimitry Andric     .addImm(Offset)
17545ffd83dbSDimitry Andric     .addImm(IsGDS ? -1 : 0)
17555ffd83dbSDimitry Andric     .cloneMemRefs(MI);
17565ffd83dbSDimitry Andric   MI.eraseFromParent();
1757e8d8bef9SDimitry Andric   return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1758e8d8bef9SDimitry Andric }
1759e8d8bef9SDimitry Andric 
1760e8d8bef9SDimitry Andric bool AMDGPUInstructionSelector::selectSBarrier(MachineInstr &MI) const {
17615f757f3fSDimitry Andric   if (TM.getOptLevel() > CodeGenOptLevel::None) {
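    // A workgroup that fits in a single wave runs in lockstep, so no hardware
    // barrier is needed; a scheduling-only WAVE_BARRIER is enough.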
1762e8d8bef9SDimitry Andric     unsigned WGSize = STI.getFlatWorkGroupSizes(MF->getFunction()).second;
1763e8d8bef9SDimitry Andric     if (WGSize <= STI.getWavefrontSize()) {
1764e8d8bef9SDimitry Andric       MachineBasicBlock *MBB = MI.getParent();
1765e8d8bef9SDimitry Andric       const DebugLoc &DL = MI.getDebugLoc();
1766e8d8bef9SDimitry Andric       BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::WAVE_BARRIER));
1767e8d8bef9SDimitry Andric       MI.eraseFromParent();
17685ffd83dbSDimitry Andric       return true;
17695ffd83dbSDimitry Andric     }
1770e8d8bef9SDimitry Andric   }
17715f757f3fSDimitry Andric 
17725f757f3fSDimitry Andric   // On GFX12 lower s_barrier into s_barrier_signal_imm and s_barrier_wait
17735f757f3fSDimitry Andric   if (STI.hasSplitBarriers()) {
17745f757f3fSDimitry Andric     MachineBasicBlock *MBB = MI.getParent();
17755f757f3fSDimitry Andric     const DebugLoc &DL = MI.getDebugLoc();
17765f757f3fSDimitry Andric     BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_IMM))
17775f757f3fSDimitry Andric         .addImm(AMDGPU::Barrier::WORKGROUP);
17785f757f3fSDimitry Andric     BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_BARRIER_WAIT))
17795f757f3fSDimitry Andric         .addImm(AMDGPU::Barrier::WORKGROUP);
17805f757f3fSDimitry Andric     MI.eraseFromParent();
17815f757f3fSDimitry Andric     return true;
17825f757f3fSDimitry Andric   }
17835f757f3fSDimitry Andric 
1784e8d8bef9SDimitry Andric   return selectImpl(MI, *CoverageInfo);
1785e8d8bef9SDimitry Andric }
17865ffd83dbSDimitry Andric 
17875ffd83dbSDimitry Andric static bool parseTexFail(uint64_t TexFailCtrl, bool &TFE, bool &LWE,
17885ffd83dbSDimitry Andric                          bool &IsTexFail) {
17895ffd83dbSDimitry Andric   if (TexFailCtrl)
17905ffd83dbSDimitry Andric     IsTexFail = true;
17915ffd83dbSDimitry Andric 
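  // Bit 0 requests TFE (texture fail enable) and bit 1 LWE (LOD warning
  // enable); any other set bit makes the control value invalid.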
179204eeddc0SDimitry Andric   TFE = (TexFailCtrl & 0x1) != 0;
17935ffd83dbSDimitry Andric   TexFailCtrl &= ~(uint64_t)0x1;
179404eeddc0SDimitry Andric   LWE = (TexFailCtrl & 0x2) != 0;
17955ffd83dbSDimitry Andric   TexFailCtrl &= ~(uint64_t)0x2;
17965ffd83dbSDimitry Andric 
17975ffd83dbSDimitry Andric   return TexFailCtrl == 0;
17985ffd83dbSDimitry Andric }
17995ffd83dbSDimitry Andric 
18005ffd83dbSDimitry Andric bool AMDGPUInstructionSelector::selectImageIntrinsic(
18015ffd83dbSDimitry Andric   MachineInstr &MI, const AMDGPU::ImageDimIntrinsicInfo *Intr) const {
18025ffd83dbSDimitry Andric   MachineBasicBlock *MBB = MI.getParent();
18035ffd83dbSDimitry Andric   const DebugLoc &DL = MI.getDebugLoc();
18045ffd83dbSDimitry Andric 
18055ffd83dbSDimitry Andric   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
18065ffd83dbSDimitry Andric     AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
18075ffd83dbSDimitry Andric 
18085ffd83dbSDimitry Andric   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
18095ffd83dbSDimitry Andric   unsigned IntrOpcode = Intr->BaseOpcode;
1810e8d8bef9SDimitry Andric   const bool IsGFX10Plus = AMDGPU::isGFX10Plus(STI);
181181ad6265SDimitry Andric   const bool IsGFX11Plus = AMDGPU::isGFX11Plus(STI);
18125f757f3fSDimitry Andric   const bool IsGFX12Plus = AMDGPU::isGFX12Plus(STI);
18135ffd83dbSDimitry Andric 
1814e8d8bef9SDimitry Andric   const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;
18155ffd83dbSDimitry Andric 
18165ffd83dbSDimitry Andric   Register VDataIn, VDataOut;
18175ffd83dbSDimitry Andric   LLT VDataTy;
18185ffd83dbSDimitry Andric   int NumVDataDwords = -1;
181904eeddc0SDimitry Andric   bool IsD16 = MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
182004eeddc0SDimitry Andric                MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;
18215ffd83dbSDimitry Andric 
18225ffd83dbSDimitry Andric   bool Unorm;
1823e8d8bef9SDimitry Andric   if (!BaseOpcode->Sampler)
18245ffd83dbSDimitry Andric     Unorm = true;
1825e8d8bef9SDimitry Andric   else
1826e8d8bef9SDimitry Andric     Unorm = MI.getOperand(ArgOffset + Intr->UnormIndex).getImm() != 0;
18275ffd83dbSDimitry Andric 
18285ffd83dbSDimitry Andric   bool TFE;
18295ffd83dbSDimitry Andric   bool LWE;
18305ffd83dbSDimitry Andric   bool IsTexFail = false;
1831e8d8bef9SDimitry Andric   if (!parseTexFail(MI.getOperand(ArgOffset + Intr->TexFailCtrlIndex).getImm(),
1832e8d8bef9SDimitry Andric                     TFE, LWE, IsTexFail))
18335ffd83dbSDimitry Andric     return false;
18345ffd83dbSDimitry Andric 
1835e8d8bef9SDimitry Andric   const int Flags = MI.getOperand(ArgOffset + Intr->NumArgs).getImm();
18365ffd83dbSDimitry Andric   const bool IsA16 = (Flags & 1) != 0;
18375ffd83dbSDimitry Andric   const bool IsG16 = (Flags & 2) != 0;
18385ffd83dbSDimitry Andric 
1839fe6060f1SDimitry Andric   // A16 implies 16-bit gradients if the subtarget doesn't support G16.
1840fe6060f1SDimitry Andric   if (IsA16 && !STI.hasG16() && !IsG16)
18415ffd83dbSDimitry Andric     return false;
18425ffd83dbSDimitry Andric 
18435ffd83dbSDimitry Andric   unsigned DMask = 0;
18445ffd83dbSDimitry Andric   unsigned DMaskLanes = 0;
18455ffd83dbSDimitry Andric 
18465ffd83dbSDimitry Andric   if (BaseOpcode->Atomic) {
18475ffd83dbSDimitry Andric     VDataOut = MI.getOperand(0).getReg();
18485ffd83dbSDimitry Andric     VDataIn = MI.getOperand(2).getReg();
18495ffd83dbSDimitry Andric     LLT Ty = MRI->getType(VDataIn);
18505ffd83dbSDimitry Andric 
18515ffd83dbSDimitry Andric     // Be careful to allow atomic swap on 16-bit element vectors.
18525ffd83dbSDimitry Andric     const bool Is64Bit = BaseOpcode->AtomicX2 ?
18535ffd83dbSDimitry Andric       Ty.getSizeInBits() == 128 :
18545ffd83dbSDimitry Andric       Ty.getSizeInBits() == 64;
18555ffd83dbSDimitry Andric 
18565ffd83dbSDimitry Andric     if (BaseOpcode->AtomicX2) {
18575ffd83dbSDimitry Andric       assert(MI.getOperand(3).getReg() == AMDGPU::NoRegister);
18585ffd83dbSDimitry Andric 
18595ffd83dbSDimitry Andric       DMask = Is64Bit ? 0xf : 0x3;
18605ffd83dbSDimitry Andric       NumVDataDwords = Is64Bit ? 4 : 2;
18615ffd83dbSDimitry Andric     } else {
18625ffd83dbSDimitry Andric       DMask = Is64Bit ? 0x3 : 0x1;
18635ffd83dbSDimitry Andric       NumVDataDwords = Is64Bit ? 2 : 1;
18645ffd83dbSDimitry Andric     }
18655ffd83dbSDimitry Andric   } else {
1866e8d8bef9SDimitry Andric     DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm();
1867bdd1243dSDimitry Andric     DMaskLanes = BaseOpcode->Gather4 ? 4 : llvm::popcount(DMask);
18685ffd83dbSDimitry Andric 
18695ffd83dbSDimitry Andric     if (BaseOpcode->Store) {
18705ffd83dbSDimitry Andric       VDataIn = MI.getOperand(1).getReg();
18715ffd83dbSDimitry Andric       VDataTy = MRI->getType(VDataIn);
18725ffd83dbSDimitry Andric       NumVDataDwords = (VDataTy.getSizeInBits() + 31) / 32;
1873*0fca6ea1SDimitry Andric     } else if (BaseOpcode->NoReturn) {
1874*0fca6ea1SDimitry Andric       NumVDataDwords = 0;
18755ffd83dbSDimitry Andric     } else {
18765ffd83dbSDimitry Andric       VDataOut = MI.getOperand(0).getReg();
18775ffd83dbSDimitry Andric       VDataTy = MRI->getType(VDataOut);
18785ffd83dbSDimitry Andric       NumVDataDwords = DMaskLanes;
18795ffd83dbSDimitry Andric 
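                                  // With packed D16, two 16-bit components share each result dword.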
18805ffd83dbSDimitry Andric       if (IsD16 && !STI.hasUnpackedD16VMem())
18815ffd83dbSDimitry Andric         NumVDataDwords = (DMaskLanes + 1) / 2;
18825ffd83dbSDimitry Andric     }
18835ffd83dbSDimitry Andric   }
18845ffd83dbSDimitry Andric 
18855ffd83dbSDimitry Andric   // Set G16 opcode
188606c3fb27SDimitry Andric   if (Subtarget->hasG16() && IsG16) {
18875ffd83dbSDimitry Andric     const AMDGPU::MIMGG16MappingInfo *G16MappingInfo =
18885ffd83dbSDimitry Andric         AMDGPU::getMIMGG16MappingInfo(Intr->BaseOpcode);
18895ffd83dbSDimitry Andric     assert(G16MappingInfo);
18905ffd83dbSDimitry Andric     IntrOpcode = G16MappingInfo->G16; // set opcode to variant with _g16
18915ffd83dbSDimitry Andric   }
18925ffd83dbSDimitry Andric 
18935ffd83dbSDimitry Andric   // TODO: Check this in verifier.
18945ffd83dbSDimitry Andric   assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");
18955ffd83dbSDimitry Andric 
1896fe6060f1SDimitry Andric   unsigned CPol = MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm();
1897fe6060f1SDimitry Andric   if (BaseOpcode->Atomic)
1898fe6060f1SDimitry Andric     CPol |= AMDGPU::CPol::GLC; // TODO no-return optimization
18997a6dacacSDimitry Andric   if (CPol & ~((IsGFX12Plus ? AMDGPU::CPol::ALL : AMDGPU::CPol::ALL_pregfx12) |
19007a6dacacSDimitry Andric                AMDGPU::CPol::VOLATILE))
19015ffd83dbSDimitry Andric     return false;
19025ffd83dbSDimitry Andric 
19035ffd83dbSDimitry Andric   int NumVAddrRegs = 0;
19045ffd83dbSDimitry Andric   int NumVAddrDwords = 0;
1905e8d8bef9SDimitry Andric   for (unsigned I = Intr->VAddrStart; I < Intr->VAddrEnd; I++) {
19065ffd83dbSDimitry Andric     // Skip the $noregs and 0s inserted during legalization.
1907e8d8bef9SDimitry Andric     MachineOperand &AddrOp = MI.getOperand(ArgOffset + I);
19085ffd83dbSDimitry Andric     if (!AddrOp.isReg())
19095ffd83dbSDimitry Andric       continue; // XXX - Break?
19105ffd83dbSDimitry Andric 
19115ffd83dbSDimitry Andric     Register Addr = AddrOp.getReg();
19125ffd83dbSDimitry Andric     if (!Addr)
19135ffd83dbSDimitry Andric       break;
19145ffd83dbSDimitry Andric 
19155ffd83dbSDimitry Andric     ++NumVAddrRegs;
19165ffd83dbSDimitry Andric     NumVAddrDwords += (MRI->getType(Addr).getSizeInBits() + 31) / 32;
19175ffd83dbSDimitry Andric   }
19185ffd83dbSDimitry Andric 
19195ffd83dbSDimitry Andric   // The legalizer preprocessed the intrinsic arguments. If we aren't using
192081ad6265SDimitry Andric   // NSA, the addresses should have been packed into a single value in the
19215ffd83dbSDimitry Andric   // first address register.
192206c3fb27SDimitry Andric   const bool UseNSA =
192306c3fb27SDimitry Andric       NumVAddrRegs != 1 &&
192406c3fb27SDimitry Andric       (STI.hasPartialNSAEncoding() ? NumVAddrDwords >= NumVAddrRegs
192506c3fb27SDimitry Andric                                    : NumVAddrDwords == NumVAddrRegs);
19265ffd83dbSDimitry Andric   if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {
19275ffd83dbSDimitry Andric     LLVM_DEBUG(dbgs() << "Trying to use NSA on non-NSA target\n");
19285ffd83dbSDimitry Andric     return false;
19295ffd83dbSDimitry Andric   }
19305ffd83dbSDimitry Andric 
19315ffd83dbSDimitry Andric   if (IsTexFail)
19325ffd83dbSDimitry Andric     ++NumVDataDwords;
19335ffd83dbSDimitry Andric 
19345ffd83dbSDimitry Andric   int Opcode = -1;
19355f757f3fSDimitry Andric   if (IsGFX12Plus) {
19365f757f3fSDimitry Andric     Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx12,
19375f757f3fSDimitry Andric                                    NumVDataDwords, NumVAddrDwords);
19385f757f3fSDimitry Andric   } else if (IsGFX11Plus) {
193981ad6265SDimitry Andric     Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
194081ad6265SDimitry Andric                                    UseNSA ? AMDGPU::MIMGEncGfx11NSA
194181ad6265SDimitry Andric                                           : AMDGPU::MIMGEncGfx11Default,
194281ad6265SDimitry Andric                                    NumVDataDwords, NumVAddrDwords);
194381ad6265SDimitry Andric   } else if (IsGFX10Plus) {
19445ffd83dbSDimitry Andric     Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
19455ffd83dbSDimitry Andric                                    UseNSA ? AMDGPU::MIMGEncGfx10NSA
19465ffd83dbSDimitry Andric                                           : AMDGPU::MIMGEncGfx10Default,
19475ffd83dbSDimitry Andric                                    NumVDataDwords, NumVAddrDwords);
19485ffd83dbSDimitry Andric   } else {
194981ad6265SDimitry Andric     if (Subtarget->hasGFX90AInsts()) {
195081ad6265SDimitry Andric       Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx90a,
195181ad6265SDimitry Andric                                      NumVDataDwords, NumVAddrDwords);
195281ad6265SDimitry Andric       if (Opcode == -1) {
195381ad6265SDimitry Andric         LLVM_DEBUG(
195481ad6265SDimitry Andric             dbgs()
195581ad6265SDimitry Andric             << "requested image instruction is not supported on this GPU\n");
195681ad6265SDimitry Andric         return false;
195781ad6265SDimitry Andric       }
195881ad6265SDimitry Andric     }
195981ad6265SDimitry Andric     if (Opcode == -1 &&
196081ad6265SDimitry Andric         STI.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
19615ffd83dbSDimitry Andric       Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx8,
19625ffd83dbSDimitry Andric                                      NumVDataDwords, NumVAddrDwords);
19635ffd83dbSDimitry Andric     if (Opcode == -1)
19645ffd83dbSDimitry Andric       Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx6,
19655ffd83dbSDimitry Andric                                      NumVDataDwords, NumVAddrDwords);
19665ffd83dbSDimitry Andric   }
196706c3fb27SDimitry Andric   if (Opcode == -1)
196806c3fb27SDimitry Andric     return false;
19695ffd83dbSDimitry Andric 
19705ffd83dbSDimitry Andric   auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opcode))
19715ffd83dbSDimitry Andric     .cloneMemRefs(MI);
19725ffd83dbSDimitry Andric 
19735ffd83dbSDimitry Andric   if (VDataOut) {
19745ffd83dbSDimitry Andric     if (BaseOpcode->AtomicX2) {
19755ffd83dbSDimitry Andric       const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64;
19765ffd83dbSDimitry Andric 
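                                  // X2 atomics (cmpswap) define a doubled-width destination; the
                                  // intrinsic's result is only the low half, extracted by the COPY
                                  // below.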
19775ffd83dbSDimitry Andric       Register TmpReg = MRI->createVirtualRegister(
19785ffd83dbSDimitry Andric         Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
19795ffd83dbSDimitry Andric       unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
19805ffd83dbSDimitry Andric 
19815ffd83dbSDimitry Andric       MIB.addDef(TmpReg);
1982fe6060f1SDimitry Andric       if (!MRI->use_empty(VDataOut)) {
19835ffd83dbSDimitry Andric         BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), VDataOut)
19845ffd83dbSDimitry Andric             .addReg(TmpReg, RegState::Kill, SubReg);
1985fe6060f1SDimitry Andric       }
19865ffd83dbSDimitry Andric 
19875ffd83dbSDimitry Andric     } else {
19885ffd83dbSDimitry Andric       MIB.addDef(VDataOut); // vdata output
19895ffd83dbSDimitry Andric     }
19905ffd83dbSDimitry Andric   }
19915ffd83dbSDimitry Andric 
19925ffd83dbSDimitry Andric   if (VDataIn)
19935ffd83dbSDimitry Andric     MIB.addReg(VDataIn); // vdata input
19945ffd83dbSDimitry Andric 
1995e8d8bef9SDimitry Andric   for (int I = 0; I != NumVAddrRegs; ++I) {
1996e8d8bef9SDimitry Andric     MachineOperand &SrcOp = MI.getOperand(ArgOffset + Intr->VAddrStart + I);
19975ffd83dbSDimitry Andric     if (SrcOp.isReg()) {
19985ffd83dbSDimitry Andric       assert(SrcOp.getReg() != 0);
19995ffd83dbSDimitry Andric       MIB.addReg(SrcOp.getReg());
20005ffd83dbSDimitry Andric     }
20015ffd83dbSDimitry Andric   }
20025ffd83dbSDimitry Andric 
2003e8d8bef9SDimitry Andric   MIB.addReg(MI.getOperand(ArgOffset + Intr->RsrcIndex).getReg());
20045ffd83dbSDimitry Andric   if (BaseOpcode->Sampler)
2005e8d8bef9SDimitry Andric     MIB.addReg(MI.getOperand(ArgOffset + Intr->SampIndex).getReg());
20065ffd83dbSDimitry Andric 
20075ffd83dbSDimitry Andric   MIB.addImm(DMask); // dmask
20085ffd83dbSDimitry Andric 
2009e8d8bef9SDimitry Andric   if (IsGFX10Plus)
20105ffd83dbSDimitry Andric     MIB.addImm(DimInfo->Encoding);
20115f757f3fSDimitry Andric   if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::unorm))
20125ffd83dbSDimitry Andric     MIB.addImm(Unorm);
20135ffd83dbSDimitry Andric 
2014fe6060f1SDimitry Andric   MIB.addImm(CPol);
20155ffd83dbSDimitry Andric   MIB.addImm(IsA16 &&  // a16 or r128
20165ffd83dbSDimitry Andric              STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);
2017e8d8bef9SDimitry Andric   if (IsGFX10Plus)
20185ffd83dbSDimitry Andric     MIB.addImm(IsA16 ? -1 : 0);
20195ffd83dbSDimitry Andric 
202081ad6265SDimitry Andric   if (!Subtarget->hasGFX90AInsts()) {
20215ffd83dbSDimitry Andric     MIB.addImm(TFE); // tfe
202281ad6265SDimitry Andric   } else if (TFE) {
202381ad6265SDimitry Andric     LLVM_DEBUG(dbgs() << "TFE is not supported on this GPU\n");
202481ad6265SDimitry Andric     return false;
202581ad6265SDimitry Andric   }
202681ad6265SDimitry Andric 
20275f757f3fSDimitry Andric   if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::lwe))
20285ffd83dbSDimitry Andric     MIB.addImm(LWE); // lwe
2029e8d8bef9SDimitry Andric   if (!IsGFX10Plus)
20305ffd83dbSDimitry Andric     MIB.addImm(DimInfo->DA ? -1 : 0);
20315ffd83dbSDimitry Andric   if (BaseOpcode->HasD16)
20325ffd83dbSDimitry Andric     MIB.addImm(IsD16 ? -1 : 0);
20335ffd83dbSDimitry Andric 
20345ffd83dbSDimitry Andric   MI.eraseFromParent();
203581ad6265SDimitry Andric   constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
203681ad6265SDimitry Andric   TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::vaddr);
203781ad6265SDimitry Andric   return true;
20385ffd83dbSDimitry Andric }
20395ffd83dbSDimitry Andric 
2040bdd1243dSDimitry Andric // We need to handle this here because tablegen doesn't support matching
2041bdd1243dSDimitry Andric // instructions with multiple outputs.
2042bdd1243dSDimitry Andric bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
2043bdd1243dSDimitry Andric     MachineInstr &MI) const {
2044bdd1243dSDimitry Andric   Register Dst0 = MI.getOperand(0).getReg();
2045bdd1243dSDimitry Andric   Register Dst1 = MI.getOperand(1).getReg();
2046bdd1243dSDimitry Andric 
2047bdd1243dSDimitry Andric   const DebugLoc &DL = MI.getDebugLoc();
2048bdd1243dSDimitry Andric   MachineBasicBlock *MBB = MI.getParent();
2049bdd1243dSDimitry Andric 
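                              // Operand 2 is the intrinsic ID; operands 3-6 are the stack address,
                              // the two data operands, and the immediate offset.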
2050bdd1243dSDimitry Andric   Register Addr = MI.getOperand(3).getReg();
2051bdd1243dSDimitry Andric   Register Data0 = MI.getOperand(4).getReg();
2052bdd1243dSDimitry Andric   Register Data1 = MI.getOperand(5).getReg();
2053bdd1243dSDimitry Andric   unsigned Offset = MI.getOperand(6).getImm();
2054bdd1243dSDimitry Andric 
2055bdd1243dSDimitry Andric   auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_BVH_STACK_RTN_B32), Dst0)
2056bdd1243dSDimitry Andric                  .addDef(Dst1)
2057bdd1243dSDimitry Andric                  .addUse(Addr)
2058bdd1243dSDimitry Andric                  .addUse(Data0)
2059bdd1243dSDimitry Andric                  .addUse(Data1)
2060bdd1243dSDimitry Andric                  .addImm(Offset)
2061bdd1243dSDimitry Andric                  .cloneMemRefs(MI);
2062bdd1243dSDimitry Andric 
2063bdd1243dSDimitry Andric   MI.eraseFromParent();
2064bdd1243dSDimitry Andric   return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
2065bdd1243dSDimitry Andric }
2066bdd1243dSDimitry Andric 
20675ffd83dbSDimitry Andric bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
20685ffd83dbSDimitry Andric     MachineInstr &I) const {
2069*0fca6ea1SDimitry Andric   Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
20705ffd83dbSDimitry Andric   switch (IntrinsicID) {
20715ffd83dbSDimitry Andric   case Intrinsic::amdgcn_end_cf:
20725ffd83dbSDimitry Andric     return selectEndCfIntrinsic(I);
2073480093f4SDimitry Andric   case Intrinsic::amdgcn_ds_ordered_add:
2074480093f4SDimitry Andric   case Intrinsic::amdgcn_ds_ordered_swap:
2075480093f4SDimitry Andric     return selectDSOrderedIntrinsic(I, IntrinsicID);
20765ffd83dbSDimitry Andric   case Intrinsic::amdgcn_ds_gws_init:
20775ffd83dbSDimitry Andric   case Intrinsic::amdgcn_ds_gws_barrier:
20785ffd83dbSDimitry Andric   case Intrinsic::amdgcn_ds_gws_sema_v:
20795ffd83dbSDimitry Andric   case Intrinsic::amdgcn_ds_gws_sema_br:
20805ffd83dbSDimitry Andric   case Intrinsic::amdgcn_ds_gws_sema_p:
20815ffd83dbSDimitry Andric   case Intrinsic::amdgcn_ds_gws_sema_release_all:
20825ffd83dbSDimitry Andric     return selectDSGWSIntrinsic(I, IntrinsicID);
20835ffd83dbSDimitry Andric   case Intrinsic::amdgcn_ds_append:
20845ffd83dbSDimitry Andric     return selectDSAppendConsume(I, true);
20855ffd83dbSDimitry Andric   case Intrinsic::amdgcn_ds_consume:
20865ffd83dbSDimitry Andric     return selectDSAppendConsume(I, false);
2087e8d8bef9SDimitry Andric   case Intrinsic::amdgcn_s_barrier:
2088e8d8bef9SDimitry Andric     return selectSBarrier(I);
208981ad6265SDimitry Andric   case Intrinsic::amdgcn_raw_buffer_load_lds:
209006c3fb27SDimitry Andric   case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
209181ad6265SDimitry Andric   case Intrinsic::amdgcn_struct_buffer_load_lds:
209206c3fb27SDimitry Andric   case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
209381ad6265SDimitry Andric     return selectBufferLoadLds(I);
209481ad6265SDimitry Andric   case Intrinsic::amdgcn_global_load_lds:
209581ad6265SDimitry Andric     return selectGlobalLoadLds(I);
209681ad6265SDimitry Andric   case Intrinsic::amdgcn_exp_compr:
209781ad6265SDimitry Andric     if (!STI.hasCompressedExport()) {
209881ad6265SDimitry Andric       Function &F = I.getMF()->getFunction();
209981ad6265SDimitry Andric       DiagnosticInfoUnsupported NoFpRet(
210081ad6265SDimitry Andric           F, "intrinsic not supported on subtarget", I.getDebugLoc(), DS_Error);
210181ad6265SDimitry Andric       F.getContext().diagnose(NoFpRet);
210281ad6265SDimitry Andric       return false;
210381ad6265SDimitry Andric     }
210481ad6265SDimitry Andric     break;
2105bdd1243dSDimitry Andric   case Intrinsic::amdgcn_ds_bvh_stack_rtn:
2106bdd1243dSDimitry Andric     return selectDSBvhStackIntrinsic(I);
21075f757f3fSDimitry Andric   case Intrinsic::amdgcn_s_barrier_init:
21085f757f3fSDimitry Andric   case Intrinsic::amdgcn_s_barrier_join:
21095f757f3fSDimitry Andric   case Intrinsic::amdgcn_s_wakeup_barrier:
21105f757f3fSDimitry Andric   case Intrinsic::amdgcn_s_get_barrier_state:
21115f757f3fSDimitry Andric     return selectNamedBarrierInst(I, IntrinsicID);
21125f757f3fSDimitry Andric   case Intrinsic::amdgcn_s_barrier_signal_isfirst:
21135f757f3fSDimitry Andric   case Intrinsic::amdgcn_s_barrier_signal_isfirst_var:
21145f757f3fSDimitry Andric     return selectSBarrierSignalIsfirst(I, IntrinsicID);
21155f757f3fSDimitry Andric   case Intrinsic::amdgcn_s_barrier_leave:
21165f757f3fSDimitry Andric     return selectSBarrierLeave(I);
211781ad6265SDimitry Andric   }
21188bcb0991SDimitry Andric   return selectImpl(I, *CoverageInfo);
21190b57cec5SDimitry Andric }
21200b57cec5SDimitry Andric 
21210b57cec5SDimitry Andric bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
21225ffd83dbSDimitry Andric   if (selectImpl(I, *CoverageInfo))
21235ffd83dbSDimitry Andric     return true;
21245ffd83dbSDimitry Andric 
21250b57cec5SDimitry Andric   MachineBasicBlock *BB = I.getParent();
21260b57cec5SDimitry Andric   const DebugLoc &DL = I.getDebugLoc();
21270b57cec5SDimitry Andric 
21288bcb0991SDimitry Andric   Register DstReg = I.getOperand(0).getReg();
21298bcb0991SDimitry Andric   unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);
21300b57cec5SDimitry Andric   assert(Size <= 32 || Size == 64);
21310b57cec5SDimitry Andric   const MachineOperand &CCOp = I.getOperand(1);
21328bcb0991SDimitry Andric   Register CCReg = CCOp.getReg();
2133480093f4SDimitry Andric   if (!isVCC(CCReg, *MRI)) {
21340b57cec5SDimitry Andric     unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
21350b57cec5SDimitry Andric                                          AMDGPU::S_CSELECT_B32;
21360b57cec5SDimitry Andric     MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
21370b57cec5SDimitry Andric             .addReg(CCReg);
21380b57cec5SDimitry Andric 
21390b57cec5SDimitry Andric     // The generic constrainSelectedInstRegOperands doesn't work for the scc
21400b57cec5SDimitry Andric     // register bank, because it does not cover the register class we use to
21410b57cec5SDimitry Andric     // represent it. So we need to set the register class manually here.
21428bcb0991SDimitry Andric     if (!MRI->getRegClassOrNull(CCReg))
21438bcb0991SDimitry Andric         MRI->setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, *MRI));
21440b57cec5SDimitry Andric     MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
21450b57cec5SDimitry Andric             .add(I.getOperand(2))
21460b57cec5SDimitry Andric             .add(I.getOperand(3));
21470b57cec5SDimitry Andric 
2148349cc55cSDimitry Andric     bool Ret = false;
2149349cc55cSDimitry Andric     Ret |= constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
2150349cc55cSDimitry Andric     Ret |= constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
21510b57cec5SDimitry Andric     I.eraseFromParent();
21520b57cec5SDimitry Andric     return Ret;
21530b57cec5SDimitry Andric   }
21540b57cec5SDimitry Andric 
21550b57cec5SDimitry Andric   // Wide VGPR select should have been split in RegBankSelect.
21560b57cec5SDimitry Andric   if (Size > 32)
21570b57cec5SDimitry Andric     return false;
21580b57cec5SDimitry Andric 
21590b57cec5SDimitry Andric   MachineInstr *Select =
21600b57cec5SDimitry Andric       BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
21610b57cec5SDimitry Andric               .addImm(0)
21620b57cec5SDimitry Andric               .add(I.getOperand(3))
21630b57cec5SDimitry Andric               .addImm(0)
21640b57cec5SDimitry Andric               .add(I.getOperand(2))
21650b57cec5SDimitry Andric               .add(I.getOperand(1));
21660b57cec5SDimitry Andric 
21670b57cec5SDimitry Andric   bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
21680b57cec5SDimitry Andric   I.eraseFromParent();
21690b57cec5SDimitry Andric   return Ret;
21700b57cec5SDimitry Andric }
21710b57cec5SDimitry Andric 
21720b57cec5SDimitry Andric static int sizeToSubRegIndex(unsigned Size) {
21730b57cec5SDimitry Andric   switch (Size) {
21740b57cec5SDimitry Andric   case 32:
21750b57cec5SDimitry Andric     return AMDGPU::sub0;
21760b57cec5SDimitry Andric   case 64:
21770b57cec5SDimitry Andric     return AMDGPU::sub0_sub1;
21780b57cec5SDimitry Andric   case 96:
21790b57cec5SDimitry Andric     return AMDGPU::sub0_sub1_sub2;
21800b57cec5SDimitry Andric   case 128:
21810b57cec5SDimitry Andric     return AMDGPU::sub0_sub1_sub2_sub3;
21820b57cec5SDimitry Andric   case 256:
21830b57cec5SDimitry Andric     return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
21840b57cec5SDimitry Andric   default:
21850b57cec5SDimitry Andric     if (Size < 32)
21860b57cec5SDimitry Andric       return AMDGPU::sub0;
21870b57cec5SDimitry Andric     if (Size > 256)
21880b57cec5SDimitry Andric       return -1;
218906c3fb27SDimitry Andric     return sizeToSubRegIndex(llvm::bit_ceil(Size));
21900b57cec5SDimitry Andric   }
21910b57cec5SDimitry Andric }
21920b57cec5SDimitry Andric 
21930b57cec5SDimitry Andric bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
21948bcb0991SDimitry Andric   Register DstReg = I.getOperand(0).getReg();
21958bcb0991SDimitry Andric   Register SrcReg = I.getOperand(1).getReg();
21968bcb0991SDimitry Andric   const LLT DstTy = MRI->getType(DstReg);
21978bcb0991SDimitry Andric   const LLT SrcTy = MRI->getType(SrcReg);
2198480093f4SDimitry Andric   const LLT S1 = LLT::scalar(1);
2199480093f4SDimitry Andric 
22008bcb0991SDimitry Andric   const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
2201480093f4SDimitry Andric   const RegisterBank *DstRB;
2202480093f4SDimitry Andric   if (DstTy == S1) {
2203480093f4SDimitry Andric     // This is a special case. We don't treat s1 for legalization artifacts as
2204480093f4SDimitry Andric     // vcc booleans.
2205480093f4SDimitry Andric     DstRB = SrcRB;
2206480093f4SDimitry Andric   } else {
2207480093f4SDimitry Andric     DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
22080b57cec5SDimitry Andric     if (SrcRB != DstRB)
22090b57cec5SDimitry Andric       return false;
2210480093f4SDimitry Andric   }
22110b57cec5SDimitry Andric 
22125ffd83dbSDimitry Andric   const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
22135ffd83dbSDimitry Andric 
22140b57cec5SDimitry Andric   unsigned DstSize = DstTy.getSizeInBits();
22150b57cec5SDimitry Andric   unsigned SrcSize = SrcTy.getSizeInBits();
22160b57cec5SDimitry Andric 
221781ad6265SDimitry Andric   const TargetRegisterClass *SrcRC =
221881ad6265SDimitry Andric       TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB);
221981ad6265SDimitry Andric   const TargetRegisterClass *DstRC =
222081ad6265SDimitry Andric       TRI.getRegClassForSizeOnBank(DstSize, *DstRB);
22215ffd83dbSDimitry Andric   if (!SrcRC || !DstRC)
22225ffd83dbSDimitry Andric     return false;
22235ffd83dbSDimitry Andric 
22245ffd83dbSDimitry Andric   if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
22255ffd83dbSDimitry Andric       !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) {
22265ffd83dbSDimitry Andric     LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
22275ffd83dbSDimitry Andric     return false;
22285ffd83dbSDimitry Andric   }
22295ffd83dbSDimitry Andric 
2230fe6060f1SDimitry Andric   if (DstTy == LLT::fixed_vector(2, 16) && SrcTy == LLT::fixed_vector(2, 32)) {
22315ffd83dbSDimitry Andric     MachineBasicBlock *MBB = I.getParent();
22325ffd83dbSDimitry Andric     const DebugLoc &DL = I.getDebugLoc();
22335ffd83dbSDimitry Andric 
22345ffd83dbSDimitry Andric     Register LoReg = MRI->createVirtualRegister(DstRC);
22355ffd83dbSDimitry Andric     Register HiReg = MRI->createVirtualRegister(DstRC);
22365ffd83dbSDimitry Andric     BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), LoReg)
22375ffd83dbSDimitry Andric       .addReg(SrcReg, 0, AMDGPU::sub0);
22385ffd83dbSDimitry Andric     BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), HiReg)
22395ffd83dbSDimitry Andric       .addReg(SrcReg, 0, AMDGPU::sub1);
22405ffd83dbSDimitry Andric 
22415ffd83dbSDimitry Andric     if (IsVALU && STI.hasSDWA()) {
22425ffd83dbSDimitry Andric       // Write the low 16-bits of the high element into the high 16-bits of the
22435ffd83dbSDimitry Andric       // low element.
22445ffd83dbSDimitry Andric       MachineInstr *MovSDWA =
22455ffd83dbSDimitry Andric         BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
22465ffd83dbSDimitry Andric         .addImm(0)                             // $src0_modifiers
22475ffd83dbSDimitry Andric         .addReg(HiReg)                         // $src0
22485ffd83dbSDimitry Andric         .addImm(0)                             // $clamp
22495ffd83dbSDimitry Andric         .addImm(AMDGPU::SDWA::WORD_1)          // $dst_sel
22505ffd83dbSDimitry Andric         .addImm(AMDGPU::SDWA::UNUSED_PRESERVE) // $dst_unused
22515ffd83dbSDimitry Andric         .addImm(AMDGPU::SDWA::WORD_0)          // $src0_sel
22525ffd83dbSDimitry Andric         .addReg(LoReg, RegState::Implicit);
22535ffd83dbSDimitry Andric       MovSDWA->tieOperands(0, MovSDWA->getNumOperands() - 1);
22545ffd83dbSDimitry Andric     } else {
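                                  // Without SDWA, pack the halves manually:
                                  //   Dst = (Hi << 16) | (Lo & 0xffff)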
22555ffd83dbSDimitry Andric       Register TmpReg0 = MRI->createVirtualRegister(DstRC);
22565ffd83dbSDimitry Andric       Register TmpReg1 = MRI->createVirtualRegister(DstRC);
22575ffd83dbSDimitry Andric       Register ImmReg = MRI->createVirtualRegister(DstRC);
22585ffd83dbSDimitry Andric       if (IsVALU) {
22595ffd83dbSDimitry Andric         BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)
22605ffd83dbSDimitry Andric           .addImm(16)
22615ffd83dbSDimitry Andric           .addReg(HiReg);
22625ffd83dbSDimitry Andric       } else {
22635ffd83dbSDimitry Andric         BuildMI(*MBB, I, DL, TII.get(AMDGPU::S_LSHL_B32), TmpReg0)
22645ffd83dbSDimitry Andric           .addReg(HiReg)
22655f757f3fSDimitry Andric           .addImm(16)
22665f757f3fSDimitry Andric           .setOperandDead(3); // Dead scc
22675ffd83dbSDimitry Andric       }
22685ffd83dbSDimitry Andric 
22695ffd83dbSDimitry Andric       unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
22705ffd83dbSDimitry Andric       unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
22715ffd83dbSDimitry Andric       unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;
22725ffd83dbSDimitry Andric 
22735ffd83dbSDimitry Andric       BuildMI(*MBB, I, DL, TII.get(MovOpc), ImmReg)
22745ffd83dbSDimitry Andric         .addImm(0xffff);
22755f757f3fSDimitry Andric       auto And = BuildMI(*MBB, I, DL, TII.get(AndOpc), TmpReg1)
22765ffd83dbSDimitry Andric         .addReg(LoReg)
22775ffd83dbSDimitry Andric         .addReg(ImmReg);
22785f757f3fSDimitry Andric       auto Or = BuildMI(*MBB, I, DL, TII.get(OrOpc), DstReg)
22795ffd83dbSDimitry Andric         .addReg(TmpReg0)
22805ffd83dbSDimitry Andric         .addReg(TmpReg1);
22815f757f3fSDimitry Andric 
22825f757f3fSDimitry Andric       if (!IsVALU) {
22835f757f3fSDimitry Andric         And.setOperandDead(3); // Dead scc
22845f757f3fSDimitry Andric         Or.setOperandDead(3); // Dead scc
22855f757f3fSDimitry Andric       }
22865ffd83dbSDimitry Andric     }
22875ffd83dbSDimitry Andric 
22885ffd83dbSDimitry Andric     I.eraseFromParent();
22895ffd83dbSDimitry Andric     return true;
22905ffd83dbSDimitry Andric   }
22915ffd83dbSDimitry Andric 
22925ffd83dbSDimitry Andric   if (!DstTy.isScalar())
22935ffd83dbSDimitry Andric     return false;
22940b57cec5SDimitry Andric 
22950b57cec5SDimitry Andric   if (SrcSize > 32) {
22960b57cec5SDimitry Andric     int SubRegIdx = sizeToSubRegIndex(DstSize);
22970b57cec5SDimitry Andric     if (SubRegIdx == -1)
22980b57cec5SDimitry Andric       return false;
22990b57cec5SDimitry Andric 
23000b57cec5SDimitry Andric     // Deal with weird cases where the class only partially supports the subreg
23010b57cec5SDimitry Andric     // index.
23025ffd83dbSDimitry Andric     const TargetRegisterClass *SrcWithSubRC
23035ffd83dbSDimitry Andric       = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
23045ffd83dbSDimitry Andric     if (!SrcWithSubRC)
23050b57cec5SDimitry Andric       return false;
23060b57cec5SDimitry Andric 
23075ffd83dbSDimitry Andric     if (SrcWithSubRC != SrcRC) {
23085ffd83dbSDimitry Andric       if (!RBI.constrainGenericRegister(SrcReg, *SrcWithSubRC, *MRI))
23095ffd83dbSDimitry Andric         return false;
23100b57cec5SDimitry Andric     }
23110b57cec5SDimitry Andric 
23125ffd83dbSDimitry Andric     I.getOperand(1).setSubReg(SubRegIdx);
23130b57cec5SDimitry Andric   }
23140b57cec5SDimitry Andric 
23150b57cec5SDimitry Andric   I.setDesc(TII.get(TargetOpcode::COPY));
23160b57cec5SDimitry Andric   return true;
23170b57cec5SDimitry Andric }
23180b57cec5SDimitry Andric 
23190b57cec5SDimitry Andric /// \returns true if a bitmask for \p Size bits will be an inline immediate.
23200b57cec5SDimitry Andric static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
23210b57cec5SDimitry Andric   Mask = maskTrailingOnes<unsigned>(Size);
23220b57cec5SDimitry Andric   int SignedMask = static_cast<int>(Mask);
23230b57cec5SDimitry Andric   return SignedMask >= -16 && SignedMask <= 64;
23240b57cec5SDimitry Andric }
23250b57cec5SDimitry Andric 
2326480093f4SDimitry Andric // Like RegisterBankInfo::getRegBank, but don't assume vcc for s1.
2327480093f4SDimitry Andric const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(
2328480093f4SDimitry Andric   Register Reg, const MachineRegisterInfo &MRI,
2329480093f4SDimitry Andric   const TargetRegisterInfo &TRI) const {
2330480093f4SDimitry Andric   const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
2331480093f4SDimitry Andric   if (auto *RB = RegClassOrBank.dyn_cast<const RegisterBank *>())
2332480093f4SDimitry Andric     return RB;
2333480093f4SDimitry Andric 
2334480093f4SDimitry Andric   // Ignore the type, since we don't use vcc in artifacts.
2335480093f4SDimitry Andric   if (auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
2336480093f4SDimitry Andric     return &RBI.getRegBankFromRegClass(*RC, LLT());
2337480093f4SDimitry Andric   return nullptr;
2338480093f4SDimitry Andric }
2339480093f4SDimitry Andric 
23400b57cec5SDimitry Andric bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
23415ffd83dbSDimitry Andric   bool InReg = I.getOpcode() == AMDGPU::G_SEXT_INREG;
23425ffd83dbSDimitry Andric   bool Signed = I.getOpcode() == AMDGPU::G_SEXT || InReg;
23430b57cec5SDimitry Andric   const DebugLoc &DL = I.getDebugLoc();
23440b57cec5SDimitry Andric   MachineBasicBlock &MBB = *I.getParent();
23458bcb0991SDimitry Andric   const Register DstReg = I.getOperand(0).getReg();
23468bcb0991SDimitry Andric   const Register SrcReg = I.getOperand(1).getReg();
23470b57cec5SDimitry Andric 
23488bcb0991SDimitry Andric   const LLT DstTy = MRI->getType(DstReg);
23498bcb0991SDimitry Andric   const LLT SrcTy = MRI->getType(SrcReg);
23505ffd83dbSDimitry Andric   const unsigned SrcSize = I.getOpcode() == AMDGPU::G_SEXT_INREG ?
23515ffd83dbSDimitry Andric     I.getOperand(2).getImm() : SrcTy.getSizeInBits();
23520b57cec5SDimitry Andric   const unsigned DstSize = DstTy.getSizeInBits();
23530b57cec5SDimitry Andric   if (!DstTy.isScalar())
23540b57cec5SDimitry Andric     return false;
23550b57cec5SDimitry Andric 
2356480093f4SDimitry Andric   // Artifact casts should never use vcc.
2357480093f4SDimitry Andric   const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);
2358480093f4SDimitry Andric 
2359e8d8bef9SDimitry Andric   // FIXME: This should probably be illegal and split earlier.
2360e8d8bef9SDimitry Andric   if (I.getOpcode() == AMDGPU::G_ANYEXT) {
2361e8d8bef9SDimitry Andric     if (DstSize <= 32)
2362e8d8bef9SDimitry Andric       return selectCOPY(I);
2363e8d8bef9SDimitry Andric 
2364e8d8bef9SDimitry Andric     const TargetRegisterClass *SrcRC =
236581ad6265SDimitry Andric         TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
2366e8d8bef9SDimitry Andric     const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
2367e8d8bef9SDimitry Andric     const TargetRegisterClass *DstRC =
236881ad6265SDimitry Andric         TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
2369e8d8bef9SDimitry Andric 
2370e8d8bef9SDimitry Andric     Register UndefReg = MRI->createVirtualRegister(SrcRC);
2371e8d8bef9SDimitry Andric     BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
2372e8d8bef9SDimitry Andric     BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
2373e8d8bef9SDimitry Andric       .addReg(SrcReg)
2374e8d8bef9SDimitry Andric       .addImm(AMDGPU::sub0)
2375e8d8bef9SDimitry Andric       .addReg(UndefReg)
2376e8d8bef9SDimitry Andric       .addImm(AMDGPU::sub1);
2377e8d8bef9SDimitry Andric     I.eraseFromParent();
2378e8d8bef9SDimitry Andric 
2379e8d8bef9SDimitry Andric     return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) &&
2380e8d8bef9SDimitry Andric            RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI);
2381e8d8bef9SDimitry Andric   }
2382e8d8bef9SDimitry Andric 
23830b57cec5SDimitry Andric   if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
23840b57cec5SDimitry Andric     // 64-bit should have been split up in RegBankSelect
23850b57cec5SDimitry Andric 
23860b57cec5SDimitry Andric     // Try to use an and with a mask if it will save code size.
23870b57cec5SDimitry Andric     unsigned Mask;
23880b57cec5SDimitry Andric     if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
23890b57cec5SDimitry Andric       MachineInstr *ExtI =
23900b57cec5SDimitry Andric       BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg)
23910b57cec5SDimitry Andric         .addImm(Mask)
23920b57cec5SDimitry Andric         .addReg(SrcReg);
23938bcb0991SDimitry Andric       I.eraseFromParent();
23940b57cec5SDimitry Andric       return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
23950b57cec5SDimitry Andric     }
23960b57cec5SDimitry Andric 
2397e8d8bef9SDimitry Andric     const unsigned BFE = Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
23980b57cec5SDimitry Andric     MachineInstr *ExtI =
23990b57cec5SDimitry Andric       BuildMI(MBB, I, DL, TII.get(BFE), DstReg)
24000b57cec5SDimitry Andric       .addReg(SrcReg)
24010b57cec5SDimitry Andric       .addImm(0) // Offset
24020b57cec5SDimitry Andric       .addImm(SrcSize); // Width
24038bcb0991SDimitry Andric     I.eraseFromParent();
24040b57cec5SDimitry Andric     return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
24050b57cec5SDimitry Andric   }
24060b57cec5SDimitry Andric 
24070b57cec5SDimitry Andric   if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
24085ffd83dbSDimitry Andric     const TargetRegisterClass &SrcRC = InReg && DstSize > 32 ?
24095ffd83dbSDimitry Andric       AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
24105ffd83dbSDimitry Andric     if (!RBI.constrainGenericRegister(SrcReg, SrcRC, *MRI))
24110b57cec5SDimitry Andric       return false;
24120b57cec5SDimitry Andric 
24130b57cec5SDimitry Andric     if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
24140b57cec5SDimitry Andric       const unsigned SextOpc = SrcSize == 8 ?
24150b57cec5SDimitry Andric         AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
24160b57cec5SDimitry Andric       BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg)
24170b57cec5SDimitry Andric         .addReg(SrcReg);
24188bcb0991SDimitry Andric       I.eraseFromParent();
24198bcb0991SDimitry Andric       return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
24200b57cec5SDimitry Andric     }
24210b57cec5SDimitry Andric 
2422bdd1243dSDimitry Andric     // Using a single 32-bit SALU to calculate the high half is smaller than
2423bdd1243dSDimitry Andric     // S_BFE with a literal constant operand.
2424bdd1243dSDimitry Andric     if (DstSize > 32 && SrcSize == 32) {
2425bdd1243dSDimitry Andric       Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2426bdd1243dSDimitry Andric       unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
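                                  // The high half of a sign extension replicates the sign bit
                                  // (SrcReg >> 31); for a zero extension it is simply zero.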
2427bdd1243dSDimitry Andric       if (Signed) {
2428bdd1243dSDimitry Andric         BuildMI(MBB, I, DL, TII.get(AMDGPU::S_ASHR_I32), HiReg)
2429bdd1243dSDimitry Andric           .addReg(SrcReg, 0, SubReg)
24305f757f3fSDimitry Andric           .addImm(31)
24315f757f3fSDimitry Andric           .setOperandDead(3); // Dead scc
2432bdd1243dSDimitry Andric       } else {
2433bdd1243dSDimitry Andric         BuildMI(MBB, I, DL, TII.get(AMDGPU::S_MOV_B32), HiReg)
2434bdd1243dSDimitry Andric           .addImm(0);
2435bdd1243dSDimitry Andric       }
2436bdd1243dSDimitry Andric       BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
2437bdd1243dSDimitry Andric         .addReg(SrcReg, 0, SubReg)
2438bdd1243dSDimitry Andric         .addImm(AMDGPU::sub0)
2439bdd1243dSDimitry Andric         .addReg(HiReg)
2440bdd1243dSDimitry Andric         .addImm(AMDGPU::sub1);
2441bdd1243dSDimitry Andric       I.eraseFromParent();
2442bdd1243dSDimitry Andric       return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass,
2443bdd1243dSDimitry Andric                                           *MRI);
2444bdd1243dSDimitry Andric     }
2445bdd1243dSDimitry Andric 
24460b57cec5SDimitry Andric     const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
24470b57cec5SDimitry Andric     const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
24480b57cec5SDimitry Andric 
24500b57cec5SDimitry Andric     // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16] = width.
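                                // For example, SrcSize << 16 requests offset 0 and width SrcSize.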
24505ffd83dbSDimitry Andric     if (DstSize > 32 && (SrcSize <= 32 || InReg)) {
24510b57cec5SDimitry Andric       // We need a 64-bit register source, but the high bits don't matter.
24528bcb0991SDimitry Andric       Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
24538bcb0991SDimitry Andric       Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2454bdd1243dSDimitry Andric       unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
24555ffd83dbSDimitry Andric 
24560b57cec5SDimitry Andric       BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
24570b57cec5SDimitry Andric       BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg)
24585ffd83dbSDimitry Andric         .addReg(SrcReg, 0, SubReg)
24590b57cec5SDimitry Andric         .addImm(AMDGPU::sub0)
24600b57cec5SDimitry Andric         .addReg(UndefReg)
24610b57cec5SDimitry Andric         .addImm(AMDGPU::sub1);
24620b57cec5SDimitry Andric 
24630b57cec5SDimitry Andric       BuildMI(MBB, I, DL, TII.get(BFE64), DstReg)
24640b57cec5SDimitry Andric         .addReg(ExtReg)
24650b57cec5SDimitry Andric         .addImm(SrcSize << 16);
24660b57cec5SDimitry Andric 
24678bcb0991SDimitry Andric       I.eraseFromParent();
24688bcb0991SDimitry Andric       return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, *MRI);
24690b57cec5SDimitry Andric     }
24700b57cec5SDimitry Andric 
24710b57cec5SDimitry Andric     unsigned Mask;
24720b57cec5SDimitry Andric     if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
24730b57cec5SDimitry Andric       BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg)
24740b57cec5SDimitry Andric         .addReg(SrcReg)
24755f757f3fSDimitry Andric         .addImm(Mask)
24765f757f3fSDimitry Andric         .setOperandDead(3); // Dead scc
24770b57cec5SDimitry Andric     } else {
24780b57cec5SDimitry Andric       BuildMI(MBB, I, DL, TII.get(BFE32), DstReg)
24790b57cec5SDimitry Andric         .addReg(SrcReg)
24800b57cec5SDimitry Andric         .addImm(SrcSize << 16);
24810b57cec5SDimitry Andric     }
24820b57cec5SDimitry Andric 
24838bcb0991SDimitry Andric     I.eraseFromParent();
24848bcb0991SDimitry Andric     return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
24850b57cec5SDimitry Andric   }
24860b57cec5SDimitry Andric 
24870b57cec5SDimitry Andric   return false;
24880b57cec5SDimitry Andric }
24890b57cec5SDimitry Andric 
24905f757f3fSDimitry Andric static bool isExtractHiElt(MachineRegisterInfo &MRI, Register In,
24915f757f3fSDimitry Andric                            Register &Out) {
24925f757f3fSDimitry Andric   Register LShlSrc;
24935f757f3fSDimitry Andric   if (mi_match(In, MRI,
24945f757f3fSDimitry Andric                m_GTrunc(m_GLShr(m_Reg(LShlSrc), m_SpecificICst(16))))) {
24955f757f3fSDimitry Andric     Out = LShlSrc;
24965f757f3fSDimitry Andric     return true;
24975f757f3fSDimitry Andric   }
24985f757f3fSDimitry Andric   return false;
24995f757f3fSDimitry Andric }
25005f757f3fSDimitry Andric 
25015f757f3fSDimitry Andric bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const {
25025f757f3fSDimitry Andric   if (!Subtarget->hasSALUFloatInsts())
25035f757f3fSDimitry Andric     return false;
25045f757f3fSDimitry Andric 
25055f757f3fSDimitry Andric   Register Dst = I.getOperand(0).getReg();
25065f757f3fSDimitry Andric   const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
25075f757f3fSDimitry Andric   if (DstRB->getID() != AMDGPU::SGPRRegBankID)
25085f757f3fSDimitry Andric     return false;
25095f757f3fSDimitry Andric 
25105f757f3fSDimitry Andric   Register Src = I.getOperand(1).getReg();
25115f757f3fSDimitry Andric 
25125f757f3fSDimitry Andric   if (MRI->getType(Dst) == LLT::scalar(32) &&
25135f757f3fSDimitry Andric       MRI->getType(Src) == LLT::scalar(16)) {
25145f757f3fSDimitry Andric     if (isExtractHiElt(*MRI, Src, Src)) {
25155f757f3fSDimitry Andric       MachineBasicBlock *BB = I.getParent();
25165f757f3fSDimitry Andric       BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)
25175f757f3fSDimitry Andric           .addUse(Src);
25185f757f3fSDimitry Andric       I.eraseFromParent();
25195f757f3fSDimitry Andric       return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);
25205f757f3fSDimitry Andric     }
25215f757f3fSDimitry Andric   }
25225f757f3fSDimitry Andric 
25235f757f3fSDimitry Andric   return false;
25245f757f3fSDimitry Andric }
25255f757f3fSDimitry Andric 
25260b57cec5SDimitry Andric bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
25270b57cec5SDimitry Andric   MachineBasicBlock *BB = I.getParent();
25280b57cec5SDimitry Andric   MachineOperand &ImmOp = I.getOperand(1);
2529e8d8bef9SDimitry Andric   Register DstReg = I.getOperand(0).getReg();
2530e8d8bef9SDimitry Andric   unsigned Size = MRI->getType(DstReg).getSizeInBits();
25315f757f3fSDimitry Andric   bool IsFP = false;
25320b57cec5SDimitry Andric 
25330b57cec5SDimitry Andric   // The AMDGPU backend only supports Imm operands and not CImm or FPImm.
25340b57cec5SDimitry Andric   if (ImmOp.isFPImm()) {
25350b57cec5SDimitry Andric     const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt();
25360b57cec5SDimitry Andric     ImmOp.ChangeToImmediate(Imm.getZExtValue());
25375f757f3fSDimitry Andric     IsFP = true;
25380b57cec5SDimitry Andric   } else if (ImmOp.isCImm()) {
2539e8d8bef9SDimitry Andric     ImmOp.ChangeToImmediate(ImmOp.getCImm()->getSExtValue());
25400b57cec5SDimitry Andric   } else {
2541e8d8bef9SDimitry Andric     llvm_unreachable("Not supported by g_constants");
25420b57cec5SDimitry Andric   }
25430b57cec5SDimitry Andric 
2544e8d8bef9SDimitry Andric   const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
2545e8d8bef9SDimitry Andric   const bool IsSgpr = DstRB->getID() == AMDGPU::SGPRRegBankID;
25460b57cec5SDimitry Andric 
2547e8d8bef9SDimitry Andric   unsigned Opcode;
2548e8d8bef9SDimitry Andric   if (DstRB->getID() == AMDGPU::VCCRegBankID) {
2549e8d8bef9SDimitry Andric     Opcode = STI.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
25505f757f3fSDimitry Andric   } else if (Size == 64 &&
25515f757f3fSDimitry Andric              AMDGPU::isValid32BitLiteral(I.getOperand(1).getImm(), IsFP)) {
25525f757f3fSDimitry Andric     Opcode = IsSgpr ? AMDGPU::S_MOV_B64_IMM_PSEUDO : AMDGPU::V_MOV_B64_PSEUDO;
25535f757f3fSDimitry Andric     I.setDesc(TII.get(Opcode));
25545f757f3fSDimitry Andric     I.addImplicitDefUseOperands(*MF);
25555f757f3fSDimitry Andric     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2556e8d8bef9SDimitry Andric   } else {
2557e8d8bef9SDimitry Andric     Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
2558e8d8bef9SDimitry Andric 
2559e8d8bef9SDimitry Andric     // We should never produce s1 values on banks other than VCC. If the user of
2560e8d8bef9SDimitry Andric     // this already constrained the register, we may incorrectly think it's VCC
2561e8d8bef9SDimitry Andric     // if it wasn't originally.
2562e8d8bef9SDimitry Andric     if (Size == 1)
2563e8d8bef9SDimitry Andric       return false;
2564e8d8bef9SDimitry Andric   }
2565e8d8bef9SDimitry Andric 
2566e8d8bef9SDimitry Andric   if (Size != 64) {
25670b57cec5SDimitry Andric     I.setDesc(TII.get(Opcode));
25680b57cec5SDimitry Andric     I.addImplicitDefUseOperands(*MF);
25690b57cec5SDimitry Andric     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
25700b57cec5SDimitry Andric   }
25710b57cec5SDimitry Andric 
25728bcb0991SDimitry Andric   const DebugLoc &DL = I.getDebugLoc();
25738bcb0991SDimitry Andric 
25748bcb0991SDimitry Andric   APInt Imm(Size, I.getOperand(1).getImm());
25758bcb0991SDimitry Andric 
25768bcb0991SDimitry Andric   MachineInstr *ResInst;
25778bcb0991SDimitry Andric   if (IsSgpr && TII.isInlineConstant(Imm)) {
25788bcb0991SDimitry Andric     ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B64), DstReg)
25798bcb0991SDimitry Andric       .addImm(I.getOperand(1).getImm());
25808bcb0991SDimitry Andric   } else {
25818bcb0991SDimitry Andric     const TargetRegisterClass *RC = IsSgpr ?
25828bcb0991SDimitry Andric       &AMDGPU::SReg_32RegClass : &AMDGPU::VGPR_32RegClass;
25838bcb0991SDimitry Andric     Register LoReg = MRI->createVirtualRegister(RC);
25848bcb0991SDimitry Andric     Register HiReg = MRI->createVirtualRegister(RC);
25850b57cec5SDimitry Andric 
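                                // Materialize the 64-bit immediate as two 32-bit moves and
                                // recombine the halves with a REG_SEQUENCE.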
25860b57cec5SDimitry Andric     BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
25870b57cec5SDimitry Andric       .addImm(Imm.trunc(32).getZExtValue());
25880b57cec5SDimitry Andric 
25890b57cec5SDimitry Andric     BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
25900b57cec5SDimitry Andric       .addImm(Imm.ashr(32).getZExtValue());
25910b57cec5SDimitry Andric 
25928bcb0991SDimitry Andric     ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
25930b57cec5SDimitry Andric       .addReg(LoReg)
25940b57cec5SDimitry Andric       .addImm(AMDGPU::sub0)
25950b57cec5SDimitry Andric       .addReg(HiReg)
25960b57cec5SDimitry Andric       .addImm(AMDGPU::sub1);
25978bcb0991SDimitry Andric   }
25980b57cec5SDimitry Andric 
25990b57cec5SDimitry Andric   // We can't call constrainSelectedInstRegOperands here, because it doesn't
26000b57cec5SDimitry Andric   // work for target-independent opcodes.
26010b57cec5SDimitry Andric   I.eraseFromParent();
26020b57cec5SDimitry Andric   const TargetRegisterClass *DstRC =
26038bcb0991SDimitry Andric     TRI.getConstrainedRegClassForOperand(ResInst->getOperand(0), *MRI);
26040b57cec5SDimitry Andric   if (!DstRC)
26050b57cec5SDimitry Andric     return true;
26068bcb0991SDimitry Andric   return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI);
26070b57cec5SDimitry Andric }
26080b57cec5SDimitry Andric 
26095ffd83dbSDimitry Andric bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const {
26105ffd83dbSDimitry Andric   // Only manually handle the f64 SGPR case.
26115ffd83dbSDimitry Andric   //
26125ffd83dbSDimitry Andric   // FIXME: This is a workaround for 2.5 different tablegen problems. Because
26135ffd83dbSDimitry Andric   // the bit ops theoretically have a second result due to the implicit def of
26145ffd83dbSDimitry Andric   // SCC, the GlobalISelEmitter is overly conservative and rejects it. Fixing
26155ffd83dbSDimitry Andric   // that is easy by disabling the check. The result works, but uses a
26165ffd83dbSDimitry Andric   // nonsensical sreg32orlds_and_sreg_1 regclass.
26175ffd83dbSDimitry Andric   //
26185ffd83dbSDimitry Andric   // The DAG emitter is more problematic, and incorrectly adds both results of
26195ffd83dbSDimitry Andric   // the S_XOR_B32 to the variadic REG_SEQUENCE operands.
26205ffd83dbSDimitry Andric 
26215ffd83dbSDimitry Andric   Register Dst = MI.getOperand(0).getReg();
26225ffd83dbSDimitry Andric   const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
26235ffd83dbSDimitry Andric   if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
26245ffd83dbSDimitry Andric       MRI->getType(Dst) != LLT::scalar(64))
26255ffd83dbSDimitry Andric     return false;
26265ffd83dbSDimitry Andric 
26275ffd83dbSDimitry Andric   Register Src = MI.getOperand(1).getReg();
26285ffd83dbSDimitry Andric   MachineInstr *Fabs = getOpcodeDef(TargetOpcode::G_FABS, Src, *MRI);
26295ffd83dbSDimitry Andric   if (Fabs)
26305ffd83dbSDimitry Andric     Src = Fabs->getOperand(1).getReg();
26315ffd83dbSDimitry Andric 
26325ffd83dbSDimitry Andric   if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
26335ffd83dbSDimitry Andric       !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))
26345ffd83dbSDimitry Andric     return false;
26355ffd83dbSDimitry Andric 
26365ffd83dbSDimitry Andric   MachineBasicBlock *BB = MI.getParent();
26375ffd83dbSDimitry Andric   const DebugLoc &DL = MI.getDebugLoc();
26385ffd83dbSDimitry Andric   Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
26395ffd83dbSDimitry Andric   Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
26405ffd83dbSDimitry Andric   Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
26415ffd83dbSDimitry Andric   Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
26425ffd83dbSDimitry Andric 
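                              // The f64 sign bit is bit 63, i.e. bit 31 of the high 32-bit half, so
                              // only the high half needs the OR/XOR with 0x80000000.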
26435ffd83dbSDimitry Andric   BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
26445ffd83dbSDimitry Andric     .addReg(Src, 0, AMDGPU::sub0);
26455ffd83dbSDimitry Andric   BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
26465ffd83dbSDimitry Andric     .addReg(Src, 0, AMDGPU::sub1);
26475ffd83dbSDimitry Andric   BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
26485ffd83dbSDimitry Andric     .addImm(0x80000000);
26495ffd83dbSDimitry Andric 
26505ffd83dbSDimitry Andric   // Set or toggle sign bit.
26515ffd83dbSDimitry Andric   unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;
26525ffd83dbSDimitry Andric   BuildMI(*BB, &MI, DL, TII.get(Opc), OpReg)
26535ffd83dbSDimitry Andric     .addReg(HiReg)
26545f757f3fSDimitry Andric     .addReg(ConstReg)
26555f757f3fSDimitry Andric     .setOperandDead(3); // Dead scc
26565ffd83dbSDimitry Andric   BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
26575ffd83dbSDimitry Andric     .addReg(LoReg)
26585ffd83dbSDimitry Andric     .addImm(AMDGPU::sub0)
26595ffd83dbSDimitry Andric     .addReg(OpReg)
26605ffd83dbSDimitry Andric     .addImm(AMDGPU::sub1);
26615ffd83dbSDimitry Andric   MI.eraseFromParent();
26625ffd83dbSDimitry Andric   return true;
26635ffd83dbSDimitry Andric }
26645ffd83dbSDimitry Andric 
26655ffd83dbSDimitry Andric // FIXME: This is a workaround for the same tablegen problems as G_FNEG
26665ffd83dbSDimitry Andric bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const {
26675ffd83dbSDimitry Andric   Register Dst = MI.getOperand(0).getReg();
26685ffd83dbSDimitry Andric   const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
26695ffd83dbSDimitry Andric   if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
26705ffd83dbSDimitry Andric       MRI->getType(Dst) != LLT::scalar(64))
26715ffd83dbSDimitry Andric     return false;
26725ffd83dbSDimitry Andric 
26735ffd83dbSDimitry Andric   Register Src = MI.getOperand(1).getReg();
26745ffd83dbSDimitry Andric   MachineBasicBlock *BB = MI.getParent();
26755ffd83dbSDimitry Andric   const DebugLoc &DL = MI.getDebugLoc();
26765ffd83dbSDimitry Andric   Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
26775ffd83dbSDimitry Andric   Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
26785ffd83dbSDimitry Andric   Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
26795ffd83dbSDimitry Andric   Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
26805ffd83dbSDimitry Andric 
26815ffd83dbSDimitry Andric   if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
26825ffd83dbSDimitry Andric       !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))
26835ffd83dbSDimitry Andric     return false;
26845ffd83dbSDimitry Andric 
26855ffd83dbSDimitry Andric   BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
26865ffd83dbSDimitry Andric     .addReg(Src, 0, AMDGPU::sub0);
26875ffd83dbSDimitry Andric   BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
26885ffd83dbSDimitry Andric     .addReg(Src, 0, AMDGPU::sub1);
26895ffd83dbSDimitry Andric   BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
26905ffd83dbSDimitry Andric     .addImm(0x7fffffff);
26915ffd83dbSDimitry Andric 
26925ffd83dbSDimitry Andric   // Clear sign bit.
26935ffd83dbSDimitry Andric   // TODO: Should this use S_BITSET0_*?
26945ffd83dbSDimitry Andric   BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_AND_B32), OpReg)
26955ffd83dbSDimitry Andric     .addReg(HiReg)
26965f757f3fSDimitry Andric     .addReg(ConstReg)
26975f757f3fSDimitry Andric     .setOperandDead(3); // Dead scc
26985ffd83dbSDimitry Andric   BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
26995ffd83dbSDimitry Andric     .addReg(LoReg)
27005ffd83dbSDimitry Andric     .addImm(AMDGPU::sub0)
27015ffd83dbSDimitry Andric     .addReg(OpReg)
27025ffd83dbSDimitry Andric     .addImm(AMDGPU::sub1);
27035ffd83dbSDimitry Andric 
27045ffd83dbSDimitry Andric   MI.eraseFromParent();
27055ffd83dbSDimitry Andric   return true;
27065ffd83dbSDimitry Andric }
27075ffd83dbSDimitry Andric 
27080b57cec5SDimitry Andric static bool isConstant(const MachineInstr &MI) {
27090b57cec5SDimitry Andric   return MI.getOpcode() == TargetOpcode::G_CONSTANT;
27100b57cec5SDimitry Andric }
27110b57cec5SDimitry Andric 
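// Recursively walk the chain of G_PTR_ADDs that feeds the load's pointer
// operand, recording for each step its SGPR parts, VGPR parts, and any
// constant immediate offset into AddrInfo.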
27120b57cec5SDimitry Andric void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
27130b57cec5SDimitry Andric     const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {
27140b57cec5SDimitry Andric 
27151db9f3b2SDimitry Andric   unsigned OpNo = Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;
27161db9f3b2SDimitry Andric   const MachineInstr *PtrMI =
27171db9f3b2SDimitry Andric       MRI.getUniqueVRegDef(Load.getOperand(OpNo).getReg());
27180b57cec5SDimitry Andric 
27190b57cec5SDimitry Andric   assert(PtrMI);
27200b57cec5SDimitry Andric 
2721480093f4SDimitry Andric   if (PtrMI->getOpcode() != TargetOpcode::G_PTR_ADD)
27220b57cec5SDimitry Andric     return;
27230b57cec5SDimitry Andric 
2724fcaf7f86SDimitry Andric   GEPInfo GEPInfo;
27250b57cec5SDimitry Andric 
27268bcb0991SDimitry Andric   for (unsigned i = 1; i != 3; ++i) {
27270b57cec5SDimitry Andric     const MachineOperand &GEPOp = PtrMI->getOperand(i);
27280b57cec5SDimitry Andric     const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
27290b57cec5SDimitry Andric     assert(OpDef);
27308bcb0991SDimitry Andric     if (i == 2 && isConstant(*OpDef)) {
27318bcb0991SDimitry Andric       // TODO: Could handle constant base + variable offset, but a combine
27328bcb0991SDimitry Andric       // probably should have commuted it.
27330b57cec5SDimitry Andric       assert(GEPInfo.Imm == 0);
27340b57cec5SDimitry Andric       GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
27350b57cec5SDimitry Andric       continue;
27360b57cec5SDimitry Andric     }
27370b57cec5SDimitry Andric     const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
27380b57cec5SDimitry Andric     if (OpBank->getID() == AMDGPU::SGPRRegBankID)
27390b57cec5SDimitry Andric       GEPInfo.SgprParts.push_back(GEPOp.getReg());
27400b57cec5SDimitry Andric     else
27410b57cec5SDimitry Andric       GEPInfo.VgprParts.push_back(GEPOp.getReg());
27420b57cec5SDimitry Andric   }
27430b57cec5SDimitry Andric 
27440b57cec5SDimitry Andric   AddrInfo.push_back(GEPInfo);
27450b57cec5SDimitry Andric   getAddrModeInfo(*PtrMI, MRI, AddrInfo);
27460b57cec5SDimitry Andric }
27470b57cec5SDimitry Andric 
2748e8d8bef9SDimitry Andric bool AMDGPUInstructionSelector::isSGPR(Register Reg) const {
2749e8d8bef9SDimitry Andric   return RBI.getRegBank(Reg, *MRI, TRI)->getID() == AMDGPU::SGPRRegBankID;
2750e8d8bef9SDimitry Andric }
2751e8d8bef9SDimitry Andric 
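// Return true if the access performed by MI is known to be uniform, judged
// from its single memory operand: kernel arguments (or undef), constants,
// globals, the 32-bit constant address space, or amdgpu.uniform metadata.
// G_PREFETCH is instead classified by the register bank of its address
// operand.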
27520b57cec5SDimitry Andric bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
27530b57cec5SDimitry Andric   if (!MI.hasOneMemOperand())
27540b57cec5SDimitry Andric     return false;
27550b57cec5SDimitry Andric 
27560b57cec5SDimitry Andric   const MachineMemOperand *MMO = *MI.memoperands_begin();
27570b57cec5SDimitry Andric   const Value *Ptr = MMO->getValue();
27580b57cec5SDimitry Andric 
27590b57cec5SDimitry Andric   // UndefValue means this is a load of a kernel input.  These are uniform.
27600b57cec5SDimitry Andric   // Sometimes LDS instructions have constant pointers.
27610b57cec5SDimitry Andric   // If Ptr is null, then that means this mem operand contains a
27620b57cec5SDimitry Andric   // PseudoSourceValue like GOT.
27630b57cec5SDimitry Andric   if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
27640b57cec5SDimitry Andric       isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
27650b57cec5SDimitry Andric     return true;
27660b57cec5SDimitry Andric 
27670b57cec5SDimitry Andric   if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
27680b57cec5SDimitry Andric     return true;
27690b57cec5SDimitry Andric 
27701db9f3b2SDimitry Andric   if (MI.getOpcode() == AMDGPU::G_PREFETCH)
27711db9f3b2SDimitry Andric     return RBI.getRegBank(MI.getOperand(0).getReg(), *MRI, TRI)->getID() ==
27721db9f3b2SDimitry Andric            AMDGPU::SGPRRegBankID;
27731db9f3b2SDimitry Andric 
27740b57cec5SDimitry Andric   const Instruction *I = dyn_cast<Instruction>(Ptr);
27750b57cec5SDimitry Andric   return I && I->getMetadata("amdgpu.uniform");
27760b57cec5SDimitry Andric }
27770b57cec5SDimitry Andric 
27780b57cec5SDimitry Andric bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
27790b57cec5SDimitry Andric   for (const GEPInfo &GEPInfo : AddrInfo) {
27800b57cec5SDimitry Andric     if (!GEPInfo.VgprParts.empty())
27810b57cec5SDimitry Andric       return true;
27820b57cec5SDimitry Andric   }
27830b57cec5SDimitry Andric   return false;
27840b57cec5SDimitry Andric }
27850b57cec5SDimitry Andric 
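// If I accesses LDS or GDS on a subtarget where DS instructions need M0
// initialized, emit "s_mov_b32 m0, -1" in front of it.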
27868bcb0991SDimitry Andric void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
27878bcb0991SDimitry Andric   const LLT PtrTy = MRI->getType(I.getOperand(1).getReg());
27888bcb0991SDimitry Andric   unsigned AS = PtrTy.getAddressSpace();
27898bcb0991SDimitry Andric   if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) &&
27908bcb0991SDimitry Andric       STI.ldsRequiresM0Init()) {
2791e8d8bef9SDimitry Andric     MachineBasicBlock *BB = I.getParent();
2792e8d8bef9SDimitry Andric 
2793349cc55cSDimitry Andric     // If DS instructions require M0 initialization, insert it before selecting.
27948bcb0991SDimitry Andric     BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
27958bcb0991SDimitry Andric       .addImm(-1);
27968bcb0991SDimitry Andric   }
27978bcb0991SDimitry Andric }
27988bcb0991SDimitry Andric 
2799e8d8bef9SDimitry Andric bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
2800e8d8bef9SDimitry Andric   MachineInstr &I) const {
28018bcb0991SDimitry Andric   initM0(I);
28028bcb0991SDimitry Andric   return selectImpl(I, *CoverageInfo);
28030b57cec5SDimitry Andric }
28040b57cec5SDimitry Andric 
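// Return true if Reg is (looking through copies and bitwise ops) defined by a
// comparison or llvm.amdgcn.class, i.e. a value whose inactive-lane bits are
// already clear, so the branch condition does not need an extra AND with exec.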
2805349cc55cSDimitry Andric static bool isVCmpResult(Register Reg, MachineRegisterInfo &MRI) {
2806349cc55cSDimitry Andric   if (Reg.isPhysical())
2807349cc55cSDimitry Andric     return false;
2808349cc55cSDimitry Andric 
2809349cc55cSDimitry Andric   MachineInstr &MI = *MRI.getUniqueVRegDef(Reg);
2810349cc55cSDimitry Andric   const unsigned Opcode = MI.getOpcode();
2811349cc55cSDimitry Andric 
2812349cc55cSDimitry Andric   if (Opcode == AMDGPU::COPY)
2813349cc55cSDimitry Andric     return isVCmpResult(MI.getOperand(1).getReg(), MRI);
2814349cc55cSDimitry Andric 
2815349cc55cSDimitry Andric   if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
2816349cc55cSDimitry Andric       Opcode == AMDGPU::G_XOR)
2817349cc55cSDimitry Andric     return isVCmpResult(MI.getOperand(1).getReg(), MRI) &&
2818349cc55cSDimitry Andric            isVCmpResult(MI.getOperand(2).getReg(), MRI);
2819349cc55cSDimitry Andric 
28205f757f3fSDimitry Andric   if (auto *GI = dyn_cast<GIntrinsic>(&MI))
28215f757f3fSDimitry Andric     return GI->is(Intrinsic::amdgcn_class);
2822349cc55cSDimitry Andric 
2823349cc55cSDimitry Andric   return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
2824349cc55cSDimitry Andric }
2825349cc55cSDimitry Andric 
28260b57cec5SDimitry Andric bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
28270b57cec5SDimitry Andric   MachineBasicBlock *BB = I.getParent();
28280b57cec5SDimitry Andric   MachineOperand &CondOp = I.getOperand(0);
28290b57cec5SDimitry Andric   Register CondReg = CondOp.getReg();
28300b57cec5SDimitry Andric   const DebugLoc &DL = I.getDebugLoc();
28310b57cec5SDimitry Andric 
28320b57cec5SDimitry Andric   unsigned BrOpcode;
28330b57cec5SDimitry Andric   Register CondPhysReg;
28340b57cec5SDimitry Andric   const TargetRegisterClass *ConstrainRC;
28350b57cec5SDimitry Andric 
28360b57cec5SDimitry Andric   // In SelectionDAG, we inspect the IR block for uniformity metadata to decide
28370b57cec5SDimitry Andric   // whether the branch is uniform when selecting the instruction. In
28380b57cec5SDimitry Andric   // GlobalISel, we should push that decision into RegBankSelect. Assume for now
28390b57cec5SDimitry Andric   // RegBankSelect knows what it's doing if the branch condition is scc, even
28400b57cec5SDimitry Andric   // though it currently does not.
2841480093f4SDimitry Andric   if (!isVCC(CondReg, *MRI)) {
2842480093f4SDimitry Andric     if (MRI->getType(CondReg) != LLT::scalar(32))
2843480093f4SDimitry Andric       return false;
2844480093f4SDimitry Andric 
28450b57cec5SDimitry Andric     CondPhysReg = AMDGPU::SCC;
28460b57cec5SDimitry Andric     BrOpcode = AMDGPU::S_CBRANCH_SCC1;
2847e8d8bef9SDimitry Andric     ConstrainRC = &AMDGPU::SReg_32RegClass;
2848480093f4SDimitry Andric   } else {
28490b57cec5SDimitry Andric     // FIXME: Should scc->vcc copies be ANDed with exec?
2850349cc55cSDimitry Andric 
2851349cc55cSDimitry Andric     // Unless the value of CondReg is the result of a V_CMP* instruction, we
2852349cc55cSDimitry Andric     // need to insert an AND with exec.
2853349cc55cSDimitry Andric     if (!isVCmpResult(CondReg, *MRI)) {
2854349cc55cSDimitry Andric       const bool Is64 = STI.isWave64();
2855349cc55cSDimitry Andric       const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
2856349cc55cSDimitry Andric       const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
2857349cc55cSDimitry Andric 
2858349cc55cSDimitry Andric       Register TmpReg = MRI->createVirtualRegister(TRI.getBoolRC());
2859349cc55cSDimitry Andric       BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg)
2860349cc55cSDimitry Andric           .addReg(CondReg)
28615f757f3fSDimitry Andric           .addReg(Exec)
28625f757f3fSDimitry Andric           .setOperandDead(3); // Dead scc
2863349cc55cSDimitry Andric       CondReg = TmpReg;
2864349cc55cSDimitry Andric     }
2865349cc55cSDimitry Andric 
28660b57cec5SDimitry Andric     CondPhysReg = TRI.getVCC();
28670b57cec5SDimitry Andric     BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
28680b57cec5SDimitry Andric     ConstrainRC = TRI.getBoolRC();
2869480093f4SDimitry Andric   }
28700b57cec5SDimitry Andric 
28718bcb0991SDimitry Andric   if (!MRI->getRegClassOrNull(CondReg))
28728bcb0991SDimitry Andric     MRI->setRegClass(CondReg, ConstrainRC);
28730b57cec5SDimitry Andric 
28740b57cec5SDimitry Andric   BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
28750b57cec5SDimitry Andric     .addReg(CondReg);
28760b57cec5SDimitry Andric   BuildMI(*BB, &I, DL, TII.get(BrOpcode))
28770b57cec5SDimitry Andric     .addMBB(I.getOperand(1).getMBB());
28780b57cec5SDimitry Andric 
28790b57cec5SDimitry Andric   I.eraseFromParent();
28800b57cec5SDimitry Andric   return true;
28810b57cec5SDimitry Andric }
28820b57cec5SDimitry Andric 
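// Select G_GLOBAL_VALUE as a single 32-bit move of the symbol operand, using
// S_MOV_B32 or V_MOV_B32_e32 depending on the destination register bank.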
2883e8d8bef9SDimitry Andric bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(
28845ffd83dbSDimitry Andric   MachineInstr &I) const {
28850b57cec5SDimitry Andric   Register DstReg = I.getOperand(0).getReg();
28868bcb0991SDimitry Andric   const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
28870b57cec5SDimitry Andric   const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
28880b57cec5SDimitry Andric   I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
28890b57cec5SDimitry Andric   if (IsVGPR)
28900b57cec5SDimitry Andric     I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
28910b57cec5SDimitry Andric 
28920b57cec5SDimitry Andric   return RBI.constrainGenericRegister(
28938bcb0991SDimitry Andric     DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
28940b57cec5SDimitry Andric }
28950b57cec5SDimitry Andric 
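// Select G_PTRMASK by AND-ing the pointer with the mask. A 64-bit SGPR
// pointer can use a single S_AND_B64, and any 32-bit half whose mask bits are
// all known ones is simply copied instead of masked.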
28965ffd83dbSDimitry Andric bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
28978bcb0991SDimitry Andric   Register DstReg = I.getOperand(0).getReg();
28988bcb0991SDimitry Andric   Register SrcReg = I.getOperand(1).getReg();
28995ffd83dbSDimitry Andric   Register MaskReg = I.getOperand(2).getReg();
29005ffd83dbSDimitry Andric   LLT Ty = MRI->getType(DstReg);
29015ffd83dbSDimitry Andric   LLT MaskTy = MRI->getType(MaskReg);
290204eeddc0SDimitry Andric   MachineBasicBlock *BB = I.getParent();
290304eeddc0SDimitry Andric   const DebugLoc &DL = I.getDebugLoc();
29048bcb0991SDimitry Andric 
29058bcb0991SDimitry Andric   const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
29068bcb0991SDimitry Andric   const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
29075ffd83dbSDimitry Andric   const RegisterBank *MaskRB = RBI.getRegBank(MaskReg, *MRI, TRI);
29088bcb0991SDimitry Andric   const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
29095ffd83dbSDimitry Andric   if (DstRB != SrcRB) // Should only happen for hand-written MIR.
29105ffd83dbSDimitry Andric     return false;
29115ffd83dbSDimitry Andric 
291204eeddc0SDimitry Andric   // Try to avoid emitting a bit operation when we only need to touch half of
291304eeddc0SDimitry Andric   // the 64-bit pointer.
291406c3fb27SDimitry Andric   APInt MaskOnes = KB->getKnownOnes(MaskReg).zext(64);
291504eeddc0SDimitry Andric   const APInt MaskHi32 = APInt::getHighBitsSet(64, 32);
291604eeddc0SDimitry Andric   const APInt MaskLo32 = APInt::getLowBitsSet(64, 32);
291704eeddc0SDimitry Andric 
291804eeddc0SDimitry Andric   const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
291904eeddc0SDimitry Andric   const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;
292004eeddc0SDimitry Andric 
292104eeddc0SDimitry Andric   if (!IsVGPR && Ty.getSizeInBits() == 64 &&
292204eeddc0SDimitry Andric       !CanCopyLow32 && !CanCopyHi32) {
292304eeddc0SDimitry Andric     auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg)
292404eeddc0SDimitry Andric       .addReg(SrcReg)
29255f757f3fSDimitry Andric       .addReg(MaskReg)
29265f757f3fSDimitry Andric       .setOperandDead(3); // Dead scc
292704eeddc0SDimitry Andric     I.eraseFromParent();
292804eeddc0SDimitry Andric     return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
292904eeddc0SDimitry Andric   }
293004eeddc0SDimitry Andric 
29318bcb0991SDimitry Andric   unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
29328bcb0991SDimitry Andric   const TargetRegisterClass &RegRC
29338bcb0991SDimitry Andric     = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
29348bcb0991SDimitry Andric 
293581ad6265SDimitry Andric   const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(Ty, *DstRB);
293681ad6265SDimitry Andric   const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(Ty, *SrcRB);
29375ffd83dbSDimitry Andric   const TargetRegisterClass *MaskRC =
293881ad6265SDimitry Andric       TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);
29395ffd83dbSDimitry Andric 
29408bcb0991SDimitry Andric   if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
29415ffd83dbSDimitry Andric       !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
29425ffd83dbSDimitry Andric       !RBI.constrainGenericRegister(MaskReg, *MaskRC, *MRI))
29438bcb0991SDimitry Andric     return false;
29448bcb0991SDimitry Andric 
29458bcb0991SDimitry Andric   if (Ty.getSizeInBits() == 32) {
29465ffd83dbSDimitry Andric     assert(MaskTy.getSizeInBits() == 32 &&
29475ffd83dbSDimitry Andric            "ptrmask should have been narrowed during legalize");
29485ffd83dbSDimitry Andric 
29495f757f3fSDimitry Andric     auto NewOp = BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)
29508bcb0991SDimitry Andric       .addReg(SrcReg)
29515ffd83dbSDimitry Andric       .addReg(MaskReg);
29525f757f3fSDimitry Andric 
29535f757f3fSDimitry Andric     if (!IsVGPR)
29545f757f3fSDimitry Andric       NewOp.setOperandDead(3); // Dead scc
29558bcb0991SDimitry Andric     I.eraseFromParent();
29568bcb0991SDimitry Andric     return true;
29578bcb0991SDimitry Andric   }
29588bcb0991SDimitry Andric 
29598bcb0991SDimitry Andric   Register HiReg = MRI->createVirtualRegister(&RegRC);
29608bcb0991SDimitry Andric   Register LoReg = MRI->createVirtualRegister(&RegRC);
29618bcb0991SDimitry Andric 
29625ffd83dbSDimitry Andric   // Extract the subregisters from the source pointer.
29638bcb0991SDimitry Andric   BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg)
29648bcb0991SDimitry Andric     .addReg(SrcReg, 0, AMDGPU::sub0);
29658bcb0991SDimitry Andric   BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg)
29668bcb0991SDimitry Andric     .addReg(SrcReg, 0, AMDGPU::sub1);
29678bcb0991SDimitry Andric 
29685ffd83dbSDimitry Andric   Register MaskedLo, MaskedHi;
29695ffd83dbSDimitry Andric 
297004eeddc0SDimitry Andric   if (CanCopyLow32) {
29715ffd83dbSDimitry Andric     // If all the bits in the low half are 1, we only need a copy for it.
29725ffd83dbSDimitry Andric     MaskedLo = LoReg;
29735ffd83dbSDimitry Andric   } else {
29745ffd83dbSDimitry Andric     // Extract the mask subregister and apply the and.
29755ffd83dbSDimitry Andric     Register MaskLo = MRI->createVirtualRegister(&RegRC);
29765ffd83dbSDimitry Andric     MaskedLo = MRI->createVirtualRegister(&RegRC);
29775ffd83dbSDimitry Andric 
29785ffd83dbSDimitry Andric     BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskLo)
29795ffd83dbSDimitry Andric       .addReg(MaskReg, 0, AMDGPU::sub0);
29805ffd83dbSDimitry Andric     BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedLo)
29818bcb0991SDimitry Andric       .addReg(LoReg)
29825ffd83dbSDimitry Andric       .addReg(MaskLo);
29835ffd83dbSDimitry Andric   }
29845ffd83dbSDimitry Andric 
298504eeddc0SDimitry Andric   if (CanCopyHi32) {
29865ffd83dbSDimitry Andric     // If all the bits in the high half are 1, we only need a copy for it.
29875ffd83dbSDimitry Andric     MaskedHi = HiReg;
29885ffd83dbSDimitry Andric   } else {
29895ffd83dbSDimitry Andric     Register MaskHi = MRI->createVirtualRegister(&RegRC);
29905ffd83dbSDimitry Andric     MaskedHi = MRI->createVirtualRegister(&RegRC);
29915ffd83dbSDimitry Andric 
29925ffd83dbSDimitry Andric     BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskHi)
29935ffd83dbSDimitry Andric       .addReg(MaskReg, 0, AMDGPU::sub1);
29945ffd83dbSDimitry Andric     BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedHi)
29958bcb0991SDimitry Andric       .addReg(HiReg)
29965ffd83dbSDimitry Andric       .addReg(MaskHi);
29975ffd83dbSDimitry Andric   }
29985ffd83dbSDimitry Andric 
29995ffd83dbSDimitry Andric   BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
30005ffd83dbSDimitry Andric     .addReg(MaskedLo)
30015ffd83dbSDimitry Andric     .addImm(AMDGPU::sub0)
30025ffd83dbSDimitry Andric     .addReg(MaskedHi)
30038bcb0991SDimitry Andric     .addImm(AMDGPU::sub1);
30048bcb0991SDimitry Andric   I.eraseFromParent();
30058bcb0991SDimitry Andric   return true;
30068bcb0991SDimitry Andric }
30078bcb0991SDimitry Andric 
30085ffd83dbSDimitry Andric /// Return the register to use for the index value, and the subregister to use
30095ffd83dbSDimitry Andric /// for the indirectly accessed register.
30105ffd83dbSDimitry Andric static std::pair<Register, unsigned>
3011bdd1243dSDimitry Andric computeIndirectRegIndex(MachineRegisterInfo &MRI, const SIRegisterInfo &TRI,
3012bdd1243dSDimitry Andric                         const TargetRegisterClass *SuperRC, Register IdxReg,
3013bdd1243dSDimitry Andric                         unsigned EltSize, GISelKnownBits &KnownBits) {
30145ffd83dbSDimitry Andric   Register IdxBaseReg;
30155ffd83dbSDimitry Andric   int Offset;
30165ffd83dbSDimitry Andric 
3017bdd1243dSDimitry Andric   std::tie(IdxBaseReg, Offset) =
3018bdd1243dSDimitry Andric       AMDGPU::getBaseWithConstantOffset(MRI, IdxReg, &KnownBits);
30195ffd83dbSDimitry Andric   if (IdxBaseReg == AMDGPU::NoRegister) {
30205ffd83dbSDimitry Andric     // This will happen if the index is a known constant. This should ordinarily
30215ffd83dbSDimitry Andric     // be legalized out, but handle it as a register just in case.
30225ffd83dbSDimitry Andric     assert(Offset == 0);
30235ffd83dbSDimitry Andric     IdxBaseReg = IdxReg;
30245ffd83dbSDimitry Andric   }
30255ffd83dbSDimitry Andric 
30265ffd83dbSDimitry Andric   ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SuperRC, EltSize);
30275ffd83dbSDimitry Andric 
30285ffd83dbSDimitry Andric   // Skip out of bounds offsets, or else we would end up using an undefined
30295ffd83dbSDimitry Andric   // register.
30305ffd83dbSDimitry Andric   if (static_cast<unsigned>(Offset) >= SubRegs.size())
3031bdd1243dSDimitry Andric     return std::pair(IdxReg, SubRegs[0]);
3032bdd1243dSDimitry Andric   return std::pair(IdxBaseReg, SubRegs[Offset]);
30335ffd83dbSDimitry Andric }
30345ffd83dbSDimitry Andric 
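// Select a dynamic extract_vector_elt with an SGPR index: either write the
// index to M0 and use S_MOVRELS/V_MOVRELS, or use the GPR-index-mode pseudos
// when VGPR index mode is available.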
3035480093f4SDimitry Andric bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
3036480093f4SDimitry Andric   MachineInstr &MI) const {
3037480093f4SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
3038480093f4SDimitry Andric   Register SrcReg = MI.getOperand(1).getReg();
3039480093f4SDimitry Andric   Register IdxReg = MI.getOperand(2).getReg();
3040480093f4SDimitry Andric 
3041480093f4SDimitry Andric   LLT DstTy = MRI->getType(DstReg);
3042480093f4SDimitry Andric   LLT SrcTy = MRI->getType(SrcReg);
3043480093f4SDimitry Andric 
3044480093f4SDimitry Andric   const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3045480093f4SDimitry Andric   const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
3046480093f4SDimitry Andric   const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);
3047480093f4SDimitry Andric 
3048480093f4SDimitry Andric   // The index must be scalar. If it wasn't, RegBankSelect should have moved
3049480093f4SDimitry Andric   // this into a waterfall loop.
3050480093f4SDimitry Andric   if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
3051480093f4SDimitry Andric     return false;
3052480093f4SDimitry Andric 
305381ad6265SDimitry Andric   const TargetRegisterClass *SrcRC =
305481ad6265SDimitry Andric       TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
305581ad6265SDimitry Andric   const TargetRegisterClass *DstRC =
305681ad6265SDimitry Andric       TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
30575ffd83dbSDimitry Andric   if (!SrcRC || !DstRC)
30585ffd83dbSDimitry Andric     return false;
3059480093f4SDimitry Andric   if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
3060480093f4SDimitry Andric       !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
3061480093f4SDimitry Andric       !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
3062480093f4SDimitry Andric     return false;
3063480093f4SDimitry Andric 
3064480093f4SDimitry Andric   MachineBasicBlock *BB = MI.getParent();
3065480093f4SDimitry Andric   const DebugLoc &DL = MI.getDebugLoc();
3066480093f4SDimitry Andric   const bool Is64 = DstTy.getSizeInBits() == 64;
3067480093f4SDimitry Andric 
30685ffd83dbSDimitry Andric   unsigned SubReg;
3069bdd1243dSDimitry Andric   std::tie(IdxReg, SubReg) = computeIndirectRegIndex(
307006c3fb27SDimitry Andric       *MRI, TRI, SrcRC, IdxReg, DstTy.getSizeInBits() / 8, *KB);
3071480093f4SDimitry Andric 
3072480093f4SDimitry Andric   if (SrcRB->getID() == AMDGPU::SGPRRegBankID) {
3073480093f4SDimitry Andric     if (DstTy.getSizeInBits() != 32 && !Is64)
3074480093f4SDimitry Andric       return false;
3075480093f4SDimitry Andric 
3076480093f4SDimitry Andric     BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3077480093f4SDimitry Andric       .addReg(IdxReg);
3078480093f4SDimitry Andric 
3079480093f4SDimitry Andric     unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
3080480093f4SDimitry Andric     BuildMI(*BB, &MI, DL, TII.get(Opc), DstReg)
3081480093f4SDimitry Andric       .addReg(SrcReg, 0, SubReg)
3082480093f4SDimitry Andric       .addReg(SrcReg, RegState::Implicit);
3083480093f4SDimitry Andric     MI.eraseFromParent();
3084480093f4SDimitry Andric     return true;
3085480093f4SDimitry Andric   }
3086480093f4SDimitry Andric 
3087480093f4SDimitry Andric   if (SrcRB->getID() != AMDGPU::VGPRRegBankID || DstTy.getSizeInBits() != 32)
3088480093f4SDimitry Andric     return false;
3089480093f4SDimitry Andric 
3090480093f4SDimitry Andric   if (!STI.useVGPRIndexMode()) {
3091480093f4SDimitry Andric     BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3092480093f4SDimitry Andric       .addReg(IdxReg);
3093480093f4SDimitry Andric     BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
3094e8d8bef9SDimitry Andric       .addReg(SrcReg, 0, SubReg)
3095480093f4SDimitry Andric       .addReg(SrcReg, RegState::Implicit);
3096480093f4SDimitry Andric     MI.eraseFromParent();
3097480093f4SDimitry Andric     return true;
3098480093f4SDimitry Andric   }
3099480093f4SDimitry Andric 
3100e8d8bef9SDimitry Andric   const MCInstrDesc &GPRIDXDesc =
3101e8d8bef9SDimitry Andric       TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*SrcRC), true);
3102e8d8bef9SDimitry Andric   BuildMI(*BB, MI, DL, GPRIDXDesc, DstReg)
3103e8d8bef9SDimitry Andric       .addReg(SrcReg)
3104480093f4SDimitry Andric       .addReg(IdxReg)
3105e8d8bef9SDimitry Andric       .addImm(SubReg);
3106480093f4SDimitry Andric 
3107480093f4SDimitry Andric   MI.eraseFromParent();
3108480093f4SDimitry Andric   return true;
3109480093f4SDimitry Andric }
3110480093f4SDimitry Andric 
31115ffd83dbSDimitry Andric // TODO: Fold insert_vector_elt (extract_vector_elt) into movrelsd
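// Dynamic insert_vector_elt with an SGPR index, selected the same way as the
// extract case: M0 plus a movrel-style write pseudo, or the GPR-index-mode
// pseudo when VGPR index mode applies.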
31125ffd83dbSDimitry Andric bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
31135ffd83dbSDimitry Andric   MachineInstr &MI) const {
31145ffd83dbSDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
31155ffd83dbSDimitry Andric   Register VecReg = MI.getOperand(1).getReg();
31165ffd83dbSDimitry Andric   Register ValReg = MI.getOperand(2).getReg();
31175ffd83dbSDimitry Andric   Register IdxReg = MI.getOperand(3).getReg();
31185ffd83dbSDimitry Andric 
31195ffd83dbSDimitry Andric   LLT VecTy = MRI->getType(DstReg);
31205ffd83dbSDimitry Andric   LLT ValTy = MRI->getType(ValReg);
31215ffd83dbSDimitry Andric   unsigned VecSize = VecTy.getSizeInBits();
31225ffd83dbSDimitry Andric   unsigned ValSize = ValTy.getSizeInBits();
31235ffd83dbSDimitry Andric 
31245ffd83dbSDimitry Andric   const RegisterBank *VecRB = RBI.getRegBank(VecReg, *MRI, TRI);
31255ffd83dbSDimitry Andric   const RegisterBank *ValRB = RBI.getRegBank(ValReg, *MRI, TRI);
31265ffd83dbSDimitry Andric   const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);
31275ffd83dbSDimitry Andric 
31285ffd83dbSDimitry Andric   assert(VecTy.getElementType() == ValTy);
31295ffd83dbSDimitry Andric 
31305ffd83dbSDimitry Andric   // The index must be scalar. If it wasn't, RegBankSelect should have moved
31315ffd83dbSDimitry Andric   // this into a waterfall loop.
31325ffd83dbSDimitry Andric   if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
31335ffd83dbSDimitry Andric     return false;
31345ffd83dbSDimitry Andric 
313581ad6265SDimitry Andric   const TargetRegisterClass *VecRC =
313681ad6265SDimitry Andric       TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
313781ad6265SDimitry Andric   const TargetRegisterClass *ValRC =
313881ad6265SDimitry Andric       TRI.getRegClassForTypeOnBank(ValTy, *ValRB);
31395ffd83dbSDimitry Andric 
31405ffd83dbSDimitry Andric   if (!RBI.constrainGenericRegister(VecReg, *VecRC, *MRI) ||
31415ffd83dbSDimitry Andric       !RBI.constrainGenericRegister(DstReg, *VecRC, *MRI) ||
31425ffd83dbSDimitry Andric       !RBI.constrainGenericRegister(ValReg, *ValRC, *MRI) ||
31435ffd83dbSDimitry Andric       !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
31445ffd83dbSDimitry Andric     return false;
31455ffd83dbSDimitry Andric 
31465ffd83dbSDimitry Andric   if (VecRB->getID() == AMDGPU::VGPRRegBankID && ValSize != 32)
31475ffd83dbSDimitry Andric     return false;
31485ffd83dbSDimitry Andric 
31495ffd83dbSDimitry Andric   unsigned SubReg;
315006c3fb27SDimitry Andric   std::tie(IdxReg, SubReg) =
315106c3fb27SDimitry Andric       computeIndirectRegIndex(*MRI, TRI, VecRC, IdxReg, ValSize / 8, *KB);
31525ffd83dbSDimitry Andric 
31535ffd83dbSDimitry Andric   const bool IndexMode = VecRB->getID() == AMDGPU::VGPRRegBankID &&
31545ffd83dbSDimitry Andric                          STI.useVGPRIndexMode();
31555ffd83dbSDimitry Andric 
31565ffd83dbSDimitry Andric   MachineBasicBlock *BB = MI.getParent();
31575ffd83dbSDimitry Andric   const DebugLoc &DL = MI.getDebugLoc();
31585ffd83dbSDimitry Andric 
3159e8d8bef9SDimitry Andric   if (!IndexMode) {
31605ffd83dbSDimitry Andric     BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
31615ffd83dbSDimitry Andric       .addReg(IdxReg);
31625ffd83dbSDimitry Andric 
3163e8d8bef9SDimitry Andric     const MCInstrDesc &RegWriteOp = TII.getIndirectRegWriteMovRelPseudo(
3164e8d8bef9SDimitry Andric         VecSize, ValSize, VecRB->getID() == AMDGPU::SGPRRegBankID);
31655ffd83dbSDimitry Andric     BuildMI(*BB, MI, DL, RegWriteOp, DstReg)
31665ffd83dbSDimitry Andric         .addReg(VecReg)
31675ffd83dbSDimitry Andric         .addReg(ValReg)
31685ffd83dbSDimitry Andric         .addImm(SubReg);
3169e8d8bef9SDimitry Andric     MI.eraseFromParent();
3170e8d8bef9SDimitry Andric     return true;
3171e8d8bef9SDimitry Andric   }
31725ffd83dbSDimitry Andric 
3173e8d8bef9SDimitry Andric   const MCInstrDesc &GPRIDXDesc =
3174e8d8bef9SDimitry Andric       TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC), false);
3175e8d8bef9SDimitry Andric   BuildMI(*BB, MI, DL, GPRIDXDesc, DstReg)
3176e8d8bef9SDimitry Andric       .addReg(VecReg)
3177e8d8bef9SDimitry Andric       .addReg(ValReg)
3178e8d8bef9SDimitry Andric       .addReg(IdxReg)
3179e8d8bef9SDimitry Andric       .addImm(SubReg);
31805ffd83dbSDimitry Andric 
31815ffd83dbSDimitry Andric   MI.eraseFromParent();
31825ffd83dbSDimitry Andric   return true;
31835ffd83dbSDimitry Andric }
31845ffd83dbSDimitry Andric 
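// Select the raw/struct buffer load-to-LDS intrinsics into the matching
// BUFFER_LOAD_*_LDS_{OFFSET,OFFEN,IDXEN,BOTHEN} instruction. The LDS
// destination address travels in M0, and the memory operands are rebuilt as
// separate load and LDS-store MMOs.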
318581ad6265SDimitry Andric bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
31865f757f3fSDimitry Andric   assert(!AMDGPU::isGFX12Plus(STI));
318781ad6265SDimitry Andric   unsigned Opc;
318881ad6265SDimitry Andric   unsigned Size = MI.getOperand(3).getImm();
318981ad6265SDimitry Andric 
319081ad6265SDimitry Andric   // The struct intrinsic variants add one additional operand over raw.
319181ad6265SDimitry Andric   const bool HasVIndex = MI.getNumOperands() == 9;
319281ad6265SDimitry Andric   Register VIndex;
319381ad6265SDimitry Andric   int OpOffset = 0;
319481ad6265SDimitry Andric   if (HasVIndex) {
319581ad6265SDimitry Andric     VIndex = MI.getOperand(4).getReg();
319681ad6265SDimitry Andric     OpOffset = 1;
319781ad6265SDimitry Andric   }
319881ad6265SDimitry Andric 
319981ad6265SDimitry Andric   Register VOffset = MI.getOperand(4 + OpOffset).getReg();
3200bdd1243dSDimitry Andric   std::optional<ValueAndVReg> MaybeVOffset =
320181ad6265SDimitry Andric       getIConstantVRegValWithLookThrough(VOffset, *MRI);
320281ad6265SDimitry Andric   const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();
320381ad6265SDimitry Andric 
320481ad6265SDimitry Andric   switch (Size) {
320581ad6265SDimitry Andric   default:
320681ad6265SDimitry Andric     return false;
320781ad6265SDimitry Andric   case 1:
320881ad6265SDimitry Andric     Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
320981ad6265SDimitry Andric                                  : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
321081ad6265SDimitry Andric                     : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
321181ad6265SDimitry Andric                                  : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
321281ad6265SDimitry Andric     break;
321381ad6265SDimitry Andric   case 2:
321481ad6265SDimitry Andric     Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
321581ad6265SDimitry Andric                                  : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
321681ad6265SDimitry Andric                     : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
321781ad6265SDimitry Andric                                  : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
321881ad6265SDimitry Andric     break;
321981ad6265SDimitry Andric   case 4:
322081ad6265SDimitry Andric     Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
322181ad6265SDimitry Andric                                  : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
322281ad6265SDimitry Andric                     : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
322381ad6265SDimitry Andric                                  : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
322481ad6265SDimitry Andric     break;
322581ad6265SDimitry Andric   }
322681ad6265SDimitry Andric 
322781ad6265SDimitry Andric   MachineBasicBlock *MBB = MI.getParent();
322881ad6265SDimitry Andric   const DebugLoc &DL = MI.getDebugLoc();
322981ad6265SDimitry Andric   BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
323081ad6265SDimitry Andric     .add(MI.getOperand(2));
323181ad6265SDimitry Andric 
323281ad6265SDimitry Andric   auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc));
323381ad6265SDimitry Andric 
323481ad6265SDimitry Andric   if (HasVIndex && HasVOffset) {
323581ad6265SDimitry Andric     Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class());
323681ad6265SDimitry Andric     BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
323781ad6265SDimitry Andric       .addReg(VIndex)
323881ad6265SDimitry Andric       .addImm(AMDGPU::sub0)
323981ad6265SDimitry Andric       .addReg(VOffset)
324081ad6265SDimitry Andric       .addImm(AMDGPU::sub1);
324181ad6265SDimitry Andric 
324281ad6265SDimitry Andric     MIB.addReg(IdxReg);
324381ad6265SDimitry Andric   } else if (HasVIndex) {
324481ad6265SDimitry Andric     MIB.addReg(VIndex);
324581ad6265SDimitry Andric   } else if (HasVOffset) {
324681ad6265SDimitry Andric     MIB.addReg(VOffset);
324781ad6265SDimitry Andric   }
324881ad6265SDimitry Andric 
324981ad6265SDimitry Andric   MIB.add(MI.getOperand(1));            // rsrc
325081ad6265SDimitry Andric   MIB.add(MI.getOperand(5 + OpOffset)); // soffset
325181ad6265SDimitry Andric   MIB.add(MI.getOperand(6 + OpOffset)); // imm offset
325281ad6265SDimitry Andric   unsigned Aux = MI.getOperand(7 + OpOffset).getImm();
325381ad6265SDimitry Andric   MIB.addImm(Aux & AMDGPU::CPol::ALL);                  // cpol
32545f757f3fSDimitry Andric   MIB.addImm(Aux & AMDGPU::CPol::SWZ_pregfx12 ? 1 : 0); // swz
325581ad6265SDimitry Andric 
325681ad6265SDimitry Andric   MachineMemOperand *LoadMMO = *MI.memoperands_begin();
325781ad6265SDimitry Andric   MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo();
325881ad6265SDimitry Andric   LoadPtrI.Offset = MI.getOperand(6 + OpOffset).getImm();
325981ad6265SDimitry Andric   MachinePointerInfo StorePtrI = LoadPtrI;
326081ad6265SDimitry Andric   StorePtrI.V = nullptr;
326181ad6265SDimitry Andric   StorePtrI.AddrSpace = AMDGPUAS::LOCAL_ADDRESS;
326281ad6265SDimitry Andric 
326381ad6265SDimitry Andric   auto F = LoadMMO->getFlags() &
326481ad6265SDimitry Andric            ~(MachineMemOperand::MOStore | MachineMemOperand::MOLoad);
326581ad6265SDimitry Andric   LoadMMO = MF->getMachineMemOperand(LoadPtrI, F | MachineMemOperand::MOLoad,
326681ad6265SDimitry Andric                                      Size, LoadMMO->getBaseAlign());
326781ad6265SDimitry Andric 
326881ad6265SDimitry Andric   MachineMemOperand *StoreMMO =
326981ad6265SDimitry Andric       MF->getMachineMemOperand(StorePtrI, F | MachineMemOperand::MOStore,
327081ad6265SDimitry Andric                                sizeof(int32_t), LoadMMO->getBaseAlign());
327181ad6265SDimitry Andric 
327281ad6265SDimitry Andric   MIB.setMemRefs({LoadMMO, StoreMMO});
327381ad6265SDimitry Andric 
327481ad6265SDimitry Andric   MI.eraseFromParent();
327581ad6265SDimitry Andric   return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
327681ad6265SDimitry Andric }
327781ad6265SDimitry Andric 
327881ad6265SDimitry Andric /// Match a zero extend from a 32-bit value to 64 bits.
327981ad6265SDimitry Andric static Register matchZeroExtendFromS32(MachineRegisterInfo &MRI, Register Reg) {
328081ad6265SDimitry Andric   Register ZExtSrc;
328181ad6265SDimitry Andric   if (mi_match(Reg, MRI, m_GZExt(m_Reg(ZExtSrc))))
328281ad6265SDimitry Andric     return MRI.getType(ZExtSrc) == LLT::scalar(32) ? ZExtSrc : Register();
328381ad6265SDimitry Andric 
328481ad6265SDimitry Andric   // Match legalized form %zext = G_MERGE_VALUES (s32 %x), (s32 0)
328581ad6265SDimitry Andric   const MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
328681ad6265SDimitry Andric   if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3287753f127fSDimitry Andric     return Register();
328881ad6265SDimitry Andric 
3289fcaf7f86SDimitry Andric   assert(Def->getNumOperands() == 3 &&
3290fcaf7f86SDimitry Andric          MRI.getType(Def->getOperand(0).getReg()) == LLT::scalar(64));
329181ad6265SDimitry Andric   if (mi_match(Def->getOperand(2).getReg(), MRI, m_ZeroInt())) {
329281ad6265SDimitry Andric     return Def->getOperand(1).getReg();
329381ad6265SDimitry Andric   }
329481ad6265SDimitry Andric 
329581ad6265SDimitry Andric   return Register();
329681ad6265SDimitry Andric }
329781ad6265SDimitry Andric 
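// Select the global load-to-LDS intrinsic into GLOBAL_LOAD_LDS_*. When the
// address can be split into an SGPR base plus a zero-extended 32-bit VGPR
// offset, the SADDR form is used; M0 again carries the LDS destination.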
329881ad6265SDimitry Andric bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const{
329981ad6265SDimitry Andric   unsigned Opc;
330081ad6265SDimitry Andric   unsigned Size = MI.getOperand(3).getImm();
330181ad6265SDimitry Andric 
330281ad6265SDimitry Andric   switch (Size) {
330381ad6265SDimitry Andric   default:
330481ad6265SDimitry Andric     return false;
330581ad6265SDimitry Andric   case 1:
330681ad6265SDimitry Andric     Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
330781ad6265SDimitry Andric     break;
330881ad6265SDimitry Andric   case 2:
330981ad6265SDimitry Andric     Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
331081ad6265SDimitry Andric     break;
331181ad6265SDimitry Andric   case 4:
331281ad6265SDimitry Andric     Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
331381ad6265SDimitry Andric     break;
331481ad6265SDimitry Andric   }
331581ad6265SDimitry Andric 
331681ad6265SDimitry Andric   MachineBasicBlock *MBB = MI.getParent();
331781ad6265SDimitry Andric   const DebugLoc &DL = MI.getDebugLoc();
331881ad6265SDimitry Andric   BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
331981ad6265SDimitry Andric     .add(MI.getOperand(2));
332081ad6265SDimitry Andric 
332181ad6265SDimitry Andric   Register Addr = MI.getOperand(1).getReg();
332281ad6265SDimitry Andric   Register VOffset;
332381ad6265SDimitry Andric   // Try to split SAddr and VOffset. Global and LDS pointers share the same
332481ad6265SDimitry Andric   // immediate offset, so we cannot use a regular SelectGlobalSAddr().
332581ad6265SDimitry Andric   if (!isSGPR(Addr)) {
332681ad6265SDimitry Andric     auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
332781ad6265SDimitry Andric     if (isSGPR(AddrDef->Reg)) {
332881ad6265SDimitry Andric       Addr = AddrDef->Reg;
332981ad6265SDimitry Andric     } else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
333081ad6265SDimitry Andric       Register SAddr =
333181ad6265SDimitry Andric           getSrcRegIgnoringCopies(AddrDef->MI->getOperand(1).getReg(), *MRI);
3332bdd1243dSDimitry Andric       if (isSGPR(SAddr)) {
333381ad6265SDimitry Andric         Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
333481ad6265SDimitry Andric         if (Register Off = matchZeroExtendFromS32(*MRI, PtrBaseOffset)) {
333581ad6265SDimitry Andric           Addr = SAddr;
333681ad6265SDimitry Andric           VOffset = Off;
333781ad6265SDimitry Andric         }
333881ad6265SDimitry Andric       }
333981ad6265SDimitry Andric     }
334081ad6265SDimitry Andric   }
334181ad6265SDimitry Andric 
334281ad6265SDimitry Andric   if (isSGPR(Addr)) {
334381ad6265SDimitry Andric     Opc = AMDGPU::getGlobalSaddrOp(Opc);
334481ad6265SDimitry Andric     if (!VOffset) {
334581ad6265SDimitry Andric       VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
334681ad6265SDimitry Andric       BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
334781ad6265SDimitry Andric         .addImm(0);
334881ad6265SDimitry Andric     }
334981ad6265SDimitry Andric   }
335081ad6265SDimitry Andric 
335181ad6265SDimitry Andric   auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc))
335281ad6265SDimitry Andric     .addReg(Addr);
335381ad6265SDimitry Andric 
335481ad6265SDimitry Andric   if (isSGPR(Addr))
335581ad6265SDimitry Andric     MIB.addReg(VOffset);
335681ad6265SDimitry Andric 
335781ad6265SDimitry Andric   MIB.add(MI.getOperand(4))  // offset
335881ad6265SDimitry Andric      .add(MI.getOperand(5)); // cpol
335981ad6265SDimitry Andric 
336081ad6265SDimitry Andric   MachineMemOperand *LoadMMO = *MI.memoperands_begin();
336181ad6265SDimitry Andric   MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo();
336281ad6265SDimitry Andric   LoadPtrI.Offset = MI.getOperand(4).getImm();
336381ad6265SDimitry Andric   MachinePointerInfo StorePtrI = LoadPtrI;
336481ad6265SDimitry Andric   LoadPtrI.AddrSpace = AMDGPUAS::GLOBAL_ADDRESS;
336581ad6265SDimitry Andric   StorePtrI.AddrSpace = AMDGPUAS::LOCAL_ADDRESS;
336681ad6265SDimitry Andric   auto F = LoadMMO->getFlags() &
336781ad6265SDimitry Andric            ~(MachineMemOperand::MOStore | MachineMemOperand::MOLoad);
336881ad6265SDimitry Andric   LoadMMO = MF->getMachineMemOperand(LoadPtrI, F | MachineMemOperand::MOLoad,
336981ad6265SDimitry Andric                                      Size, LoadMMO->getBaseAlign());
337081ad6265SDimitry Andric   MachineMemOperand *StoreMMO =
337181ad6265SDimitry Andric       MF->getMachineMemOperand(StorePtrI, F | MachineMemOperand::MOStore,
337281ad6265SDimitry Andric                                sizeof(int32_t), Align(4));
337381ad6265SDimitry Andric 
337481ad6265SDimitry Andric   MIB.setMemRefs({LoadMMO, StoreMMO});
337581ad6265SDimitry Andric 
337681ad6265SDimitry Andric   MI.eraseFromParent();
337781ad6265SDimitry Andric   return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
337881ad6265SDimitry Andric }
337981ad6265SDimitry Andric 
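// The BVH intersect-ray pseudo already carries its final opcode as an
// immediate; rewrite the descriptor to that opcode and drop the operand.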
3380e8d8bef9SDimitry Andric bool AMDGPUInstructionSelector::selectBVHIntrinsic(MachineInstr &MI) const{
3381e8d8bef9SDimitry Andric   MI.setDesc(TII.get(MI.getOperand(1).getImm()));
338281ad6265SDimitry Andric   MI.removeOperand(1);
338381ad6265SDimitry Andric   MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
338481ad6265SDimitry Andric   return true;
338581ad6265SDimitry Andric }
338681ad6265SDimitry Andric 
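// Map each llvm.amdgcn.smfmac.* intrinsic to its V_SMFMAC_*_e64 opcode and
// move the tied VDst_In operand to the end of the operand list.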
338781ad6265SDimitry Andric bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const {
338881ad6265SDimitry Andric   unsigned Opc;
33895f757f3fSDimitry Andric   switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
339081ad6265SDimitry Andric   case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
339181ad6265SDimitry Andric     Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
339281ad6265SDimitry Andric     break;
339381ad6265SDimitry Andric   case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
339481ad6265SDimitry Andric     Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
339581ad6265SDimitry Andric     break;
339681ad6265SDimitry Andric   case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
339781ad6265SDimitry Andric     Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
339881ad6265SDimitry Andric     break;
339981ad6265SDimitry Andric   case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
340081ad6265SDimitry Andric     Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
340181ad6265SDimitry Andric     break;
340281ad6265SDimitry Andric   case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
340381ad6265SDimitry Andric     Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
340481ad6265SDimitry Andric     break;
340581ad6265SDimitry Andric   case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
340681ad6265SDimitry Andric     Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
340781ad6265SDimitry Andric     break;
3408fcaf7f86SDimitry Andric   case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
3409fcaf7f86SDimitry Andric     Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
3410fcaf7f86SDimitry Andric     break;
3411fcaf7f86SDimitry Andric   case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
3412fcaf7f86SDimitry Andric     Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
3413fcaf7f86SDimitry Andric     break;
3414fcaf7f86SDimitry Andric   case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
3415fcaf7f86SDimitry Andric     Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
3416fcaf7f86SDimitry Andric     break;
3417fcaf7f86SDimitry Andric   case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
3418fcaf7f86SDimitry Andric     Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
3419fcaf7f86SDimitry Andric     break;
3420fcaf7f86SDimitry Andric   case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
3421fcaf7f86SDimitry Andric     Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
3422fcaf7f86SDimitry Andric     break;
3423fcaf7f86SDimitry Andric   case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
3424fcaf7f86SDimitry Andric     Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
3425fcaf7f86SDimitry Andric     break;
3426fcaf7f86SDimitry Andric   case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
3427fcaf7f86SDimitry Andric     Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
3428fcaf7f86SDimitry Andric     break;
3429fcaf7f86SDimitry Andric   case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
3430fcaf7f86SDimitry Andric     Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;
3431fcaf7f86SDimitry Andric     break;
343281ad6265SDimitry Andric   default:
343381ad6265SDimitry Andric     llvm_unreachable("unhandled smfmac intrinsic");
343481ad6265SDimitry Andric   }
343581ad6265SDimitry Andric 
343681ad6265SDimitry Andric   auto VDst_In = MI.getOperand(4);
343781ad6265SDimitry Andric 
343881ad6265SDimitry Andric   MI.setDesc(TII.get(Opc));
343981ad6265SDimitry Andric   MI.removeOperand(4); // VDst_In
344081ad6265SDimitry Andric   MI.removeOperand(1); // Intrinsic ID
344181ad6265SDimitry Andric   MI.addOperand(VDst_In); // Re-add VDst_In to the end
3442e8d8bef9SDimitry Andric   MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
3443e8d8bef9SDimitry Andric   return true;
3444e8d8bef9SDimitry Andric }
3445e8d8bef9SDimitry Andric 
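// Select G_AMDGPU_WAVE_ADDRESS: shift the source right by
// log2(wavefront size), using V_LSHRREV_B32 or S_LSHR_B32 depending on the
// destination register bank.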
344604eeddc0SDimitry Andric bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const {
344704eeddc0SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
344804eeddc0SDimitry Andric   Register SrcReg = MI.getOperand(1).getReg();
344904eeddc0SDimitry Andric   const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
345004eeddc0SDimitry Andric   const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
345104eeddc0SDimitry Andric   MachineBasicBlock *MBB = MI.getParent();
345204eeddc0SDimitry Andric   const DebugLoc &DL = MI.getDebugLoc();
345304eeddc0SDimitry Andric 
345404eeddc0SDimitry Andric   if (IsVALU) {
345504eeddc0SDimitry Andric     BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
345604eeddc0SDimitry Andric       .addImm(Subtarget->getWavefrontSizeLog2())
345704eeddc0SDimitry Andric       .addReg(SrcReg);
345804eeddc0SDimitry Andric   } else {
345904eeddc0SDimitry Andric     BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), DstReg)
346004eeddc0SDimitry Andric       .addReg(SrcReg)
34615f757f3fSDimitry Andric       .addImm(Subtarget->getWavefrontSizeLog2())
34625f757f3fSDimitry Andric       .setOperandDead(3); // Dead scc
346304eeddc0SDimitry Andric   }
346404eeddc0SDimitry Andric 
346504eeddc0SDimitry Andric   const TargetRegisterClass &RC =
346604eeddc0SDimitry Andric       IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
346704eeddc0SDimitry Andric   if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
346804eeddc0SDimitry Andric     return false;
346904eeddc0SDimitry Andric 
347004eeddc0SDimitry Andric   MI.eraseFromParent();
347104eeddc0SDimitry Andric   return true;
347204eeddc0SDimitry Andric }
347304eeddc0SDimitry Andric 
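// Select G_STACKRESTORE: copy the restored value into the stack pointer,
// reusing an existing wave-address computation for the source if there is
// one, and otherwise shifting by log2(wavefront size) first.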
34745f757f3fSDimitry Andric bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {
34755f757f3fSDimitry Andric   Register SrcReg = MI.getOperand(0).getReg();
34765f757f3fSDimitry Andric   if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, *MRI))
34775f757f3fSDimitry Andric     return false;
34785f757f3fSDimitry Andric 
34795f757f3fSDimitry Andric   MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
34805f757f3fSDimitry Andric   Register SP =
34815f757f3fSDimitry Andric       Subtarget->getTargetLowering()->getStackPointerRegisterToSaveRestore();
34825f757f3fSDimitry Andric   Register WaveAddr = getWaveAddress(DefMI);
34835f757f3fSDimitry Andric   MachineBasicBlock *MBB = MI.getParent();
34845f757f3fSDimitry Andric   const DebugLoc &DL = MI.getDebugLoc();
34855f757f3fSDimitry Andric 
34865f757f3fSDimitry Andric   if (!WaveAddr) {
34875f757f3fSDimitry Andric     WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
34885f757f3fSDimitry Andric     BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), WaveAddr)
34895f757f3fSDimitry Andric       .addReg(SrcReg)
34905f757f3fSDimitry Andric       .addImm(Subtarget->getWavefrontSizeLog2())
34915f757f3fSDimitry Andric       .setOperandDead(3); // Dead scc
34925f757f3fSDimitry Andric   }
34935f757f3fSDimitry Andric 
34945f757f3fSDimitry Andric   BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), SP)
34955f757f3fSDimitry Andric     .addReg(WaveAddr);
34965f757f3fSDimitry Andric 
34975f757f3fSDimitry Andric   MI.eraseFromParent();
34985f757f3fSDimitry Andric   return true;
34995f757f3fSDimitry Andric }
35005f757f3fSDimitry Andric 
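// Main entry point: dispatch each generic opcode either to the TableGen'erated
// selectImpl() or to one of the manual select* routines above.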
35018bcb0991SDimitry Andric bool AMDGPUInstructionSelector::select(MachineInstr &I) {
35020b57cec5SDimitry Andric 
35038bcb0991SDimitry Andric   if (!I.isPreISelOpcode()) {
35040b57cec5SDimitry Andric     if (I.isCopy())
35050b57cec5SDimitry Andric       return selectCOPY(I);
35060b57cec5SDimitry Andric     return true;
35070b57cec5SDimitry Andric   }
35080b57cec5SDimitry Andric 
35090b57cec5SDimitry Andric   switch (I.getOpcode()) {
35100b57cec5SDimitry Andric   case TargetOpcode::G_AND:
35110b57cec5SDimitry Andric   case TargetOpcode::G_OR:
35120b57cec5SDimitry Andric   case TargetOpcode::G_XOR:
35135ffd83dbSDimitry Andric     if (selectImpl(I, *CoverageInfo))
35140b57cec5SDimitry Andric       return true;
35155ffd83dbSDimitry Andric     return selectG_AND_OR_XOR(I);
35160b57cec5SDimitry Andric   case TargetOpcode::G_ADD:
35170b57cec5SDimitry Andric   case TargetOpcode::G_SUB:
3518*0fca6ea1SDimitry Andric   case TargetOpcode::G_PTR_ADD:
35198bcb0991SDimitry Andric     if (selectImpl(I, *CoverageInfo))
35200b57cec5SDimitry Andric       return true;
35218bcb0991SDimitry Andric     return selectG_ADD_SUB(I);
35228bcb0991SDimitry Andric   case TargetOpcode::G_UADDO:
35238bcb0991SDimitry Andric   case TargetOpcode::G_USUBO:
3524480093f4SDimitry Andric   case TargetOpcode::G_UADDE:
3525480093f4SDimitry Andric   case TargetOpcode::G_USUBE:
3526480093f4SDimitry Andric     return selectG_UADDO_USUBO_UADDE_USUBE(I);
352781ad6265SDimitry Andric   case AMDGPU::G_AMDGPU_MAD_U64_U32:
352881ad6265SDimitry Andric   case AMDGPU::G_AMDGPU_MAD_I64_I32:
352981ad6265SDimitry Andric     return selectG_AMDGPU_MAD_64_32(I);
35300b57cec5SDimitry Andric   case TargetOpcode::G_INTTOPTR:
35310b57cec5SDimitry Andric   case TargetOpcode::G_BITCAST:
35328bcb0991SDimitry Andric   case TargetOpcode::G_PTRTOINT:
3533*0fca6ea1SDimitry Andric   case TargetOpcode::G_FREEZE:
35340b57cec5SDimitry Andric     return selectCOPY(I);
35350b57cec5SDimitry Andric   case TargetOpcode::G_CONSTANT:
35360b57cec5SDimitry Andric   case TargetOpcode::G_FCONSTANT:
35370b57cec5SDimitry Andric     return selectG_CONSTANT(I);
35385ffd83dbSDimitry Andric   case TargetOpcode::G_FNEG:
35395ffd83dbSDimitry Andric     if (selectImpl(I, *CoverageInfo))
35405ffd83dbSDimitry Andric       return true;
35415ffd83dbSDimitry Andric     return selectG_FNEG(I);
35425ffd83dbSDimitry Andric   case TargetOpcode::G_FABS:
35435ffd83dbSDimitry Andric     if (selectImpl(I, *CoverageInfo))
35445ffd83dbSDimitry Andric       return true;
35455ffd83dbSDimitry Andric     return selectG_FABS(I);
35460b57cec5SDimitry Andric   case TargetOpcode::G_EXTRACT:
35470b57cec5SDimitry Andric     return selectG_EXTRACT(I);
35480b57cec5SDimitry Andric   case TargetOpcode::G_MERGE_VALUES:
35490b57cec5SDimitry Andric   case TargetOpcode::G_CONCAT_VECTORS:
35500b57cec5SDimitry Andric     return selectG_MERGE_VALUES(I);
35510b57cec5SDimitry Andric   case TargetOpcode::G_UNMERGE_VALUES:
35520b57cec5SDimitry Andric     return selectG_UNMERGE_VALUES(I);
3553bdd1243dSDimitry Andric   case TargetOpcode::G_BUILD_VECTOR:
35545ffd83dbSDimitry Andric   case TargetOpcode::G_BUILD_VECTOR_TRUNC:
3555bdd1243dSDimitry Andric     return selectG_BUILD_VECTOR(I);
35560b57cec5SDimitry Andric   case TargetOpcode::G_IMPLICIT_DEF:
35570b57cec5SDimitry Andric     return selectG_IMPLICIT_DEF(I);
35580b57cec5SDimitry Andric   case TargetOpcode::G_INSERT:
35590b57cec5SDimitry Andric     return selectG_INSERT(I);
35600b57cec5SDimitry Andric   case TargetOpcode::G_INTRINSIC:
35615f757f3fSDimitry Andric   case TargetOpcode::G_INTRINSIC_CONVERGENT:
35628bcb0991SDimitry Andric     return selectG_INTRINSIC(I);
35630b57cec5SDimitry Andric   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
35645f757f3fSDimitry Andric   case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
35658bcb0991SDimitry Andric     return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
35660b57cec5SDimitry Andric   case TargetOpcode::G_ICMP:
35675f757f3fSDimitry Andric   case TargetOpcode::G_FCMP:
35685f757f3fSDimitry Andric     if (selectG_ICMP_or_FCMP(I))
35690b57cec5SDimitry Andric       return true;
35708bcb0991SDimitry Andric     return selectImpl(I, *CoverageInfo);
35710b57cec5SDimitry Andric   case TargetOpcode::G_LOAD:
3572e8d8bef9SDimitry Andric   case TargetOpcode::G_STORE:
35738bcb0991SDimitry Andric   case TargetOpcode::G_ATOMIC_CMPXCHG:
35748bcb0991SDimitry Andric   case TargetOpcode::G_ATOMICRMW_XCHG:
35758bcb0991SDimitry Andric   case TargetOpcode::G_ATOMICRMW_ADD:
35768bcb0991SDimitry Andric   case TargetOpcode::G_ATOMICRMW_SUB:
35778bcb0991SDimitry Andric   case TargetOpcode::G_ATOMICRMW_AND:
35788bcb0991SDimitry Andric   case TargetOpcode::G_ATOMICRMW_OR:
35798bcb0991SDimitry Andric   case TargetOpcode::G_ATOMICRMW_XOR:
35808bcb0991SDimitry Andric   case TargetOpcode::G_ATOMICRMW_MIN:
35818bcb0991SDimitry Andric   case TargetOpcode::G_ATOMICRMW_MAX:
35828bcb0991SDimitry Andric   case TargetOpcode::G_ATOMICRMW_UMIN:
35838bcb0991SDimitry Andric   case TargetOpcode::G_ATOMICRMW_UMAX:
358406c3fb27SDimitry Andric   case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
358506c3fb27SDimitry Andric   case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
35868bcb0991SDimitry Andric   case TargetOpcode::G_ATOMICRMW_FADD:
3587*0fca6ea1SDimitry Andric   case TargetOpcode::G_ATOMICRMW_FMIN:
3588*0fca6ea1SDimitry Andric   case TargetOpcode::G_ATOMICRMW_FMAX:
3589e8d8bef9SDimitry Andric     return selectG_LOAD_STORE_ATOMICRMW(I);
35900b57cec5SDimitry Andric   case TargetOpcode::G_SELECT:
35910b57cec5SDimitry Andric     return selectG_SELECT(I);
35920b57cec5SDimitry Andric   case TargetOpcode::G_TRUNC:
35930b57cec5SDimitry Andric     return selectG_TRUNC(I);
35940b57cec5SDimitry Andric   case TargetOpcode::G_SEXT:
35950b57cec5SDimitry Andric   case TargetOpcode::G_ZEXT:
35960b57cec5SDimitry Andric   case TargetOpcode::G_ANYEXT:
35975ffd83dbSDimitry Andric   case TargetOpcode::G_SEXT_INREG:
359806c3fb27SDimitry Andric     // This is a workaround. For extension from type i1, `selectImpl()` uses
359906c3fb27SDimitry Andric     // patterns from the TD file and generates an illegal VGPR to SGPR COPY,
360006c3fb27SDimitry Andric     // as type i1 can only be held in an SGPR class.
360106c3fb27SDimitry Andric     if (MRI->getType(I.getOperand(1).getReg()) != LLT::scalar(1) &&
360206c3fb27SDimitry Andric         selectImpl(I, *CoverageInfo))
3603480093f4SDimitry Andric       return true;
36048bcb0991SDimitry Andric     return selectG_SZA_EXT(I);
36055f757f3fSDimitry Andric   case TargetOpcode::G_FPEXT:
36065f757f3fSDimitry Andric     if (selectG_FPEXT(I))
36075f757f3fSDimitry Andric       return true;
36085f757f3fSDimitry Andric     return selectImpl(I, *CoverageInfo);
36090b57cec5SDimitry Andric   case TargetOpcode::G_BRCOND:
36100b57cec5SDimitry Andric     return selectG_BRCOND(I);
36115ffd83dbSDimitry Andric   case TargetOpcode::G_GLOBAL_VALUE:
3612e8d8bef9SDimitry Andric     return selectG_GLOBAL_VALUE(I);
36135ffd83dbSDimitry Andric   case TargetOpcode::G_PTRMASK:
36145ffd83dbSDimitry Andric     return selectG_PTRMASK(I);
3615480093f4SDimitry Andric   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3616480093f4SDimitry Andric     return selectG_EXTRACT_VECTOR_ELT(I);
36175ffd83dbSDimitry Andric   case TargetOpcode::G_INSERT_VECTOR_ELT:
36185ffd83dbSDimitry Andric     return selectG_INSERT_VECTOR_ELT(I);
36195ffd83dbSDimitry Andric   case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
362004eeddc0SDimitry Andric   case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
3621*0fca6ea1SDimitry Andric   case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
362204eeddc0SDimitry Andric   case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
362304eeddc0SDimitry Andric   case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
36245f757f3fSDimitry Andric     const AMDGPU::ImageDimIntrinsicInfo *Intr =
36255f757f3fSDimitry Andric         AMDGPU::getImageDimIntrinsicInfo(AMDGPU::getIntrinsicID(I));
36265ffd83dbSDimitry Andric     assert(Intr && "not an image intrinsic with image pseudo");
36275ffd83dbSDimitry Andric     return selectImageIntrinsic(I, Intr);
36285ffd83dbSDimitry Andric   }
3629e8d8bef9SDimitry Andric   case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY:
3630e8d8bef9SDimitry Andric     return selectBVHIntrinsic(I);
3631fe6060f1SDimitry Andric   case AMDGPU::G_SBFX:
3632fe6060f1SDimitry Andric   case AMDGPU::G_UBFX:
3633fe6060f1SDimitry Andric     return selectG_SBFX_UBFX(I);
3634349cc55cSDimitry Andric   case AMDGPU::G_SI_CALL:
3635349cc55cSDimitry Andric     I.setDesc(TII.get(AMDGPU::SI_CALL));
3636349cc55cSDimitry Andric     return true;
363704eeddc0SDimitry Andric   case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
363804eeddc0SDimitry Andric     return selectWaveAddress(I);
36395f757f3fSDimitry Andric   case AMDGPU::G_STACKRESTORE:
36405f757f3fSDimitry Andric     return selectStackRestore(I);
3641*0fca6ea1SDimitry Andric   case AMDGPU::G_PHI:
3642*0fca6ea1SDimitry Andric     return selectPHI(I);
36438bcb0991SDimitry Andric   default:
36448bcb0991SDimitry Andric     return selectImpl(I, *CoverageInfo);
36450b57cec5SDimitry Andric   }
36460b57cec5SDimitry Andric   return false;
36470b57cec5SDimitry Andric }
36480b57cec5SDimitry Andric 
36490b57cec5SDimitry Andric InstructionSelector::ComplexRendererFns
36500b57cec5SDimitry Andric AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
36510b57cec5SDimitry Andric   return {{
36520b57cec5SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
36530b57cec5SDimitry Andric   }};
36550b57cec5SDimitry Andric }
36560b57cec5SDimitry Andric 
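/// Fold source modifiers for a VOP3 operand: look through G_FNEG (and, when
/// canonicalizing, a G_FSUB from +/-0.0) and optionally G_FABS feeding Root,
/// returning the stripped source register and the accumulated SISrcMods mask.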
365706c3fb27SDimitry Andric std::pair<Register, unsigned>
365806c3fb27SDimitry Andric AMDGPUInstructionSelector::selectVOP3ModsImpl(MachineOperand &Root,
365906c3fb27SDimitry Andric                                               bool IsCanonicalizing,
366006c3fb27SDimitry Andric                                               bool AllowAbs, bool OpSel) const {
36615ffd83dbSDimitry Andric   Register Src = Root.getReg();
36620b57cec5SDimitry Andric   unsigned Mods = 0;
36635ffd83dbSDimitry Andric   MachineInstr *MI = getDefIgnoringCopies(Src, *MRI);
36640b57cec5SDimitry Andric 
3665bdd1243dSDimitry Andric   if (MI->getOpcode() == AMDGPU::G_FNEG) {
36660b57cec5SDimitry Andric     Src = MI->getOperand(1).getReg();
36670b57cec5SDimitry Andric     Mods |= SISrcMods::NEG;
36685ffd83dbSDimitry Andric     MI = getDefIgnoringCopies(Src, *MRI);
366906c3fb27SDimitry Andric   } else if (MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
367006c3fb27SDimitry Andric     // Fold fsub [+-]0 into fneg. This may not have folded depending on the
367106c3fb27SDimitry Andric     // denormal mode, but we're implicitly canonicalizing in a source operand.
367206c3fb27SDimitry Andric     const ConstantFP *LHS =
367306c3fb27SDimitry Andric         getConstantFPVRegVal(MI->getOperand(1).getReg(), *MRI);
367406c3fb27SDimitry Andric     if (LHS && LHS->isZero()) {
367506c3fb27SDimitry Andric       Mods |= SISrcMods::NEG;
367606c3fb27SDimitry Andric       Src = MI->getOperand(2).getReg();
367706c3fb27SDimitry Andric     }
36780b57cec5SDimitry Andric   }
36790b57cec5SDimitry Andric 
3680bdd1243dSDimitry Andric   if (AllowAbs && MI->getOpcode() == AMDGPU::G_FABS) {
36810b57cec5SDimitry Andric     Src = MI->getOperand(1).getReg();
36820b57cec5SDimitry Andric     Mods |= SISrcMods::ABS;
36830b57cec5SDimitry Andric   }
36840b57cec5SDimitry Andric 
368581ad6265SDimitry Andric   if (OpSel)
368681ad6265SDimitry Andric     Mods |= SISrcMods::OP_SEL_0;
368781ad6265SDimitry Andric 
3688bdd1243dSDimitry Andric   return std::pair(Src, Mods);
3689bdd1243dSDimitry Andric }
3690bdd1243dSDimitry Andric 
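/// If source modifiers were folded (or ForceVGPR is set) and the source is not
/// on the VGPR bank, insert a COPY into a register cloned from the root
/// operand so the constant bus restriction is not violated; returns the
/// register to use for the operand.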
3691bdd1243dSDimitry Andric Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
3692bdd1243dSDimitry Andric     Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt,
3693bdd1243dSDimitry Andric     bool ForceVGPR) const {
369481ad6265SDimitry Andric   if ((Mods != 0 || ForceVGPR) &&
36955ffd83dbSDimitry Andric       RBI.getRegBank(Src, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) {
36965ffd83dbSDimitry Andric 
36975ffd83dbSDimitry Andric     // If we looked through copies to find source modifiers on an SGPR operand,
36985ffd83dbSDimitry Andric     // we now have an SGPR register source. To avoid potentially violating the
36995ffd83dbSDimitry Andric     // constant bus restriction, we need to insert a copy to a VGPR.
3700bdd1243dSDimitry Andric     Register VGPRSrc = MRI->cloneVirtualRegister(Root.getReg());
3701bdd1243dSDimitry Andric     BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(),
37025ffd83dbSDimitry Andric             TII.get(AMDGPU::COPY), VGPRSrc)
37035ffd83dbSDimitry Andric         .addReg(Src);
37045ffd83dbSDimitry Andric     Src = VGPRSrc;
37055ffd83dbSDimitry Andric   }
37065ffd83dbSDimitry Andric 
3707bdd1243dSDimitry Andric   return Src;
37080b57cec5SDimitry Andric }
37090b57cec5SDimitry Andric 
37100b57cec5SDimitry Andric ///
37110b57cec5SDimitry Andric /// This will select either an SGPR or VGPR operand and will save us from
37120b57cec5SDimitry Andric /// having to write an extra tablegen pattern.
37130b57cec5SDimitry Andric InstructionSelector::ComplexRendererFns
37140b57cec5SDimitry Andric AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
37150b57cec5SDimitry Andric   return {{
37160b57cec5SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
37170b57cec5SDimitry Andric   }};
37180b57cec5SDimitry Andric }
37190b57cec5SDimitry Andric 
37200b57cec5SDimitry Andric InstructionSelector::ComplexRendererFns
37210b57cec5SDimitry Andric AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
37220b57cec5SDimitry Andric   Register Src;
37230b57cec5SDimitry Andric   unsigned Mods;
37245ffd83dbSDimitry Andric   std::tie(Src, Mods) = selectVOP3ModsImpl(Root);
37250b57cec5SDimitry Andric 
37260b57cec5SDimitry Andric   return {{
3727bdd1243dSDimitry Andric       [=](MachineInstrBuilder &MIB) {
3728bdd1243dSDimitry Andric         MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
3729bdd1243dSDimitry Andric       },
37300b57cec5SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
37310b57cec5SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },    // clamp
37320b57cec5SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }     // omod
37330b57cec5SDimitry Andric   }};
37340b57cec5SDimitry Andric }
37358bcb0991SDimitry Andric 
37368bcb0991SDimitry Andric InstructionSelector::ComplexRendererFns
3737e8d8bef9SDimitry Andric AMDGPUInstructionSelector::selectVOP3BMods0(MachineOperand &Root) const {
3738e8d8bef9SDimitry Andric   Register Src;
3739e8d8bef9SDimitry Andric   unsigned Mods;
374006c3fb27SDimitry Andric   std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
374106c3fb27SDimitry Andric                                            /*IsCanonicalizing=*/true,
374206c3fb27SDimitry Andric                                            /*AllowAbs=*/false);
3743e8d8bef9SDimitry Andric 
3744e8d8bef9SDimitry Andric   return {{
3745bdd1243dSDimitry Andric       [=](MachineInstrBuilder &MIB) {
3746bdd1243dSDimitry Andric         MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
3747bdd1243dSDimitry Andric       },
3748e8d8bef9SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
3749e8d8bef9SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },    // clamp
3750e8d8bef9SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }     // omod
3751e8d8bef9SDimitry Andric   }};
3752e8d8bef9SDimitry Andric }
3753e8d8bef9SDimitry Andric 
3754e8d8bef9SDimitry Andric InstructionSelector::ComplexRendererFns
37550b57cec5SDimitry Andric AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {
37560b57cec5SDimitry Andric   return {{
37570b57cec5SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
37580b57cec5SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp
37590b57cec5SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }  // omod
37600b57cec5SDimitry Andric   }};
37610b57cec5SDimitry Andric }
37620b57cec5SDimitry Andric 
37630b57cec5SDimitry Andric InstructionSelector::ComplexRendererFns
37640b57cec5SDimitry Andric AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
37650b57cec5SDimitry Andric   Register Src;
37660b57cec5SDimitry Andric   unsigned Mods;
37675ffd83dbSDimitry Andric   std::tie(Src, Mods) = selectVOP3ModsImpl(Root);
37685ffd83dbSDimitry Andric 
37695ffd83dbSDimitry Andric   return {{
3770bdd1243dSDimitry Andric       [=](MachineInstrBuilder &MIB) {
3771bdd1243dSDimitry Andric         MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
3772bdd1243dSDimitry Andric       },
37735ffd83dbSDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
37745ffd83dbSDimitry Andric   }};
37755ffd83dbSDimitry Andric }
37765ffd83dbSDimitry Andric 
37775ffd83dbSDimitry Andric InstructionSelector::ComplexRendererFns
377806c3fb27SDimitry Andric AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
377906c3fb27SDimitry Andric     MachineOperand &Root) const {
378006c3fb27SDimitry Andric   Register Src;
378106c3fb27SDimitry Andric   unsigned Mods;
378206c3fb27SDimitry Andric   std::tie(Src, Mods) = selectVOP3ModsImpl(Root, /*IsCanonicalizing=*/false);
378306c3fb27SDimitry Andric 
378406c3fb27SDimitry Andric   return {{
378506c3fb27SDimitry Andric       [=](MachineInstrBuilder &MIB) {
378606c3fb27SDimitry Andric         MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
378706c3fb27SDimitry Andric       },
378806c3fb27SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
378906c3fb27SDimitry Andric   }};
379006c3fb27SDimitry Andric }
379106c3fb27SDimitry Andric 
379206c3fb27SDimitry Andric InstructionSelector::ComplexRendererFns
3793e8d8bef9SDimitry Andric AMDGPUInstructionSelector::selectVOP3BMods(MachineOperand &Root) const {
3794e8d8bef9SDimitry Andric   Register Src;
3795e8d8bef9SDimitry Andric   unsigned Mods;
379606c3fb27SDimitry Andric   std::tie(Src, Mods) = selectVOP3ModsImpl(Root, /*IsCanonicalizing=*/true,
379706c3fb27SDimitry Andric                                            /*AllowAbs=*/false);
3798e8d8bef9SDimitry Andric 
3799e8d8bef9SDimitry Andric   return {{
3800bdd1243dSDimitry Andric       [=](MachineInstrBuilder &MIB) {
3801bdd1243dSDimitry Andric         MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
3802bdd1243dSDimitry Andric       },
3803e8d8bef9SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
3804e8d8bef9SDimitry Andric   }};
3805e8d8bef9SDimitry Andric }
3806e8d8bef9SDimitry Andric 
3807e8d8bef9SDimitry Andric InstructionSelector::ComplexRendererFns
38085ffd83dbSDimitry Andric AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {
38095ffd83dbSDimitry Andric   Register Reg = Root.getReg();
38105ffd83dbSDimitry Andric   const MachineInstr *Def = getDefIgnoringCopies(Reg, *MRI);
3811bdd1243dSDimitry Andric   if (Def->getOpcode() == AMDGPU::G_FNEG || Def->getOpcode() == AMDGPU::G_FABS)
38125ffd83dbSDimitry Andric     return {};
38135ffd83dbSDimitry Andric   return {{
38145ffd83dbSDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
38155ffd83dbSDimitry Andric   }};
38165ffd83dbSDimitry Andric }
38175ffd83dbSDimitry Andric 
38185ffd83dbSDimitry Andric std::pair<Register, unsigned>
38195ffd83dbSDimitry Andric AMDGPUInstructionSelector::selectVOP3PModsImpl(
382081ad6265SDimitry Andric   Register Src, const MachineRegisterInfo &MRI, bool IsDOT) const {
38215ffd83dbSDimitry Andric   unsigned Mods = 0;
38225ffd83dbSDimitry Andric   MachineInstr *MI = MRI.getVRegDef(Src);
38235ffd83dbSDimitry Andric 
38245ffd83dbSDimitry Andric   if (MI && MI->getOpcode() == AMDGPU::G_FNEG &&
38255ffd83dbSDimitry Andric       // It's possible to see an f32 fneg here, but unlikely.
38265ffd83dbSDimitry Andric       // TODO: Treat f32 fneg as only high bit.
3827fe6060f1SDimitry Andric       MRI.getType(Src) == LLT::fixed_vector(2, 16)) {
38285ffd83dbSDimitry Andric     Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
38295ffd83dbSDimitry Andric     Src = MI->getOperand(1).getReg();
38305ffd83dbSDimitry Andric     MI = MRI.getVRegDef(Src);
38315ffd83dbSDimitry Andric   }
38325ffd83dbSDimitry Andric 
383306c3fb27SDimitry Andric   // TODO: Handle G_FSUB 0 as fneg
383406c3fb27SDimitry Andric 
38355ffd83dbSDimitry Andric   // TODO: Match op_sel through g_build_vector_trunc and g_shuffle_vector.
383681ad6265SDimitry Andric   (void)IsDOT; // DOTs do not use OPSEL on gfx940+, check ST.hasDOTOpSelHazard()
38375ffd83dbSDimitry Andric 
38385ffd83dbSDimitry Andric   // Packed instructions do not have abs modifiers.
38395ffd83dbSDimitry Andric   Mods |= SISrcMods::OP_SEL_1;
38405ffd83dbSDimitry Andric 
3841bdd1243dSDimitry Andric   return std::pair(Src, Mods);
38425ffd83dbSDimitry Andric }
38435ffd83dbSDimitry Andric 
38445ffd83dbSDimitry Andric InstructionSelector::ComplexRendererFns
38455ffd83dbSDimitry Andric AMDGPUInstructionSelector::selectVOP3PMods(MachineOperand &Root) const {
38465ffd83dbSDimitry Andric   MachineRegisterInfo &MRI
38475ffd83dbSDimitry Andric     = Root.getParent()->getParent()->getParent()->getRegInfo();
38485ffd83dbSDimitry Andric 
38495ffd83dbSDimitry Andric   Register Src;
38505ffd83dbSDimitry Andric   unsigned Mods;
38515ffd83dbSDimitry Andric   std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI);
38520b57cec5SDimitry Andric 
38530b57cec5SDimitry Andric   return {{
38540b57cec5SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
38550b57cec5SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }  // src_mods
38560b57cec5SDimitry Andric   }};
38570b57cec5SDimitry Andric }
38580b57cec5SDimitry Andric 
38590b57cec5SDimitry Andric InstructionSelector::ComplexRendererFns
386081ad6265SDimitry Andric AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
386181ad6265SDimitry Andric   MachineRegisterInfo &MRI
386281ad6265SDimitry Andric     = Root.getParent()->getParent()->getParent()->getRegInfo();
386381ad6265SDimitry Andric 
386481ad6265SDimitry Andric   Register Src;
386581ad6265SDimitry Andric   unsigned Mods;
386681ad6265SDimitry Andric   std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI, true);
386781ad6265SDimitry Andric 
386881ad6265SDimitry Andric   return {{
386981ad6265SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
387081ad6265SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }  // src_mods
387181ad6265SDimitry Andric   }};
387281ad6265SDimitry Andric }
387381ad6265SDimitry Andric 
387481ad6265SDimitry Andric InstructionSelector::ComplexRendererFns
38757a6dacacSDimitry Andric AMDGPUInstructionSelector::selectVOP3PModsNeg(MachineOperand &Root) const {
387681ad6265SDimitry Andric   // A literal i1 value set in the intrinsic represents SrcMods for the next
387781ad6265SDimitry Andric   // operand. The value is in an Imm operand as an i1 sign-extended to int64_t;
387881ad6265SDimitry Andric   // 1 (-1) promotes packed values to signed, 0 treats them as unsigned.
387981ad6265SDimitry Andric   assert((Root.isImm() && (Root.getImm() == -1 || Root.getImm() == 0)) &&
388081ad6265SDimitry Andric          "expected i1 value");
388181ad6265SDimitry Andric   unsigned Mods = SISrcMods::OP_SEL_1;
388281ad6265SDimitry Andric   if (Root.getImm() == -1)
388381ad6265SDimitry Andric     Mods ^= SISrcMods::NEG;
388481ad6265SDimitry Andric   return {{
388581ad6265SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
388681ad6265SDimitry Andric   }};
388781ad6265SDimitry Andric }
388881ad6265SDimitry Andric 
388981ad6265SDimitry Andric InstructionSelector::ComplexRendererFns
389081ad6265SDimitry Andric AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
389181ad6265SDimitry Andric     MachineOperand &Root) const {
389281ad6265SDimitry Andric   assert((Root.isImm() && (Root.getImm() == -1 || Root.getImm() == 0)) &&
389381ad6265SDimitry Andric          "expected i1 value");
389481ad6265SDimitry Andric   unsigned Mods = SISrcMods::OP_SEL_1;
389581ad6265SDimitry Andric   if (Root.getImm() != 0)
389681ad6265SDimitry Andric     Mods |= SISrcMods::OP_SEL_0;
389781ad6265SDimitry Andric 
389881ad6265SDimitry Andric   return {{
389981ad6265SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
390081ad6265SDimitry Andric   }};
390181ad6265SDimitry Andric }
390281ad6265SDimitry Andric 
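// Assemble 2, 4 or 8 32-bit elements into a VReg_64/128/256 REG_SEQUENCE
// inserted before InsertPt and return the resulting register.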
3903b3edf446SDimitry Andric static Register buildRegSequence(SmallVectorImpl<Register> &Elts,
3904b3edf446SDimitry Andric                                  MachineInstr *InsertPt,
3905b3edf446SDimitry Andric                                  MachineRegisterInfo &MRI) {
3906b3edf446SDimitry Andric   const TargetRegisterClass *DstRegClass;
3907b3edf446SDimitry Andric   switch (Elts.size()) {
3908b3edf446SDimitry Andric   case 8:
3909b3edf446SDimitry Andric     DstRegClass = &AMDGPU::VReg_256RegClass;
3910b3edf446SDimitry Andric     break;
3911b3edf446SDimitry Andric   case 4:
3912b3edf446SDimitry Andric     DstRegClass = &AMDGPU::VReg_128RegClass;
3913b3edf446SDimitry Andric     break;
3914b3edf446SDimitry Andric   case 2:
3915b3edf446SDimitry Andric     DstRegClass = &AMDGPU::VReg_64RegClass;
3916b3edf446SDimitry Andric     break;
3917b3edf446SDimitry Andric   default:
3918b3edf446SDimitry Andric     llvm_unreachable("unhandled Reg sequence size");
3919b3edf446SDimitry Andric   }
3920b3edf446SDimitry Andric 
3921b3edf446SDimitry Andric   MachineIRBuilder B(*InsertPt);
3922b3edf446SDimitry Andric   auto MIB = B.buildInstr(AMDGPU::REG_SEQUENCE)
3923b3edf446SDimitry Andric                  .addDef(MRI.createVirtualRegister(DstRegClass));
3924b3edf446SDimitry Andric   for (unsigned i = 0; i < Elts.size(); ++i) {
3925b3edf446SDimitry Andric     MIB.addReg(Elts[i]);
3926b3edf446SDimitry Andric     MIB.addImm(SIRegisterInfo::getSubRegFromChannel(i));
3927b3edf446SDimitry Andric   }
3928b3edf446SDimitry Andric   return MIB->getOperand(0).getReg();
3929b3edf446SDimitry Andric }
3930b3edf446SDimitry Andric 
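// All elements in Elts carried ModOpcode (G_FNEG or G_FABS). Set the matching
// NEG / NEG_HI bits, additionally folding fabs nested under fneg when every
// element has it, and rebuild Src from the stripped elements.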
3931b3edf446SDimitry Andric static void selectWMMAModsNegAbs(unsigned ModOpcode, unsigned &Mods,
3932b3edf446SDimitry Andric                                  SmallVectorImpl<Register> &Elts, Register &Src,
3933b3edf446SDimitry Andric                                  MachineInstr *InsertPt,
3934b3edf446SDimitry Andric                                  MachineRegisterInfo &MRI) {
3935b3edf446SDimitry Andric   if (ModOpcode == TargetOpcode::G_FNEG) {
3936b3edf446SDimitry Andric     Mods |= SISrcMods::NEG;
3937b3edf446SDimitry Andric     // Check if all elements also have abs modifier
3938b3edf446SDimitry Andric     SmallVector<Register, 8> NegAbsElts;
3939b3edf446SDimitry Andric     for (auto El : Elts) {
3940b3edf446SDimitry Andric       Register FabsSrc;
3941b3edf446SDimitry Andric       if (!mi_match(El, MRI, m_GFabs(m_Reg(FabsSrc))))
3942b3edf446SDimitry Andric         break;
3943b3edf446SDimitry Andric       NegAbsElts.push_back(FabsSrc);
3944b3edf446SDimitry Andric     }
3945b3edf446SDimitry Andric     if (Elts.size() != NegAbsElts.size()) {
3946b3edf446SDimitry Andric       // Neg
3947b3edf446SDimitry Andric       Src = buildRegSequence(Elts, InsertPt, MRI);
3948b3edf446SDimitry Andric     } else {
3949b3edf446SDimitry Andric       // Neg and Abs
3950b3edf446SDimitry Andric       Mods |= SISrcMods::NEG_HI;
3951b3edf446SDimitry Andric       Src = buildRegSequence(NegAbsElts, InsertPt, MRI);
3952b3edf446SDimitry Andric     }
3953b3edf446SDimitry Andric   } else {
3954b3edf446SDimitry Andric     assert(ModOpcode == TargetOpcode::G_FABS);
3955b3edf446SDimitry Andric     // Abs
3956b3edf446SDimitry Andric     Mods |= SISrcMods::NEG_HI;
3957b3edf446SDimitry Andric     Src = buildRegSequence(Elts, InsertPt, MRI);
3958b3edf446SDimitry Andric   }
3959b3edf446SDimitry Andric }
3960b3edf446SDimitry Andric 
3961b3edf446SDimitry Andric InstructionSelector::ComplexRendererFns
3962b3edf446SDimitry Andric AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(MachineOperand &Root) const {
3963b3edf446SDimitry Andric   Register Src = Root.getReg();
3964b3edf446SDimitry Andric   unsigned Mods = SISrcMods::OP_SEL_1;
3965b3edf446SDimitry Andric   SmallVector<Register, 8> EltsF32;
3966b3edf446SDimitry Andric 
3967b3edf446SDimitry Andric   if (GBuildVector *BV = dyn_cast<GBuildVector>(MRI->getVRegDef(Src))) {
3968*0fca6ea1SDimitry Andric     assert(BV->getNumSources() > 0);
3969b3edf446SDimitry Andric     // Based on the first element, decide which mod we match: neg or abs.
3970*0fca6ea1SDimitry Andric     MachineInstr *ElF32 = MRI->getVRegDef(BV->getSourceReg(0));
3971*0fca6ea1SDimitry Andric     unsigned ModOpcode = (ElF32->getOpcode() == AMDGPU::G_FNEG)
3972*0fca6ea1SDimitry Andric                              ? AMDGPU::G_FNEG
3973b3edf446SDimitry Andric                              : AMDGPU::G_FABS;
3974*0fca6ea1SDimitry Andric     for (unsigned i = 0; i < BV->getNumSources(); ++i) {
3975*0fca6ea1SDimitry Andric       ElF32 = MRI->getVRegDef(BV->getSourceReg(i));
3976b3edf446SDimitry Andric       if (ElF32->getOpcode() != ModOpcode)
3977b3edf446SDimitry Andric         break;
3978b3edf446SDimitry Andric       EltsF32.push_back(ElF32->getOperand(1).getReg());
3979b3edf446SDimitry Andric     }
3980b3edf446SDimitry Andric 
3981b3edf446SDimitry Andric     // All elements had ModOpcode modifier
3982b3edf446SDimitry Andric     if (BV->getNumSources() == EltsF32.size()) {
3983b3edf446SDimitry Andric       selectWMMAModsNegAbs(ModOpcode, Mods, EltsF32, Src, Root.getParent(),
3984b3edf446SDimitry Andric                            *MRI);
3985b3edf446SDimitry Andric     }
3986b3edf446SDimitry Andric   }
3987b3edf446SDimitry Andric 
3988b3edf446SDimitry Andric   return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
3989b3edf446SDimitry Andric            [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
3990b3edf446SDimitry Andric }
3991b3edf446SDimitry Andric 
3992b3edf446SDimitry Andric InstructionSelector::ComplexRendererFns
3993b3edf446SDimitry Andric AMDGPUInstructionSelector::selectWMMAModsF16Neg(MachineOperand &Root) const {
3994b3edf446SDimitry Andric   Register Src = Root.getReg();
3995b3edf446SDimitry Andric   unsigned Mods = SISrcMods::OP_SEL_1;
3996b3edf446SDimitry Andric   SmallVector<Register, 8> EltsV2F16;
3997b3edf446SDimitry Andric 
3998b3edf446SDimitry Andric   if (GConcatVectors *CV = dyn_cast<GConcatVectors>(MRI->getVRegDef(Src))) {
3999b3edf446SDimitry Andric     for (unsigned i = 0; i < CV->getNumSources(); ++i) {
4000b3edf446SDimitry Andric       Register FNegSrc;
4001b3edf446SDimitry Andric       if (!mi_match(CV->getSourceReg(i), *MRI, m_GFNeg(m_Reg(FNegSrc))))
4002b3edf446SDimitry Andric         break;
4003b3edf446SDimitry Andric       EltsV2F16.push_back(FNegSrc);
4004b3edf446SDimitry Andric     }
4005b3edf446SDimitry Andric 
4006b3edf446SDimitry Andric     // All elements had ModOpcode modifier
4007b3edf446SDimitry Andric     if (CV->getNumSources() == EltsV2F16.size()) {
4008b3edf446SDimitry Andric       Mods |= SISrcMods::NEG;
4009b3edf446SDimitry Andric       Mods |= SISrcMods::NEG_HI;
4010b3edf446SDimitry Andric       Src = buildRegSequence(EltsV2F16, Root.getParent(), *MRI);
4011b3edf446SDimitry Andric     }
4012b3edf446SDimitry Andric   }
4013b3edf446SDimitry Andric 
4014b3edf446SDimitry Andric   return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
4015b3edf446SDimitry Andric            [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
4016b3edf446SDimitry Andric }
4017b3edf446SDimitry Andric 
4018b3edf446SDimitry Andric InstructionSelector::ComplexRendererFns
4019b3edf446SDimitry Andric AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(MachineOperand &Root) const {
4020b3edf446SDimitry Andric   Register Src = Root.getReg();
4021b3edf446SDimitry Andric   unsigned Mods = SISrcMods::OP_SEL_1;
4022b3edf446SDimitry Andric   SmallVector<Register, 8> EltsV2F16;
4023b3edf446SDimitry Andric 
4024b3edf446SDimitry Andric   if (GConcatVectors *CV = dyn_cast<GConcatVectors>(MRI->getVRegDef(Src))) {
4025*0fca6ea1SDimitry Andric     assert(CV->getNumSources() > 0);
4026*0fca6ea1SDimitry Andric     MachineInstr *ElV2F16 = MRI->getVRegDef(CV->getSourceReg(0));
4027b3edf446SDimitry Andric     // Based on the first element, decide which mod we match: neg or abs.
4028*0fca6ea1SDimitry Andric     unsigned ModOpcode = (ElV2F16->getOpcode() == AMDGPU::G_FNEG)
4029*0fca6ea1SDimitry Andric                              ? AMDGPU::G_FNEG
4030b3edf446SDimitry Andric                              : AMDGPU::G_FABS;
4031*0fca6ea1SDimitry Andric 
4032*0fca6ea1SDimitry Andric     for (unsigned i = 0; i < CV->getNumSources(); ++i) {
4033*0fca6ea1SDimitry Andric       ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i));
4034b3edf446SDimitry Andric       if (ElV2F16->getOpcode() != ModOpcode)
4035b3edf446SDimitry Andric         break;
4036b3edf446SDimitry Andric       EltsV2F16.push_back(ElV2F16->getOperand(1).getReg());
4037b3edf446SDimitry Andric     }
4038b3edf446SDimitry Andric 
4039b3edf446SDimitry Andric     // All elements had ModOpcode modifier
4040b3edf446SDimitry Andric     if (CV->getNumSources() == EltsV2F16.size()) {
4041b3edf446SDimitry Andric       MachineIRBuilder B(*Root.getParent());
4042b3edf446SDimitry Andric       selectWMMAModsNegAbs(ModOpcode, Mods, EltsV2F16, Src, Root.getParent(),
4043b3edf446SDimitry Andric                            *MRI);
4044b3edf446SDimitry Andric     }
4045b3edf446SDimitry Andric   }
4046b3edf446SDimitry Andric 
4047b3edf446SDimitry Andric   return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
4048b3edf446SDimitry Andric            [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
4049b3edf446SDimitry Andric }
4050b3edf446SDimitry Andric 
4051b3edf446SDimitry Andric InstructionSelector::ComplexRendererFns
4052b3edf446SDimitry Andric AMDGPUInstructionSelector::selectWMMAVISrc(MachineOperand &Root) const {
4053b3edf446SDimitry Andric   std::optional<FPValueAndVReg> FPValReg;
4054b3edf446SDimitry Andric   if (mi_match(Root.getReg(), *MRI, m_GFCstOrSplat(FPValReg))) {
4055*0fca6ea1SDimitry Andric     if (TII.isInlineConstant(FPValReg->Value)) {
4056b3edf446SDimitry Andric       return {{[=](MachineInstrBuilder &MIB) {
4057b3edf446SDimitry Andric         MIB.addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());
4058b3edf446SDimitry Andric       }}};
4059b3edf446SDimitry Andric     }
4060b3edf446SDimitry Andric     // Non-inlineable splat floats should not fall through to the integer
4061b3edf446SDimitry Andric     // immediate checks.
4062b3edf446SDimitry Andric     return {};
4063b3edf446SDimitry Andric   }
4064b3edf446SDimitry Andric 
4065b3edf446SDimitry Andric   APInt ICst;
4066b3edf446SDimitry Andric   if (mi_match(Root.getReg(), *MRI, m_ICstOrSplat(ICst))) {
4067b3edf446SDimitry Andric     if (TII.isInlineConstant(ICst)) {
4068b3edf446SDimitry Andric       return {
4069b3edf446SDimitry Andric           {[=](MachineInstrBuilder &MIB) { MIB.addImm(ICst.getSExtValue()); }}};
4070b3edf446SDimitry Andric     }
4071b3edf446SDimitry Andric   }
4072b3edf446SDimitry Andric 
4073b3edf446SDimitry Andric   return {};
4074b3edf446SDimitry Andric }
4075b3edf446SDimitry Andric 
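// For SWMMAC operands, match a 32-bit source that is right-shifted by a
// multiple of 8 and encode the byte position as index_key; otherwise fall back
// to the plain source with index_key 0.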
4076b3edf446SDimitry Andric InstructionSelector::ComplexRendererFns
4077b3edf446SDimitry Andric AMDGPUInstructionSelector::selectSWMMACIndex8(MachineOperand &Root) const {
4078b3edf446SDimitry Andric   Register Src =
4079b3edf446SDimitry Andric       getDefIgnoringCopies(Root.getReg(), *MRI)->getOperand(0).getReg();
4080b3edf446SDimitry Andric   unsigned Key = 0;
4081b3edf446SDimitry Andric 
4082b3edf446SDimitry Andric   Register ShiftSrc;
4083b3edf446SDimitry Andric   std::optional<ValueAndVReg> ShiftAmt;
4084b3edf446SDimitry Andric   if (mi_match(Src, *MRI, m_GLShr(m_Reg(ShiftSrc), m_GCst(ShiftAmt))) &&
4085b3edf446SDimitry Andric       MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
4086b3edf446SDimitry Andric       ShiftAmt->Value.getZExtValue() % 8 == 0) {
4087b3edf446SDimitry Andric     Key = ShiftAmt->Value.getZExtValue() / 8;
4088b3edf446SDimitry Andric     Src = ShiftSrc;
4089b3edf446SDimitry Andric   }
4090b3edf446SDimitry Andric 
4091b3edf446SDimitry Andric   return {{
4092b3edf446SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
4093b3edf446SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); } // index_key
4094b3edf446SDimitry Andric   }};
4095b3edf446SDimitry Andric }
4096b3edf446SDimitry Andric 
4097b3edf446SDimitry Andric InstructionSelector::ComplexRendererFns
4098b3edf446SDimitry Andric AMDGPUInstructionSelector::selectSWMMACIndex16(MachineOperand &Root) const {
4100b3edf446SDimitry Andric   Register Src =
4101b3edf446SDimitry Andric       getDefIgnoringCopies(Root.getReg(), *MRI)->getOperand(0).getReg();
4102b3edf446SDimitry Andric   unsigned Key = 0;
4103b3edf446SDimitry Andric 
4104b3edf446SDimitry Andric   Register ShiftSrc;
4105b3edf446SDimitry Andric   std::optional<ValueAndVReg> ShiftAmt;
4106b3edf446SDimitry Andric   if (mi_match(Src, *MRI, m_GLShr(m_Reg(ShiftSrc), m_GCst(ShiftAmt))) &&
4107b3edf446SDimitry Andric       MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
4108b3edf446SDimitry Andric       ShiftAmt->Value.getZExtValue() == 16) {
4109b3edf446SDimitry Andric     Src = ShiftSrc;
4110b3edf446SDimitry Andric     Key = 1;
4111b3edf446SDimitry Andric   }
4112b3edf446SDimitry Andric 
4113b3edf446SDimitry Andric   return {{
4114b3edf446SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
4115b3edf446SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); } // index_key
4116b3edf446SDimitry Andric   }};
4117b3edf446SDimitry Andric }
4118b3edf446SDimitry Andric 
411981ad6265SDimitry Andric InstructionSelector::ComplexRendererFns
4120bdd1243dSDimitry Andric AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
4121480093f4SDimitry Andric   Register Src;
4122480093f4SDimitry Andric   unsigned Mods;
41235ffd83dbSDimitry Andric   std::tie(Src, Mods) = selectVOP3ModsImpl(Root);
4124480093f4SDimitry Andric 
4125bdd1243dSDimitry Andric   // FIXME: Handle op_sel
4126480093f4SDimitry Andric   return {{
4127480093f4SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
4128480093f4SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
4129480093f4SDimitry Andric   }};
4130480093f4SDimitry Andric }
4131480093f4SDimitry Andric 
4132480093f4SDimitry Andric InstructionSelector::ComplexRendererFns
413381ad6265SDimitry Andric AMDGPUInstructionSelector::selectVINTERPMods(MachineOperand &Root) const {
413481ad6265SDimitry Andric   Register Src;
413581ad6265SDimitry Andric   unsigned Mods;
413681ad6265SDimitry Andric   std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
413706c3fb27SDimitry Andric                                            /*IsCanonicalizing=*/true,
413806c3fb27SDimitry Andric                                            /*AllowAbs=*/false,
413906c3fb27SDimitry Andric                                            /*OpSel=*/false);
414081ad6265SDimitry Andric 
414181ad6265SDimitry Andric   return {{
4142bdd1243dSDimitry Andric       [=](MachineInstrBuilder &MIB) {
4143bdd1243dSDimitry Andric         MIB.addReg(
4144bdd1243dSDimitry Andric             copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, /* ForceVGPR */ true));
4145bdd1243dSDimitry Andric       },
414681ad6265SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
414781ad6265SDimitry Andric   }};
414881ad6265SDimitry Andric }
414981ad6265SDimitry Andric 
415081ad6265SDimitry Andric InstructionSelector::ComplexRendererFns
415181ad6265SDimitry Andric AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const {
415281ad6265SDimitry Andric   Register Src;
415381ad6265SDimitry Andric   unsigned Mods;
415481ad6265SDimitry Andric   std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
415506c3fb27SDimitry Andric                                            /*IsCanonicalizing=*/true,
415606c3fb27SDimitry Andric                                            /*AllowAbs=*/false,
415706c3fb27SDimitry Andric                                            /*OpSel=*/true);
415881ad6265SDimitry Andric 
415981ad6265SDimitry Andric   return {{
4160bdd1243dSDimitry Andric       [=](MachineInstrBuilder &MIB) {
4161bdd1243dSDimitry Andric         MIB.addReg(
4162bdd1243dSDimitry Andric             copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, /* ForceVGPR */ true));
4163bdd1243dSDimitry Andric       },
416481ad6265SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods
416581ad6265SDimitry Andric   }};
416681ad6265SDimitry Andric }
416781ad6265SDimitry Andric 
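// Match an SMRD addressing mode for Root. Depending on which of SOffset and
// Offset are requested, this produces a base with an encoded immediate, an
// SGPR offset, or both; returns false if no suitable form is found.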
4168fcaf7f86SDimitry Andric bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
4169fcaf7f86SDimitry Andric                                                  Register &Base,
4170fcaf7f86SDimitry Andric                                                  Register *SOffset,
4171fcaf7f86SDimitry Andric                                                  int64_t *Offset) const {
4172fcaf7f86SDimitry Andric   MachineInstr *MI = Root.getParent();
4173fcaf7f86SDimitry Andric   MachineBasicBlock *MBB = MI->getParent();
4174fcaf7f86SDimitry Andric 
4175fcaf7f86SDimitry Andric   // FIXME: We should shrink the GEP if the offset is known to be <= 32 bits;
4176fcaf7f86SDimitry Andric   // then we can select all ptr + 32-bit offsets.
4177fcaf7f86SDimitry Andric   SmallVector<GEPInfo, 4> AddrInfo;
4178fcaf7f86SDimitry Andric   getAddrModeInfo(*MI, *MRI, AddrInfo);
4179fcaf7f86SDimitry Andric 
4180fcaf7f86SDimitry Andric   if (AddrInfo.empty())
4181fcaf7f86SDimitry Andric     return false;
4182fcaf7f86SDimitry Andric 
4183fcaf7f86SDimitry Andric   const GEPInfo &GEPI = AddrInfo[0];
4184*0fca6ea1SDimitry Andric   std::optional<int64_t> EncodedImm;
4185fcaf7f86SDimitry Andric 
4186fcaf7f86SDimitry Andric   if (SOffset && Offset) {
4187*0fca6ea1SDimitry Andric     EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPI.Imm, /*IsBuffer=*/false,
4188*0fca6ea1SDimitry Andric                                               /*HasSOffset=*/true);
4189fcaf7f86SDimitry Andric     if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
4190fcaf7f86SDimitry Andric         AddrInfo.size() > 1) {
4191fcaf7f86SDimitry Andric       const GEPInfo &GEPI2 = AddrInfo[1];
4192fcaf7f86SDimitry Andric       if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
4193fcaf7f86SDimitry Andric         if (Register OffsetReg =
4194fcaf7f86SDimitry Andric                 matchZeroExtendFromS32(*MRI, GEPI2.SgprParts[1])) {
4195fcaf7f86SDimitry Andric           Base = GEPI2.SgprParts[0];
4196fcaf7f86SDimitry Andric           *SOffset = OffsetReg;
4197fcaf7f86SDimitry Andric           *Offset = *EncodedImm;
4198*0fca6ea1SDimitry Andric           if (*Offset >= 0 || !AMDGPU::hasSMRDSignedImmOffset(STI))
4199*0fca6ea1SDimitry Andric             return true;
4200*0fca6ea1SDimitry Andric 
4201*0fca6ea1SDimitry Andric           // For unbuffered smem loads, it is illegal for the Immediate Offset
4202*0fca6ea1SDimitry Andric           // to be negative if the resulting (Offset + (M0 or SOffset or zero))
4203*0fca6ea1SDimitry Andric           // is negative. Handle the case where the Immediate Offset + SOffset
4204*0fca6ea1SDimitry Andric           // is negative.
4205*0fca6ea1SDimitry Andric           auto SKnown = KB->getKnownBits(*SOffset);
4206*0fca6ea1SDimitry Andric           if (*Offset + SKnown.getMinValue().getSExtValue() < 0)
4207*0fca6ea1SDimitry Andric             return false;
4208*0fca6ea1SDimitry Andric 
4209fcaf7f86SDimitry Andric           return true;
4210fcaf7f86SDimitry Andric         }
4211fcaf7f86SDimitry Andric       }
4212fcaf7f86SDimitry Andric     }
4213fcaf7f86SDimitry Andric     return false;
4214fcaf7f86SDimitry Andric   }
4215fcaf7f86SDimitry Andric 
4216*0fca6ea1SDimitry Andric   EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPI.Imm, /*IsBuffer=*/false,
4217*0fca6ea1SDimitry Andric                                             /*HasSOffset=*/false);
4218fcaf7f86SDimitry Andric   if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
4219fcaf7f86SDimitry Andric     Base = GEPI.SgprParts[0];
4220fcaf7f86SDimitry Andric     *Offset = *EncodedImm;
4221fcaf7f86SDimitry Andric     return true;
4222fcaf7f86SDimitry Andric   }
4223fcaf7f86SDimitry Andric 
4224fcaf7f86SDimitry Andric   // SGPR offset is unsigned.
4225fcaf7f86SDimitry Andric   if (SOffset && GEPI.SgprParts.size() == 1 && isUInt<32>(GEPI.Imm) &&
4226fcaf7f86SDimitry Andric       GEPI.Imm != 0) {
4227fcaf7f86SDimitry Andric     // If we make it this far we have a load with a 32-bit immediate offset.
4228fcaf7f86SDimitry Andric     // It is OK to select this using an SGPR offset, because we have already
4229fcaf7f86SDimitry Andric     // failed trying to select this load into one of the _IMM variants since
4230fcaf7f86SDimitry Andric     // the _IMM patterns are considered before the _SGPR patterns.
4231fcaf7f86SDimitry Andric     Base = GEPI.SgprParts[0];
4232fcaf7f86SDimitry Andric     *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
4233fcaf7f86SDimitry Andric     BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)
4234fcaf7f86SDimitry Andric         .addImm(GEPI.Imm);
4235fcaf7f86SDimitry Andric     return true;
4236fcaf7f86SDimitry Andric   }
4237fcaf7f86SDimitry Andric 
4238fcaf7f86SDimitry Andric   if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
4239fcaf7f86SDimitry Andric     if (Register OffsetReg = matchZeroExtendFromS32(*MRI, GEPI.SgprParts[1])) {
4240fcaf7f86SDimitry Andric       Base = GEPI.SgprParts[0];
4241fcaf7f86SDimitry Andric       *SOffset = OffsetReg;
4242fcaf7f86SDimitry Andric       return true;
4243fcaf7f86SDimitry Andric     }
4244fcaf7f86SDimitry Andric   }
4245fcaf7f86SDimitry Andric 
4246fcaf7f86SDimitry Andric   return false;
4247fcaf7f86SDimitry Andric }
4248fcaf7f86SDimitry Andric 
424981ad6265SDimitry Andric InstructionSelector::ComplexRendererFns
42508bcb0991SDimitry Andric AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
4251fcaf7f86SDimitry Andric   Register Base;
4252fcaf7f86SDimitry Andric   int64_t Offset;
4253fcaf7f86SDimitry Andric   if (!selectSmrdOffset(Root, Base, /* SOffset= */ nullptr, &Offset))
4254bdd1243dSDimitry Andric     return std::nullopt;
42550b57cec5SDimitry Andric 
4256fcaf7f86SDimitry Andric   return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
4257fcaf7f86SDimitry Andric            [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};
42580b57cec5SDimitry Andric }
42590b57cec5SDimitry Andric 
42600b57cec5SDimitry Andric InstructionSelector::ComplexRendererFns
42610b57cec5SDimitry Andric AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
42620b57cec5SDimitry Andric   SmallVector<GEPInfo, 4> AddrInfo;
42638bcb0991SDimitry Andric   getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo);
42640b57cec5SDimitry Andric 
42650b57cec5SDimitry Andric   if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
4266bdd1243dSDimitry Andric     return std::nullopt;
42670b57cec5SDimitry Andric 
42680b57cec5SDimitry Andric   const GEPInfo &GEPInfo = AddrInfo[0];
42695ffd83dbSDimitry Andric   Register PtrReg = GEPInfo.SgprParts[0];
4270bdd1243dSDimitry Andric   std::optional<int64_t> EncodedImm =
42715ffd83dbSDimitry Andric       AMDGPU::getSMRDEncodedLiteralOffset32(STI, GEPInfo.Imm);
42725ffd83dbSDimitry Andric   if (!EncodedImm)
4273bdd1243dSDimitry Andric     return std::nullopt;
42740b57cec5SDimitry Andric 
42750b57cec5SDimitry Andric   return {{
42760b57cec5SDimitry Andric     [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
42775ffd83dbSDimitry Andric     [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }
42780b57cec5SDimitry Andric   }};
42790b57cec5SDimitry Andric }
42800b57cec5SDimitry Andric 
42810b57cec5SDimitry Andric InstructionSelector::ComplexRendererFns
42820b57cec5SDimitry Andric AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
4283fcaf7f86SDimitry Andric   Register Base, SOffset;
4284fcaf7f86SDimitry Andric   if (!selectSmrdOffset(Root, Base, &SOffset, /* Offset= */ nullptr))
4285bdd1243dSDimitry Andric     return std::nullopt;
42860b57cec5SDimitry Andric 
4287fcaf7f86SDimitry Andric   return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
4288fcaf7f86SDimitry Andric            [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }}};
4289753f127fSDimitry Andric }
4290753f127fSDimitry Andric 
4291fcaf7f86SDimitry Andric InstructionSelector::ComplexRendererFns
4292fcaf7f86SDimitry Andric AMDGPUInstructionSelector::selectSmrdSgprImm(MachineOperand &Root) const {
4293fcaf7f86SDimitry Andric   Register Base, SOffset;
4294fcaf7f86SDimitry Andric   int64_t Offset;
4295fcaf7f86SDimitry Andric   if (!selectSmrdOffset(Root, Base, &SOffset, &Offset))
4296bdd1243dSDimitry Andric     return std::nullopt;
4297fcaf7f86SDimitry Andric 
4298fcaf7f86SDimitry Andric   return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
4299fcaf7f86SDimitry Andric            [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
4300fcaf7f86SDimitry Andric            [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};
43010b57cec5SDimitry Andric }
43020b57cec5SDimitry Andric 
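// Split Root into a pointer base and an immediate offset that is legal for the
// given flat variant; returns {Root, 0} when no offset can be folded.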
4303e8d8bef9SDimitry Andric std::pair<Register, int>
4304fe6060f1SDimitry Andric AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root,
4305fe6060f1SDimitry Andric                                                 uint64_t FlatVariant) const {
43060b57cec5SDimitry Andric   MachineInstr *MI = Root.getParent();
43070b57cec5SDimitry Andric 
4308bdd1243dSDimitry Andric   auto Default = std::pair(Root.getReg(), 0);
43090b57cec5SDimitry Andric 
43100b57cec5SDimitry Andric   if (!STI.hasFlatInstOffsets())
43110b57cec5SDimitry Andric     return Default;
43120b57cec5SDimitry Andric 
4313e8d8bef9SDimitry Andric   Register PtrBase;
4314e8d8bef9SDimitry Andric   int64_t ConstOffset;
4315e8d8bef9SDimitry Andric   std::tie(PtrBase, ConstOffset) =
4316e8d8bef9SDimitry Andric       getPtrBaseWithConstantOffset(Root.getReg(), *MRI);
43175f757f3fSDimitry Andric 
43185f757f3fSDimitry Andric   if (ConstOffset == 0 || (FlatVariant == SIInstrFlags::FlatScratch &&
43195f757f3fSDimitry Andric                            !isFlatScratchBaseLegal(Root.getReg())))
43200b57cec5SDimitry Andric     return Default;
43210b57cec5SDimitry Andric 
43220b57cec5SDimitry Andric   unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
4323fe6060f1SDimitry Andric   if (!TII.isLegalFLATOffset(ConstOffset, AddrSpace, FlatVariant))
43240b57cec5SDimitry Andric     return Default;
43250b57cec5SDimitry Andric 
4326bdd1243dSDimitry Andric   return std::pair(PtrBase, ConstOffset);
43270b57cec5SDimitry Andric }
43280b57cec5SDimitry Andric 
43290b57cec5SDimitry Andric InstructionSelector::ComplexRendererFns
43300b57cec5SDimitry Andric AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
4331fe6060f1SDimitry Andric   auto PtrWithOffset = selectFlatOffsetImpl(Root, SIInstrFlags::FLAT);
4332e8d8bef9SDimitry Andric 
4333e8d8bef9SDimitry Andric   return {{
4334e8d8bef9SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrWithOffset.first); },
4335e8d8bef9SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(PtrWithOffset.second); },
4336e8d8bef9SDimitry Andric     }};
43370b57cec5SDimitry Andric }
43380b57cec5SDimitry Andric 
43390b57cec5SDimitry Andric InstructionSelector::ComplexRendererFns
4340fe6060f1SDimitry Andric AMDGPUInstructionSelector::selectGlobalOffset(MachineOperand &Root) const {
4341fe6060f1SDimitry Andric   auto PtrWithOffset = selectFlatOffsetImpl(Root, SIInstrFlags::FlatGlobal);
4342fe6060f1SDimitry Andric 
4343fe6060f1SDimitry Andric   return {{
4344fe6060f1SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrWithOffset.first); },
4345fe6060f1SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(PtrWithOffset.second); },
4346fe6060f1SDimitry Andric   }};
4347fe6060f1SDimitry Andric }
4348fe6060f1SDimitry Andric 
4349fe6060f1SDimitry Andric InstructionSelector::ComplexRendererFns
4350fe6060f1SDimitry Andric AMDGPUInstructionSelector::selectScratchOffset(MachineOperand &Root) const {
4351fe6060f1SDimitry Andric   auto PtrWithOffset = selectFlatOffsetImpl(Root, SIInstrFlags::FlatScratch);
4352e8d8bef9SDimitry Andric 
4353e8d8bef9SDimitry Andric   return {{
4354e8d8bef9SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrWithOffset.first); },
4355e8d8bef9SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(PtrWithOffset.second); },
4356e8d8bef9SDimitry Andric     }};
4357e8d8bef9SDimitry Andric }
4358e8d8bef9SDimitry Andric 
4359e8d8bef9SDimitry Andric // Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
4360e8d8bef9SDimitry Andric InstructionSelector::ComplexRendererFns
4361e8d8bef9SDimitry Andric AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
4362e8d8bef9SDimitry Andric   Register Addr = Root.getReg();
4363e8d8bef9SDimitry Andric   Register PtrBase;
4364e8d8bef9SDimitry Andric   int64_t ConstOffset;
4365e8d8bef9SDimitry Andric   int64_t ImmOffset = 0;
4366e8d8bef9SDimitry Andric 
4367e8d8bef9SDimitry Andric   // Match the immediate offset first, which canonically is moved as low as
4368e8d8bef9SDimitry Andric   // possible.
4369e8d8bef9SDimitry Andric   std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);
4370e8d8bef9SDimitry Andric 
4371e8d8bef9SDimitry Andric   if (ConstOffset != 0) {
4372fe6060f1SDimitry Andric     if (TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::GLOBAL_ADDRESS,
4373fe6060f1SDimitry Andric                               SIInstrFlags::FlatGlobal)) {
4374e8d8bef9SDimitry Andric       Addr = PtrBase;
4375e8d8bef9SDimitry Andric       ImmOffset = ConstOffset;
4376fe6060f1SDimitry Andric     } else {
4377e8d8bef9SDimitry Andric       auto PtrBaseDef = getDefSrcRegIgnoringCopies(PtrBase, *MRI);
4378e8d8bef9SDimitry Andric       if (isSGPR(PtrBaseDef->Reg)) {
4379fe6060f1SDimitry Andric         if (ConstOffset > 0) {
4380e8d8bef9SDimitry Andric           // Offset is too large.
4381e8d8bef9SDimitry Andric           //
4382fe6060f1SDimitry Andric           // saddr + large_offset -> saddr +
4383fe6060f1SDimitry Andric           //                         (voffset = large_offset & ~MaxOffset) +
4384fe6060f1SDimitry Andric           //                         (large_offset & MaxOffset);
4385e8d8bef9SDimitry Andric           int64_t SplitImmOffset, RemainderOffset;
4386fe6060f1SDimitry Andric           std::tie(SplitImmOffset, RemainderOffset) = TII.splitFlatOffset(
4387fe6060f1SDimitry Andric               ConstOffset, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal);
4388e8d8bef9SDimitry Andric 
4389e8d8bef9SDimitry Andric           if (isUInt<32>(RemainderOffset)) {
4390e8d8bef9SDimitry Andric             MachineInstr *MI = Root.getParent();
4391e8d8bef9SDimitry Andric             MachineBasicBlock *MBB = MI->getParent();
4392fe6060f1SDimitry Andric             Register HighBits =
4393fe6060f1SDimitry Andric                 MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4394e8d8bef9SDimitry Andric 
4395e8d8bef9SDimitry Andric             BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
4396e8d8bef9SDimitry Andric                     HighBits)
4397e8d8bef9SDimitry Andric                 .addImm(RemainderOffset);
4398e8d8bef9SDimitry Andric 
4399e8d8bef9SDimitry Andric             return {{
4400e8d8bef9SDimitry Andric                 [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrBase); }, // saddr
4401fe6060f1SDimitry Andric                 [=](MachineInstrBuilder &MIB) {
4402fe6060f1SDimitry Andric                   MIB.addReg(HighBits);
4403fe6060f1SDimitry Andric                 }, // voffset
4404e8d8bef9SDimitry Andric                 [=](MachineInstrBuilder &MIB) { MIB.addImm(SplitImmOffset); },
4405e8d8bef9SDimitry Andric             }};
4406e8d8bef9SDimitry Andric           }
4407e8d8bef9SDimitry Andric         }
4408fe6060f1SDimitry Andric 
4409fe6060f1SDimitry Andric         // We are adding a 64-bit SGPR and a constant. If the constant bus limit
4410fe6060f1SDimitry Andric         // is 1, we would need to perform 1 or 2 extra moves for each half of
4411fe6060f1SDimitry Andric         // the constant, so it is better to do a scalar add and then issue a
4412fe6060f1SDimitry Andric         // single VALU instruction to materialize zero. Otherwise it takes fewer
4413fe6060f1SDimitry Andric         // instructions to perform VALU adds with immediates or inline literals.
4414fe6060f1SDimitry Andric         unsigned NumLiterals =
4415fe6060f1SDimitry Andric             !TII.isInlineConstant(APInt(32, ConstOffset & 0xffffffff)) +
4416fe6060f1SDimitry Andric             !TII.isInlineConstant(APInt(32, ConstOffset >> 32));
4417fe6060f1SDimitry Andric         if (STI.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
4418bdd1243dSDimitry Andric           return std::nullopt;
4419fe6060f1SDimitry Andric       }
4420e8d8bef9SDimitry Andric     }
4421e8d8bef9SDimitry Andric   }
4422e8d8bef9SDimitry Andric 
4423e8d8bef9SDimitry Andric   // Match the variable offset.
442481ad6265SDimitry Andric   auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
4425fe6060f1SDimitry Andric   if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
4426e8d8bef9SDimitry Andric     // Look through the SGPR->VGPR copy.
4427e8d8bef9SDimitry Andric     Register SAddr =
4428e8d8bef9SDimitry Andric         getSrcRegIgnoringCopies(AddrDef->MI->getOperand(1).getReg(), *MRI);
4429e8d8bef9SDimitry Andric 
4430bdd1243dSDimitry Andric     if (isSGPR(SAddr)) {
4431e8d8bef9SDimitry Andric       Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
4432e8d8bef9SDimitry Andric 
4433e8d8bef9SDimitry Andric       // It's possible voffset is an SGPR here, but the copy to VGPR will be
4434e8d8bef9SDimitry Andric       // inserted later.
4435fe6060f1SDimitry Andric       if (Register VOffset = matchZeroExtendFromS32(*MRI, PtrBaseOffset)) {
4436e8d8bef9SDimitry Andric         return {{[=](MachineInstrBuilder &MIB) { // saddr
4437e8d8bef9SDimitry Andric                    MIB.addReg(SAddr);
4438e8d8bef9SDimitry Andric                  },
4439e8d8bef9SDimitry Andric                  [=](MachineInstrBuilder &MIB) { // voffset
4440e8d8bef9SDimitry Andric                    MIB.addReg(VOffset);
4441e8d8bef9SDimitry Andric                  },
4442e8d8bef9SDimitry Andric                  [=](MachineInstrBuilder &MIB) { // offset
4443e8d8bef9SDimitry Andric                    MIB.addImm(ImmOffset);
4444e8d8bef9SDimitry Andric                  }}};
4445e8d8bef9SDimitry Andric       }
4446fe6060f1SDimitry Andric     }
4447fe6060f1SDimitry Andric   }
4448fe6060f1SDimitry Andric 
4449fe6060f1SDimitry Andric   // FIXME: We should probably have folded COPY (G_IMPLICIT_DEF) earlier, and
4450fe6060f1SDimitry Andric   // drop this.
4451fe6060f1SDimitry Andric   if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
4452fe6060f1SDimitry Andric       AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))
4453bdd1243dSDimitry Andric     return std::nullopt;
4454fe6060f1SDimitry Andric 
4455fe6060f1SDimitry Andric   // It's cheaper to materialize a single 32-bit zero for vaddr than the two
4456fe6060f1SDimitry Andric   // moves required to copy a 64-bit SGPR to VGPR.
4457fe6060f1SDimitry Andric   MachineInstr *MI = Root.getParent();
4458fe6060f1SDimitry Andric   MachineBasicBlock *MBB = MI->getParent();
4459fe6060f1SDimitry Andric   Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4460fe6060f1SDimitry Andric 
4461fe6060f1SDimitry Andric   BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
4462fe6060f1SDimitry Andric       .addImm(0);
4463fe6060f1SDimitry Andric 
4464fe6060f1SDimitry Andric   return {{
4465fe6060f1SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addReg(AddrDef->Reg); }, // saddr
4466fe6060f1SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addReg(VOffset); },      // voffset
4467fe6060f1SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }     // offset
4468fe6060f1SDimitry Andric   }};
4469fe6060f1SDimitry Andric }
4470e8d8bef9SDimitry Andric 
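// Match the SADDR form of a flat scratch access: a frame index or SGPR base
// plus a legal immediate offset, emitting an S_ADD_I32 when the address is a
// frame index plus an SGPR.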
4471e8d8bef9SDimitry Andric InstructionSelector::ComplexRendererFns
4472e8d8bef9SDimitry Andric AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
4473e8d8bef9SDimitry Andric   Register Addr = Root.getReg();
4474e8d8bef9SDimitry Andric   Register PtrBase;
4475e8d8bef9SDimitry Andric   int64_t ConstOffset;
4476e8d8bef9SDimitry Andric   int64_t ImmOffset = 0;
4477e8d8bef9SDimitry Andric 
4478e8d8bef9SDimitry Andric   // Match the immediate offset first, which canonically is moved as low as
4479e8d8bef9SDimitry Andric   // possible.
4480e8d8bef9SDimitry Andric   std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);
4481e8d8bef9SDimitry Andric 
44825f757f3fSDimitry Andric   if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&
4483fe6060f1SDimitry Andric       TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::PRIVATE_ADDRESS,
4484fe6060f1SDimitry Andric                             SIInstrFlags::FlatScratch)) {
4485e8d8bef9SDimitry Andric     Addr = PtrBase;
4486e8d8bef9SDimitry Andric     ImmOffset = ConstOffset;
4487e8d8bef9SDimitry Andric   }
4488e8d8bef9SDimitry Andric 
4489e8d8bef9SDimitry Andric   auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
4490e8d8bef9SDimitry Andric   if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
4491e8d8bef9SDimitry Andric     int FI = AddrDef->MI->getOperand(1).getIndex();
4492e8d8bef9SDimitry Andric     return {{
4493e8d8bef9SDimitry Andric         [=](MachineInstrBuilder &MIB) { MIB.addFrameIndex(FI); }, // saddr
4494e8d8bef9SDimitry Andric         [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset
4495e8d8bef9SDimitry Andric     }};
4496e8d8bef9SDimitry Andric   }
4497e8d8bef9SDimitry Andric 
4498e8d8bef9SDimitry Andric   Register SAddr = AddrDef->Reg;
4499e8d8bef9SDimitry Andric 
4500e8d8bef9SDimitry Andric   if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
4501e8d8bef9SDimitry Andric     Register LHS = AddrDef->MI->getOperand(1).getReg();
4502e8d8bef9SDimitry Andric     Register RHS = AddrDef->MI->getOperand(2).getReg();
4503e8d8bef9SDimitry Andric     auto LHSDef = getDefSrcRegIgnoringCopies(LHS, *MRI);
4504e8d8bef9SDimitry Andric     auto RHSDef = getDefSrcRegIgnoringCopies(RHS, *MRI);
4505e8d8bef9SDimitry Andric 
450681ad6265SDimitry Andric     if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
4507e8d8bef9SDimitry Andric         isSGPR(RHSDef->Reg)) {
4508e8d8bef9SDimitry Andric       int FI = LHSDef->MI->getOperand(1).getIndex();
4509e8d8bef9SDimitry Andric       MachineInstr &I = *Root.getParent();
4510e8d8bef9SDimitry Andric       MachineBasicBlock *BB = I.getParent();
4511e8d8bef9SDimitry Andric       const DebugLoc &DL = I.getDebugLoc();
4512e8d8bef9SDimitry Andric       SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
4513e8d8bef9SDimitry Andric 
4514fe6060f1SDimitry Andric       BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_I32), SAddr)
4515e8d8bef9SDimitry Andric           .addFrameIndex(FI)
45165f757f3fSDimitry Andric           .addReg(RHSDef->Reg)
45175f757f3fSDimitry Andric           .setOperandDead(3); // Dead scc
4518e8d8bef9SDimitry Andric     }
4519e8d8bef9SDimitry Andric   }
4520e8d8bef9SDimitry Andric 
4521e8d8bef9SDimitry Andric   if (!isSGPR(SAddr))
4522bdd1243dSDimitry Andric     return std::nullopt;
4523e8d8bef9SDimitry Andric 
4524e8d8bef9SDimitry Andric   return {{
4525e8d8bef9SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addReg(SAddr); }, // saddr
4526e8d8bef9SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset
4527e8d8bef9SDimitry Andric   }};
45280b57cec5SDimitry Andric }
45290b57cec5SDimitry Andric 
453081ad6265SDimitry Andric // Check whether the flat scratch SVS swizzle bug affects this access.
453181ad6265SDimitry Andric bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
453281ad6265SDimitry Andric     Register VAddr, Register SAddr, uint64_t ImmOffset) const {
453381ad6265SDimitry Andric   if (!Subtarget->hasFlatScratchSVSSwizzleBug())
453481ad6265SDimitry Andric     return false;
453581ad6265SDimitry Andric 
453681ad6265SDimitry Andric   // The bug affects the swizzling of SVS accesses if there is any carry out
453781ad6265SDimitry Andric   // from the two low order bits (i.e. from bit 1 into bit 2) when adding
453881ad6265SDimitry Andric   // voffset to (soffset + inst_offset).
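  //
  // As a hypothetical worked example of the check below: if the known maxima
  // were VMax = 3 and SMax = 1, then (VMax & 3) + (SMax & 3) == 4, so a carry
  // out of bit 1 is possible and the access is treated as affected; with
  // VMax = 2 and SMax = 1 the low two bits sum to at most 3 and no carry out
  // of bit 1 can occur.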
453906c3fb27SDimitry Andric   auto VKnown = KB->getKnownBits(VAddr);
454081ad6265SDimitry Andric   auto SKnown = KnownBits::computeForAddSub(
4541*0fca6ea1SDimitry Andric       /*Add=*/true, /*NSW=*/false, /*NUW=*/false, KB->getKnownBits(SAddr),
454281ad6265SDimitry Andric       KnownBits::makeConstant(APInt(32, ImmOffset)));
454381ad6265SDimitry Andric   uint64_t VMax = VKnown.getMaxValue().getZExtValue();
454481ad6265SDimitry Andric   uint64_t SMax = SKnown.getMaxValue().getZExtValue();
454581ad6265SDimitry Andric   return (VMax & 3) + (SMax & 3) >= 4;
454681ad6265SDimitry Andric }
454781ad6265SDimitry Andric 
454881ad6265SDimitry Andric InstructionSelector::ComplexRendererFns
454981ad6265SDimitry Andric AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
455081ad6265SDimitry Andric   Register Addr = Root.getReg();
455181ad6265SDimitry Andric   Register PtrBase;
455281ad6265SDimitry Andric   int64_t ConstOffset;
455381ad6265SDimitry Andric   int64_t ImmOffset = 0;
455481ad6265SDimitry Andric 
455581ad6265SDimitry Andric   // Match the immediate offset first, which canonically is moved as low as
455681ad6265SDimitry Andric   // possible.
455781ad6265SDimitry Andric   std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);
455881ad6265SDimitry Andric 
45595f757f3fSDimitry Andric   Register OrigAddr = Addr;
456081ad6265SDimitry Andric   if (ConstOffset != 0 &&
456181ad6265SDimitry Andric       TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::PRIVATE_ADDRESS, true)) {
456281ad6265SDimitry Andric     Addr = PtrBase;
456381ad6265SDimitry Andric     ImmOffset = ConstOffset;
456481ad6265SDimitry Andric   }
456581ad6265SDimitry Andric 
456681ad6265SDimitry Andric   auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
456781ad6265SDimitry Andric   if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
4568bdd1243dSDimitry Andric     return std::nullopt;
456981ad6265SDimitry Andric 
457081ad6265SDimitry Andric   Register RHS = AddrDef->MI->getOperand(2).getReg();
457181ad6265SDimitry Andric   if (RBI.getRegBank(RHS, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID)
4572bdd1243dSDimitry Andric     return std::nullopt;
457381ad6265SDimitry Andric 
457481ad6265SDimitry Andric   Register LHS = AddrDef->MI->getOperand(1).getReg();
457581ad6265SDimitry Andric   auto LHSDef = getDefSrcRegIgnoringCopies(LHS, *MRI);
457681ad6265SDimitry Andric 
45775f757f3fSDimitry Andric   if (OrigAddr != Addr) {
45785f757f3fSDimitry Andric     if (!isFlatScratchBaseLegalSVImm(OrigAddr))
457906c3fb27SDimitry Andric       return std::nullopt;
45805f757f3fSDimitry Andric   } else {
45815f757f3fSDimitry Andric     if (!isFlatScratchBaseLegalSV(OrigAddr))
45825f757f3fSDimitry Andric       return std::nullopt;
45835f757f3fSDimitry Andric   }
458406c3fb27SDimitry Andric 
458581ad6265SDimitry Andric   if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset))
4586bdd1243dSDimitry Andric     return std::nullopt;
458781ad6265SDimitry Andric 
458881ad6265SDimitry Andric   if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
458981ad6265SDimitry Andric     int FI = LHSDef->MI->getOperand(1).getIndex();
459081ad6265SDimitry Andric     return {{
459181ad6265SDimitry Andric         [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); }, // vaddr
459281ad6265SDimitry Andric         [=](MachineInstrBuilder &MIB) { MIB.addFrameIndex(FI); }, // saddr
459381ad6265SDimitry Andric         [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset
459481ad6265SDimitry Andric     }};
459581ad6265SDimitry Andric   }
459681ad6265SDimitry Andric 
459781ad6265SDimitry Andric   if (!isSGPR(LHS))
4598bdd1243dSDimitry Andric     return std::nullopt;
459981ad6265SDimitry Andric 
460081ad6265SDimitry Andric   return {{
460181ad6265SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); }, // vaddr
460281ad6265SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addReg(LHS); }, // saddr
460381ad6265SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset
460481ad6265SDimitry Andric   }};
460581ad6265SDimitry Andric }
460681ad6265SDimitry Andric 
46070b57cec5SDimitry Andric InstructionSelector::ComplexRendererFns
46080b57cec5SDimitry Andric AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
46090b57cec5SDimitry Andric   MachineInstr *MI = Root.getParent();
46100b57cec5SDimitry Andric   MachineBasicBlock *MBB = MI->getParent();
46110b57cec5SDimitry Andric   MachineFunction *MF = MBB->getParent();
46120b57cec5SDimitry Andric   const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
46130b57cec5SDimitry Andric 
46140b57cec5SDimitry Andric   int64_t Offset = 0;
46155ffd83dbSDimitry Andric   if (mi_match(Root.getReg(), *MRI, m_ICst(Offset)) &&
46165ffd83dbSDimitry Andric       Offset != TM.getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS)) {
46178bcb0991SDimitry Andric     Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
46180b57cec5SDimitry Andric 
46190b57cec5SDimitry Andric     // TODO: Should this be inside the render function? The iterator seems to
46200b57cec5SDimitry Andric     // move.
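    // The constant is split so the low bits that fit in the MUBUF immediate
    // stay in the offset field and the remainder goes into vaddr. As a
    // hypothetical example with MaxOffset = 4095, an address of 0x12345 is
    // emitted as vaddr = 0x12000 and offset = 0x345.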
46215f757f3fSDimitry Andric     const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset(*Subtarget);
46220b57cec5SDimitry Andric     BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
46230b57cec5SDimitry Andric             HighBits)
462406c3fb27SDimitry Andric         .addImm(Offset & ~MaxOffset);
46250b57cec5SDimitry Andric 
46260b57cec5SDimitry Andric     return {{[=](MachineInstrBuilder &MIB) { // rsrc
46270b57cec5SDimitry Andric                MIB.addReg(Info->getScratchRSrcReg());
46280b57cec5SDimitry Andric              },
46290b57cec5SDimitry Andric              [=](MachineInstrBuilder &MIB) { // vaddr
46300b57cec5SDimitry Andric                MIB.addReg(HighBits);
46310b57cec5SDimitry Andric              },
46320b57cec5SDimitry Andric              [=](MachineInstrBuilder &MIB) { // soffset
4633e8d8bef9SDimitry Andric                // Use constant zero for soffset and rely on eliminateFrameIndex
4634e8d8bef9SDimitry Andric                // to choose the appropriate frame register if need be.
46355ffd83dbSDimitry Andric                MIB.addImm(0);
46360b57cec5SDimitry Andric              },
46370b57cec5SDimitry Andric              [=](MachineInstrBuilder &MIB) { // offset
463806c3fb27SDimitry Andric                MIB.addImm(Offset & MaxOffset);
46390b57cec5SDimitry Andric              }}};
46400b57cec5SDimitry Andric   }
46410b57cec5SDimitry Andric 
46425ffd83dbSDimitry Andric   assert(Offset == 0 || Offset == -1);
46430b57cec5SDimitry Andric 
46440b57cec5SDimitry Andric   // Try to fold a frame index directly into the MUBUF vaddr field, and any
46450b57cec5SDimitry Andric   // offsets.
4646bdd1243dSDimitry Andric   std::optional<int> FI;
46470b57cec5SDimitry Andric   Register VAddr = Root.getReg();
46488bcb0991SDimitry Andric   if (const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg())) {
4649fe6060f1SDimitry Andric     Register PtrBase;
4650fe6060f1SDimitry Andric     int64_t ConstOffset;
4651fe6060f1SDimitry Andric     std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(VAddr, *MRI);
4652fe6060f1SDimitry Andric     if (ConstOffset != 0) {
46535f757f3fSDimitry Andric       if (TII.isLegalMUBUFImmOffset(ConstOffset) &&
46540b57cec5SDimitry Andric           (!STI.privateMemoryResourceIsRangeChecked() ||
465506c3fb27SDimitry Andric            KB->signBitIsZero(PtrBase))) {
4656fe6060f1SDimitry Andric         const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase);
4657fe6060f1SDimitry Andric         if (PtrBaseDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
4658fe6060f1SDimitry Andric           FI = PtrBaseDef->getOperand(1).getIndex();
46590b57cec5SDimitry Andric         else
4660fe6060f1SDimitry Andric           VAddr = PtrBase;
4661fe6060f1SDimitry Andric         Offset = ConstOffset;
46620b57cec5SDimitry Andric       }
46630b57cec5SDimitry Andric     } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {
46640b57cec5SDimitry Andric       FI = RootDef->getOperand(1).getIndex();
46650b57cec5SDimitry Andric     }
46660b57cec5SDimitry Andric   }
46670b57cec5SDimitry Andric 
46680b57cec5SDimitry Andric   return {{[=](MachineInstrBuilder &MIB) { // rsrc
46690b57cec5SDimitry Andric              MIB.addReg(Info->getScratchRSrcReg());
46700b57cec5SDimitry Andric            },
46710b57cec5SDimitry Andric            [=](MachineInstrBuilder &MIB) { // vaddr
467281ad6265SDimitry Andric              if (FI)
4673bdd1243dSDimitry Andric                MIB.addFrameIndex(*FI);
46740b57cec5SDimitry Andric              else
46750b57cec5SDimitry Andric                MIB.addReg(VAddr);
46760b57cec5SDimitry Andric            },
46770b57cec5SDimitry Andric            [=](MachineInstrBuilder &MIB) { // soffset
4678e8d8bef9SDimitry Andric              // Use constant zero for soffset and rely on eliminateFrameIndex
4679e8d8bef9SDimitry Andric              // to choose the appropriate frame register if need be.
46805ffd83dbSDimitry Andric              MIB.addImm(0);
46810b57cec5SDimitry Andric            },
46820b57cec5SDimitry Andric            [=](MachineInstrBuilder &MIB) { // offset
46830b57cec5SDimitry Andric              MIB.addImm(Offset);
46840b57cec5SDimitry Andric            }}};
46850b57cec5SDimitry Andric }
46860b57cec5SDimitry Andric 
46875ffd83dbSDimitry Andric bool AMDGPUInstructionSelector::isDSOffsetLegal(Register Base,
4688e8d8bef9SDimitry Andric                                                 int64_t Offset) const {
4689e8d8bef9SDimitry Andric   if (!isUInt<16>(Offset))
4690e8d8bef9SDimitry Andric     return false;
4691e8d8bef9SDimitry Andric 
4692e8d8bef9SDimitry Andric   if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
4693e8d8bef9SDimitry Andric     return true;
4694e8d8bef9SDimitry Andric 
4695e8d8bef9SDimitry Andric   // On Southern Islands, instructions with a negative base value and an offset
4696e8d8bef9SDimitry Andric   // don't seem to work.
469706c3fb27SDimitry Andric   return KB->signBitIsZero(Base);
4698e8d8bef9SDimitry Andric }
4699e8d8bef9SDimitry Andric 
4700e8d8bef9SDimitry Andric bool AMDGPUInstructionSelector::isDSOffset2Legal(Register Base, int64_t Offset0,
4701e8d8bef9SDimitry Andric                                                  int64_t Offset1,
4702e8d8bef9SDimitry Andric                                                  unsigned Size) const {
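  // The two offsets must be multiples of Size and, once scaled by Size, must
  // fit the 8-bit offset fields. As a hypothetical example with Size = 4,
  // Offset0 = 8 and Offset1 = 12 encode as 2 and 3 and are accepted, while
  // Offset0 = 6 (not 4-aligned) or Offset0 = 2048 (scaled value 512 does not
  // fit in 8 bits) are rejected.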
4703e8d8bef9SDimitry Andric   if (Offset0 % Size != 0 || Offset1 % Size != 0)
4704e8d8bef9SDimitry Andric     return false;
4705e8d8bef9SDimitry Andric   if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
47068bcb0991SDimitry Andric     return false;
47078bcb0991SDimitry Andric 
47088bcb0991SDimitry Andric   if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
47098bcb0991SDimitry Andric     return true;
47108bcb0991SDimitry Andric 
47118bcb0991SDimitry Andric   // On Southern Islands, instructions with a negative base value and an offset
47128bcb0991SDimitry Andric   // don't seem to work.
471306c3fb27SDimitry Andric   return KB->signBitIsZero(Base);
471406c3fb27SDimitry Andric }
471506c3fb27SDimitry Andric 
47165f757f3fSDimitry Andric // Return whether the operation has the NoUnsignedWrap property.
47175f757f3fSDimitry Andric static bool isNoUnsignedWrap(MachineInstr *Addr) {
47185f757f3fSDimitry Andric   return Addr->getOpcode() == TargetOpcode::G_OR ||
47195f757f3fSDimitry Andric          (Addr->getOpcode() == TargetOpcode::G_PTR_ADD &&
47205f757f3fSDimitry Andric           Addr->getFlag(MachineInstr::NoUWrap));
47215f757f3fSDimitry Andric }
47225f757f3fSDimitry Andric 
47235f757f3fSDimitry Andric // Check that the base address of a flat scratch load/store, in the form of
47245f757f3fSDimitry Andric // `base + offset`, is legal to put in an SGPR/VGPR (i.e. unsigned, per the
47255f757f3fSDimitry Andric // hardware requirement). We always treat the first operand as the base address.
47265f757f3fSDimitry Andric bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(Register Addr) const {
47275f757f3fSDimitry Andric   MachineInstr *AddrMI = getDefIgnoringCopies(Addr, *MRI);
47285f757f3fSDimitry Andric 
47295f757f3fSDimitry Andric   if (isNoUnsignedWrap(AddrMI))
473006c3fb27SDimitry Andric     return true;
473106c3fb27SDimitry Andric 
47325f757f3fSDimitry Andric   // Starting with GFX12, VADDR and SADDR fields in VSCRATCH can use negative
47335f757f3fSDimitry Andric   // values.
47347a6dacacSDimitry Andric   if (STI.hasSignedScratchOffsets())
47355f757f3fSDimitry Andric     return true;
47365f757f3fSDimitry Andric 
47375f757f3fSDimitry Andric   Register LHS = AddrMI->getOperand(1).getReg();
47385f757f3fSDimitry Andric   Register RHS = AddrMI->getOperand(2).getReg();
47395f757f3fSDimitry Andric 
47405f757f3fSDimitry Andric   if (AddrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
47415f757f3fSDimitry Andric     std::optional<ValueAndVReg> RhsValReg =
47425f757f3fSDimitry Andric         getIConstantVRegValWithLookThrough(RHS, *MRI);
47435f757f3fSDimitry Andric     // If the immediate offset is negative and within a certain range, the base
47445f757f3fSDimitry Andric     // address cannot also be negative: if it were, the sum would be either
47455f757f3fSDimitry Andric     // negative or much larger than the valid range of scratch memory a thread
47465f757f3fSDimitry Andric     // can access.
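    //
    // As a hypothetical illustration: with an offset of -16, a base with bit
    // 31 set would leave the unsigned sum above 1 GiB (since the offset is
    // greater than -1 GiB), far beyond any per-thread scratch allocation, so a
    // well-formed access implies the base's sign bit is clear.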
47475f757f3fSDimitry Andric     if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&
47485f757f3fSDimitry Andric         RhsValReg->Value.getSExtValue() > -0x40000000)
47495f757f3fSDimitry Andric       return true;
47505f757f3fSDimitry Andric   }
47515f757f3fSDimitry Andric 
47525f757f3fSDimitry Andric   return KB->signBitIsZero(LHS);
47535f757f3fSDimitry Andric }
47545f757f3fSDimitry Andric 
47555f757f3fSDimitry Andric // Check that the address values in the SGPR/VGPR are legal for a flat scratch
47565f757f3fSDimitry Andric // access in the form: SGPR + VGPR.
47575f757f3fSDimitry Andric bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(Register Addr) const {
47585f757f3fSDimitry Andric   MachineInstr *AddrMI = getDefIgnoringCopies(Addr, *MRI);
47595f757f3fSDimitry Andric 
47605f757f3fSDimitry Andric   if (isNoUnsignedWrap(AddrMI))
47615f757f3fSDimitry Andric     return true;
47625f757f3fSDimitry Andric 
47637a6dacacSDimitry Andric   // Starting with GFX12, VADDR and SADDR fields in VSCRATCH can use negative
47647a6dacacSDimitry Andric   // values.
47657a6dacacSDimitry Andric   if (STI.hasSignedScratchOffsets())
47667a6dacacSDimitry Andric     return true;
47677a6dacacSDimitry Andric 
47685f757f3fSDimitry Andric   Register LHS = AddrMI->getOperand(1).getReg();
47695f757f3fSDimitry Andric   Register RHS = AddrMI->getOperand(2).getReg();
47705f757f3fSDimitry Andric   return KB->signBitIsZero(RHS) && KB->signBitIsZero(LHS);
47715f757f3fSDimitry Andric }
47725f757f3fSDimitry Andric 
47735f757f3fSDimitry Andric // Check that the address values in the SGPR/VGPR are legal for a flat scratch
47745f757f3fSDimitry Andric // access in the form: SGPR + VGPR + Imm.
47755f757f3fSDimitry Andric bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(
47765f757f3fSDimitry Andric     Register Addr) const {
47777a6dacacSDimitry Andric   // Starting with GFX12, VADDR and SADDR fields in VSCRATCH can use negative
47787a6dacacSDimitry Andric   // values.
47797a6dacacSDimitry Andric   if (STI.hasSignedScratchOffsets())
47807a6dacacSDimitry Andric     return true;
47817a6dacacSDimitry Andric 
47825f757f3fSDimitry Andric   MachineInstr *AddrMI = getDefIgnoringCopies(Addr, *MRI);
47835f757f3fSDimitry Andric   Register Base = AddrMI->getOperand(1).getReg();
47845f757f3fSDimitry Andric   std::optional<DefinitionAndSourceRegister> BaseDef =
47855f757f3fSDimitry Andric       getDefSrcRegIgnoringCopies(Base, *MRI);
47865f757f3fSDimitry Andric   std::optional<ValueAndVReg> RHSOffset =
47875f757f3fSDimitry Andric       getIConstantVRegValWithLookThrough(AddrMI->getOperand(2).getReg(), *MRI);
47885f757f3fSDimitry Andric   assert(RHSOffset);
47895f757f3fSDimitry Andric 
47905f757f3fSDimitry Andric   // If the immediate offset is negative and within a certain range, the base
47915f757f3fSDimitry Andric   // address cannot also be negative: if it were, the sum would be either
47925f757f3fSDimitry Andric   // negative or much larger than the valid range of scratch memory a thread
47935f757f3fSDimitry Andric   // can access.
47945f757f3fSDimitry Andric   if (isNoUnsignedWrap(BaseDef->MI) &&
47955f757f3fSDimitry Andric       (isNoUnsignedWrap(AddrMI) ||
47965f757f3fSDimitry Andric        (RHSOffset->Value.getSExtValue() < 0 &&
47975f757f3fSDimitry Andric         RHSOffset->Value.getSExtValue() > -0x40000000)))
47985f757f3fSDimitry Andric     return true;
47995f757f3fSDimitry Andric 
48005f757f3fSDimitry Andric   Register LHS = BaseDef->MI->getOperand(1).getReg();
48015f757f3fSDimitry Andric   Register RHS = BaseDef->MI->getOperand(2).getReg();
48025f757f3fSDimitry Andric   return KB->signBitIsZero(RHS) && KB->signBitIsZero(LHS);
48038bcb0991SDimitry Andric }
48048bcb0991SDimitry Andric 
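// A shift-amount mask (the G_AND below) is unneeded when it cannot clear any
// bit the shift instruction actually reads. As a hypothetical example, for a
// 32-bit shift ShAmtBits is 5, so masking the amount with 0x1f (five trailing
// ones) or 0xff changes nothing and the G_AND can be skipped.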
48054824e7fdSDimitry Andric bool AMDGPUInstructionSelector::isUnneededShiftMask(const MachineInstr &MI,
48064824e7fdSDimitry Andric                                                     unsigned ShAmtBits) const {
48074824e7fdSDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_AND);
48084824e7fdSDimitry Andric 
4809bdd1243dSDimitry Andric   std::optional<APInt> RHS =
4810bdd1243dSDimitry Andric       getIConstantVRegVal(MI.getOperand(2).getReg(), *MRI);
48114824e7fdSDimitry Andric   if (!RHS)
48124824e7fdSDimitry Andric     return false;
48134824e7fdSDimitry Andric 
481406c3fb27SDimitry Andric   if (RHS->countr_one() >= ShAmtBits)
48154824e7fdSDimitry Andric     return true;
48164824e7fdSDimitry Andric 
481706c3fb27SDimitry Andric   const APInt &LHSKnownZeros = KB->getKnownZeroes(MI.getOperand(1).getReg());
481806c3fb27SDimitry Andric   return (LHSKnownZeros | *RHS).countr_one() >= ShAmtBits;
48194824e7fdSDimitry Andric }
48204824e7fdSDimitry Andric 
48210b57cec5SDimitry Andric InstructionSelector::ComplexRendererFns
48220b57cec5SDimitry Andric AMDGPUInstructionSelector::selectMUBUFScratchOffset(
48230b57cec5SDimitry Andric     MachineOperand &Root) const {
482404eeddc0SDimitry Andric   Register Reg = Root.getReg();
482504eeddc0SDimitry Andric   const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
482604eeddc0SDimitry Andric 
48275f757f3fSDimitry Andric   std::optional<DefinitionAndSourceRegister> Def =
48285f757f3fSDimitry Andric     getDefSrcRegIgnoringCopies(Reg, *MRI);
48295f757f3fSDimitry Andric   assert(Def && "this shouldn't be an optional result");
48305f757f3fSDimitry Andric   Reg = Def->Reg;
48315f757f3fSDimitry Andric 
48325f757f3fSDimitry Andric   if (Register WaveBase = getWaveAddress(Def->MI)) {
483304eeddc0SDimitry Andric     return {{
483404eeddc0SDimitry Andric         [=](MachineInstrBuilder &MIB) { // rsrc
483504eeddc0SDimitry Andric           MIB.addReg(Info->getScratchRSrcReg());
483604eeddc0SDimitry Andric         },
483704eeddc0SDimitry Andric         [=](MachineInstrBuilder &MIB) { // soffset
483804eeddc0SDimitry Andric           MIB.addReg(WaveBase);
483904eeddc0SDimitry Andric         },
484004eeddc0SDimitry Andric         [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // offset
484104eeddc0SDimitry Andric     }};
484204eeddc0SDimitry Andric   }
48430b57cec5SDimitry Andric 
48440b57cec5SDimitry Andric   int64_t Offset = 0;
484504eeddc0SDimitry Andric 
484604eeddc0SDimitry Andric   // FIXME: Copy check is a hack
484704eeddc0SDimitry Andric   Register BasePtr;
48485f757f3fSDimitry Andric   if (mi_match(Reg, *MRI,
48495f757f3fSDimitry Andric                m_GPtrAdd(m_Reg(BasePtr),
48505f757f3fSDimitry Andric                          m_any_of(m_ICst(Offset), m_Copy(m_ICst(Offset)))))) {
48515f757f3fSDimitry Andric     if (!TII.isLegalMUBUFImmOffset(Offset))
485204eeddc0SDimitry Andric       return {};
48535f757f3fSDimitry Andric     MachineInstr *BasePtrDef = getDefIgnoringCopies(BasePtr, *MRI);
485404eeddc0SDimitry Andric     Register WaveBase = getWaveAddress(BasePtrDef);
485504eeddc0SDimitry Andric     if (!WaveBase)
485604eeddc0SDimitry Andric       return {};
485704eeddc0SDimitry Andric 
485804eeddc0SDimitry Andric     return {{
485904eeddc0SDimitry Andric         [=](MachineInstrBuilder &MIB) { // rsrc
486004eeddc0SDimitry Andric           MIB.addReg(Info->getScratchRSrcReg());
486104eeddc0SDimitry Andric         },
486204eeddc0SDimitry Andric         [=](MachineInstrBuilder &MIB) { // soffset
486304eeddc0SDimitry Andric           MIB.addReg(WaveBase);
486404eeddc0SDimitry Andric         },
486504eeddc0SDimitry Andric         [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); } // offset
486604eeddc0SDimitry Andric     }};
486704eeddc0SDimitry Andric   }
486804eeddc0SDimitry Andric 
48698bcb0991SDimitry Andric   if (!mi_match(Root.getReg(), *MRI, m_ICst(Offset)) ||
48705f757f3fSDimitry Andric       !TII.isLegalMUBUFImmOffset(Offset))
48710b57cec5SDimitry Andric     return {};
48720b57cec5SDimitry Andric 
48730b57cec5SDimitry Andric   return {{
48745ffd83dbSDimitry Andric       [=](MachineInstrBuilder &MIB) { // rsrc
48750b57cec5SDimitry Andric         MIB.addReg(Info->getScratchRSrcReg());
48765ffd83dbSDimitry Andric       },
48775ffd83dbSDimitry Andric       [=](MachineInstrBuilder &MIB) { // soffset
48785ffd83dbSDimitry Andric         MIB.addImm(0);
48795ffd83dbSDimitry Andric       },
48800b57cec5SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); } // offset
48810b57cec5SDimitry Andric   }};
48820b57cec5SDimitry Andric }
48838bcb0991SDimitry Andric 
48845ffd83dbSDimitry Andric std::pair<Register, unsigned>
48855ffd83dbSDimitry Andric AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(MachineOperand &Root) const {
48868bcb0991SDimitry Andric   const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
48875ffd83dbSDimitry Andric   if (!RootDef)
4888bdd1243dSDimitry Andric     return std::pair(Root.getReg(), 0);
48898bcb0991SDimitry Andric 
48908bcb0991SDimitry Andric   int64_t ConstAddr = 0;
48915ffd83dbSDimitry Andric 
48925ffd83dbSDimitry Andric   Register PtrBase;
48935ffd83dbSDimitry Andric   int64_t Offset;
48945ffd83dbSDimitry Andric   std::tie(PtrBase, Offset) =
48955ffd83dbSDimitry Andric     getPtrBaseWithConstantOffset(Root.getReg(), *MRI);
48965ffd83dbSDimitry Andric 
48975ffd83dbSDimitry Andric   if (Offset) {
4898e8d8bef9SDimitry Andric     if (isDSOffsetLegal(PtrBase, Offset)) {
48998bcb0991SDimitry Andric       // (add n0, c0)
4900bdd1243dSDimitry Andric       return std::pair(PtrBase, Offset);
49018bcb0991SDimitry Andric     }
49028bcb0991SDimitry Andric   } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
49035ffd83dbSDimitry Andric     // TODO
49048bcb0991SDimitry Andric 
49058bcb0991SDimitry Andric 
49068bcb0991SDimitry Andric   } else if (mi_match(Root.getReg(), *MRI, m_ICst(ConstAddr))) {
49075ffd83dbSDimitry Andric     // TODO
49088bcb0991SDimitry Andric 
49098bcb0991SDimitry Andric   }
49108bcb0991SDimitry Andric 
4911bdd1243dSDimitry Andric   return std::pair(Root.getReg(), 0);
49125ffd83dbSDimitry Andric }
49135ffd83dbSDimitry Andric 
49145ffd83dbSDimitry Andric InstructionSelector::ComplexRendererFns
49155ffd83dbSDimitry Andric AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
49165ffd83dbSDimitry Andric   Register Reg;
49175ffd83dbSDimitry Andric   unsigned Offset;
49185ffd83dbSDimitry Andric   std::tie(Reg, Offset) = selectDS1Addr1OffsetImpl(Root);
49198bcb0991SDimitry Andric   return {{
49205ffd83dbSDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
49215ffd83dbSDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }
49228bcb0991SDimitry Andric     }};
49238bcb0991SDimitry Andric }
49248bcb0991SDimitry Andric 
49255ffd83dbSDimitry Andric InstructionSelector::ComplexRendererFns
49265ffd83dbSDimitry Andric AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(MachineOperand &Root) const {
4927e8d8bef9SDimitry Andric   return selectDSReadWrite2(Root, 4);
4928e8d8bef9SDimitry Andric }
4929e8d8bef9SDimitry Andric 
4930e8d8bef9SDimitry Andric InstructionSelector::ComplexRendererFns
4931e8d8bef9SDimitry Andric AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(MachineOperand &Root) const {
4932e8d8bef9SDimitry Andric   return selectDSReadWrite2(Root, 8);
4933e8d8bef9SDimitry Andric }
4934e8d8bef9SDimitry Andric 
4935e8d8bef9SDimitry Andric InstructionSelector::ComplexRendererFns
4936e8d8bef9SDimitry Andric AMDGPUInstructionSelector::selectDSReadWrite2(MachineOperand &Root,
4937e8d8bef9SDimitry Andric                                               unsigned Size) const {
49385ffd83dbSDimitry Andric   Register Reg;
49395ffd83dbSDimitry Andric   unsigned Offset;
4940e8d8bef9SDimitry Andric   std::tie(Reg, Offset) = selectDSReadWrite2Impl(Root, Size);
49415ffd83dbSDimitry Andric   return {{
49425ffd83dbSDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
49435ffd83dbSDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); },
49445ffd83dbSDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset+1); }
49455ffd83dbSDimitry Andric     }};
49465ffd83dbSDimitry Andric }
49475ffd83dbSDimitry Andric 
49485ffd83dbSDimitry Andric std::pair<Register, unsigned>
4949e8d8bef9SDimitry Andric AMDGPUInstructionSelector::selectDSReadWrite2Impl(MachineOperand &Root,
4950e8d8bef9SDimitry Andric                                                   unsigned Size) const {
49515ffd83dbSDimitry Andric   const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
49525ffd83dbSDimitry Andric   if (!RootDef)
4953bdd1243dSDimitry Andric     return std::pair(Root.getReg(), 0);
49545ffd83dbSDimitry Andric 
49555ffd83dbSDimitry Andric   int64_t ConstAddr = 0;
49565ffd83dbSDimitry Andric 
49575ffd83dbSDimitry Andric   Register PtrBase;
49585ffd83dbSDimitry Andric   int64_t Offset;
49595ffd83dbSDimitry Andric   std::tie(PtrBase, Offset) =
49605ffd83dbSDimitry Andric     getPtrBaseWithConstantOffset(Root.getReg(), *MRI);
49615ffd83dbSDimitry Andric 
49625ffd83dbSDimitry Andric   if (Offset) {
4963e8d8bef9SDimitry Andric     int64_t OffsetValue0 = Offset;
4964e8d8bef9SDimitry Andric     int64_t OffsetValue1 = Offset + Size;
4965e8d8bef9SDimitry Andric     if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1, Size)) {
49665ffd83dbSDimitry Andric       // (add n0, c0)
4967bdd1243dSDimitry Andric       return std::pair(PtrBase, OffsetValue0 / Size);
49685ffd83dbSDimitry Andric     }
49695ffd83dbSDimitry Andric   } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
49705ffd83dbSDimitry Andric     // TODO
49715ffd83dbSDimitry Andric 
49725ffd83dbSDimitry Andric   } else if (mi_match(Root.getReg(), *MRI, m_ICst(ConstAddr))) {
49735ffd83dbSDimitry Andric     // TODO
49745ffd83dbSDimitry Andric 
49755ffd83dbSDimitry Andric   }
49765ffd83dbSDimitry Andric 
4977bdd1243dSDimitry Andric   return std::pair(Root.getReg(), 0);
49785ffd83dbSDimitry Andric }
49795ffd83dbSDimitry Andric 
49805ffd83dbSDimitry Andric /// If \p Root is a G_PTR_ADD with a G_CONSTANT on the right hand side, return
49815ffd83dbSDimitry Andric /// the base value with the constant offset. There may be intervening copies
49825ffd83dbSDimitry Andric /// between \p Root and the identified constant. Returns \p Root, 0 if this does
49835ffd83dbSDimitry Andric /// not match the pattern.
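///
/// A hypothetical MIR sketch of the matched shape:
///   %c:_(s32) = G_CONSTANT i32 16
///   %addr:_(p5) = G_PTR_ADD %base, %c
/// yields {%base, 16}; anything else yields {Root, 0}.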
49845ffd83dbSDimitry Andric std::pair<Register, int64_t>
49855ffd83dbSDimitry Andric AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
49865ffd83dbSDimitry Andric   Register Root, const MachineRegisterInfo &MRI) const {
4987e8d8bef9SDimitry Andric   MachineInstr *RootI = getDefIgnoringCopies(Root, MRI);
49885ffd83dbSDimitry Andric   if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)
49895ffd83dbSDimitry Andric     return {Root, 0};
49905ffd83dbSDimitry Andric 
49915ffd83dbSDimitry Andric   MachineOperand &RHS = RootI->getOperand(2);
4992bdd1243dSDimitry Andric   std::optional<ValueAndVReg> MaybeOffset =
4993349cc55cSDimitry Andric       getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
49945ffd83dbSDimitry Andric   if (!MaybeOffset)
49955ffd83dbSDimitry Andric     return {Root, 0};
4996e8d8bef9SDimitry Andric   return {RootI->getOperand(1).getReg(), MaybeOffset->Value.getSExtValue()};
49975ffd83dbSDimitry Andric }
49985ffd83dbSDimitry Andric 
49995ffd83dbSDimitry Andric static void addZeroImm(MachineInstrBuilder &MIB) {
50005ffd83dbSDimitry Andric   MIB.addImm(0);
50015ffd83dbSDimitry Andric }
50025ffd83dbSDimitry Andric 
50035ffd83dbSDimitry Andric /// Return a resource descriptor for use with an arbitrary 64-bit pointer. If \p
50045ffd83dbSDimitry Andric /// BasePtr is not valid, a null base pointer will be used.
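///
/// The 128-bit descriptor is assembled as sub0_sub1 = BasePtr (or an
/// S_MOV_B64 of 0 when no base is given), sub2 = FormatLo, sub3 = FormatHi,
/// using two REG_SEQUENCEs so the constant half can be CSE'd across
/// descriptors.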
50055ffd83dbSDimitry Andric static Register buildRSRC(MachineIRBuilder &B, MachineRegisterInfo &MRI,
50065ffd83dbSDimitry Andric                           uint32_t FormatLo, uint32_t FormatHi,
50075ffd83dbSDimitry Andric                           Register BasePtr) {
50085ffd83dbSDimitry Andric   Register RSrc2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
50095ffd83dbSDimitry Andric   Register RSrc3 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
50105ffd83dbSDimitry Andric   Register RSrcHi = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
50115ffd83dbSDimitry Andric   Register RSrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
50125ffd83dbSDimitry Andric 
50135ffd83dbSDimitry Andric   B.buildInstr(AMDGPU::S_MOV_B32)
50145ffd83dbSDimitry Andric     .addDef(RSrc2)
50155ffd83dbSDimitry Andric     .addImm(FormatLo);
50165ffd83dbSDimitry Andric   B.buildInstr(AMDGPU::S_MOV_B32)
50175ffd83dbSDimitry Andric     .addDef(RSrc3)
50185ffd83dbSDimitry Andric     .addImm(FormatHi);
50195ffd83dbSDimitry Andric 
50205ffd83dbSDimitry Andric   // Build the 64-bit half of the descriptor that holds the constants before
50215ffd83dbSDimitry Andric   // building the full 128-bit register. If we are building multiple resource
50225ffd83dbSDimitry Andric   // descriptors, this will allow CSEing of the 2-component register.
50235ffd83dbSDimitry Andric   B.buildInstr(AMDGPU::REG_SEQUENCE)
50245ffd83dbSDimitry Andric     .addDef(RSrcHi)
50255ffd83dbSDimitry Andric     .addReg(RSrc2)
50265ffd83dbSDimitry Andric     .addImm(AMDGPU::sub0)
50275ffd83dbSDimitry Andric     .addReg(RSrc3)
50285ffd83dbSDimitry Andric     .addImm(AMDGPU::sub1);
50295ffd83dbSDimitry Andric 
50305ffd83dbSDimitry Andric   Register RSrcLo = BasePtr;
50315ffd83dbSDimitry Andric   if (!BasePtr) {
50325ffd83dbSDimitry Andric     RSrcLo = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
50335ffd83dbSDimitry Andric     B.buildInstr(AMDGPU::S_MOV_B64)
50345ffd83dbSDimitry Andric       .addDef(RSrcLo)
50355ffd83dbSDimitry Andric       .addImm(0);
50365ffd83dbSDimitry Andric   }
50375ffd83dbSDimitry Andric 
50385ffd83dbSDimitry Andric   B.buildInstr(AMDGPU::REG_SEQUENCE)
50395ffd83dbSDimitry Andric     .addDef(RSrc)
50405ffd83dbSDimitry Andric     .addReg(RSrcLo)
50415ffd83dbSDimitry Andric     .addImm(AMDGPU::sub0_sub1)
50425ffd83dbSDimitry Andric     .addReg(RSrcHi)
50435ffd83dbSDimitry Andric     .addImm(AMDGPU::sub2_sub3);
50445ffd83dbSDimitry Andric 
50455ffd83dbSDimitry Andric   return RSrc;
50465ffd83dbSDimitry Andric }
50475ffd83dbSDimitry Andric 
50485ffd83dbSDimitry Andric static Register buildAddr64RSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI,
50495ffd83dbSDimitry Andric                                 const SIInstrInfo &TII, Register BasePtr) {
50505ffd83dbSDimitry Andric   uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();
50515ffd83dbSDimitry Andric 
50525ffd83dbSDimitry Andric   // FIXME: Why are half the "default" bits ignored based on the addressing
50535ffd83dbSDimitry Andric   // mode?
50545ffd83dbSDimitry Andric   return buildRSRC(B, MRI, 0, Hi_32(DefaultFormat), BasePtr);
50555ffd83dbSDimitry Andric }
50565ffd83dbSDimitry Andric 
50575ffd83dbSDimitry Andric static Register buildOffsetSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI,
50585ffd83dbSDimitry Andric                                const SIInstrInfo &TII, Register BasePtr) {
50595ffd83dbSDimitry Andric   uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();
50605ffd83dbSDimitry Andric 
50615ffd83dbSDimitry Andric   // FIXME: Why are half the "default" bits ignored based on the addressing
50625ffd83dbSDimitry Andric   // mode?
50635ffd83dbSDimitry Andric   return buildRSRC(B, MRI, -1, Hi_32(DefaultFormat), BasePtr);
50645ffd83dbSDimitry Andric }
50655ffd83dbSDimitry Andric 
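// Decompose a MUBUF address into base and offset components. As a hypothetical
// example, for
//   %a:_(p1) = G_PTR_ADD %n2, %n3
//   %src:_(p1) = G_PTR_ADD %a, (G_CONSTANT i64 32)
// the result has N0 = %a, N2 = %n2, N3 = %n3 and Offset = 32; a plain pointer
// with no constant offset leaves N2/N3 unset and Offset = 0.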
50665ffd83dbSDimitry Andric AMDGPUInstructionSelector::MUBUFAddressData
50675ffd83dbSDimitry Andric AMDGPUInstructionSelector::parseMUBUFAddress(Register Src) const {
50685ffd83dbSDimitry Andric   MUBUFAddressData Data;
50695ffd83dbSDimitry Andric   Data.N0 = Src;
50705ffd83dbSDimitry Andric 
50715ffd83dbSDimitry Andric   Register PtrBase;
50725ffd83dbSDimitry Andric   int64_t Offset;
50735ffd83dbSDimitry Andric 
50745ffd83dbSDimitry Andric   std::tie(PtrBase, Offset) = getPtrBaseWithConstantOffset(Src, *MRI);
50755ffd83dbSDimitry Andric   if (isUInt<32>(Offset)) {
50765ffd83dbSDimitry Andric     Data.N0 = PtrBase;
50775ffd83dbSDimitry Andric     Data.Offset = Offset;
50785ffd83dbSDimitry Andric   }
50795ffd83dbSDimitry Andric 
50805ffd83dbSDimitry Andric   if (MachineInstr *InputAdd
50815ffd83dbSDimitry Andric       = getOpcodeDef(TargetOpcode::G_PTR_ADD, Data.N0, *MRI)) {
50825ffd83dbSDimitry Andric     Data.N2 = InputAdd->getOperand(1).getReg();
50835ffd83dbSDimitry Andric     Data.N3 = InputAdd->getOperand(2).getReg();
50845ffd83dbSDimitry Andric 
50855ffd83dbSDimitry Andric     // FIXME: Need to fix the extra SGPR->VGPR copies that get inserted
50865ffd83dbSDimitry Andric     // FIXME: We don't know that this was defined by operand 0
50875ffd83dbSDimitry Andric     //
50885ffd83dbSDimitry Andric     // TODO: Remove this when we have copy folding optimizations after
50895ffd83dbSDimitry Andric     // RegBankSelect.
50905ffd83dbSDimitry Andric     Data.N2 = getDefIgnoringCopies(Data.N2, *MRI)->getOperand(0).getReg();
50915ffd83dbSDimitry Andric     Data.N3 = getDefIgnoringCopies(Data.N3, *MRI)->getOperand(0).getReg();
50925ffd83dbSDimitry Andric   }
50935ffd83dbSDimitry Andric 
50945ffd83dbSDimitry Andric   return Data;
50955ffd83dbSDimitry Andric }
50965ffd83dbSDimitry Andric 
50975ffd83dbSDimitry Andric /// Return whether the addr64 MUBUF mode should be used for the given address.
50985ffd83dbSDimitry Andric bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr) const {
50995ffd83dbSDimitry Andric   // (ptr_add N2, N3) -> addr64, or
51005ffd83dbSDimitry Andric   // (ptr_add (ptr_add N2, N3), C1) -> addr64
51015ffd83dbSDimitry Andric   if (Addr.N2)
51025ffd83dbSDimitry Andric     return true;
51035ffd83dbSDimitry Andric 
51045ffd83dbSDimitry Andric   const RegisterBank *N0Bank = RBI.getRegBank(Addr.N0, *MRI, TRI);
51055ffd83dbSDimitry Andric   return N0Bank->getID() == AMDGPU::VGPRRegBankID;
51065ffd83dbSDimitry Andric }
51075ffd83dbSDimitry Andric 
51085ffd83dbSDimitry Andric /// Split an immediate offset \p ImmOffset depending on whether it fits in the
51095ffd83dbSDimitry Andric /// immediate field. Modifies \p ImmOffset and sets \p SOffset to the variable
51105ffd83dbSDimitry Andric /// component.
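///
/// As a hypothetical example, on a target whose MUBUF immediate limit is 4095,
/// an \p ImmOffset of 0x10000 is materialized into \p SOffset with S_MOV_B32
/// and \p ImmOffset is reset to 0, while a small offset such as 16 is left
/// unchanged.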
51115ffd83dbSDimitry Andric void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
51125ffd83dbSDimitry Andric   MachineIRBuilder &B, Register &SOffset, int64_t &ImmOffset) const {
51135f757f3fSDimitry Andric   if (TII.isLegalMUBUFImmOffset(ImmOffset))
51145ffd83dbSDimitry Andric     return;
51155ffd83dbSDimitry Andric 
51165ffd83dbSDimitry Andric   // Illegal offset, store it in soffset.
51175ffd83dbSDimitry Andric   SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
51185ffd83dbSDimitry Andric   B.buildInstr(AMDGPU::S_MOV_B32)
51195ffd83dbSDimitry Andric     .addDef(SOffset)
51205ffd83dbSDimitry Andric     .addImm(ImmOffset);
51215ffd83dbSDimitry Andric   ImmOffset = 0;
51225ffd83dbSDimitry Andric }
51235ffd83dbSDimitry Andric 
51245ffd83dbSDimitry Andric bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(
51255ffd83dbSDimitry Andric   MachineOperand &Root, Register &VAddr, Register &RSrcReg,
51265ffd83dbSDimitry Andric   Register &SOffset, int64_t &Offset) const {
51275ffd83dbSDimitry Andric   // FIXME: Predicates should stop this from reaching here.
51285ffd83dbSDimitry Andric   // addr64 bit was removed for volcanic islands.
51295ffd83dbSDimitry Andric   if (!STI.hasAddr64() || STI.useFlatForGlobal())
51305ffd83dbSDimitry Andric     return false;
51315ffd83dbSDimitry Andric 
51325ffd83dbSDimitry Andric   MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
51335ffd83dbSDimitry Andric   if (!shouldUseAddr64(AddrData))
51345ffd83dbSDimitry Andric     return false;
51355ffd83dbSDimitry Andric 
51365ffd83dbSDimitry Andric   Register N0 = AddrData.N0;
51375ffd83dbSDimitry Andric   Register N2 = AddrData.N2;
51385ffd83dbSDimitry Andric   Register N3 = AddrData.N3;
51395ffd83dbSDimitry Andric   Offset = AddrData.Offset;
51405ffd83dbSDimitry Andric 
51415ffd83dbSDimitry Andric   // Base pointer for the SRD.
51425ffd83dbSDimitry Andric   Register SRDPtr;
51435ffd83dbSDimitry Andric 
51445ffd83dbSDimitry Andric   if (N2) {
51455ffd83dbSDimitry Andric     if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
51465ffd83dbSDimitry Andric       assert(N3);
51475ffd83dbSDimitry Andric       if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
51485ffd83dbSDimitry Andric         // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
51495ffd83dbSDimitry Andric         // addr64, and construct the default resource from a 0 address.
51505ffd83dbSDimitry Andric         VAddr = N0;
51515ffd83dbSDimitry Andric       } else {
51525ffd83dbSDimitry Andric         SRDPtr = N3;
51535ffd83dbSDimitry Andric         VAddr = N2;
51545ffd83dbSDimitry Andric       }
51555ffd83dbSDimitry Andric     } else {
51565ffd83dbSDimitry Andric       // N2 is not divergent.
51575ffd83dbSDimitry Andric       SRDPtr = N2;
51585ffd83dbSDimitry Andric       VAddr = N3;
51595ffd83dbSDimitry Andric     }
51605ffd83dbSDimitry Andric   } else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
51615ffd83dbSDimitry Andric     // Use the default null pointer in the resource
51625ffd83dbSDimitry Andric     VAddr = N0;
51635ffd83dbSDimitry Andric   } else {
51645ffd83dbSDimitry Andric     // N0 -> offset, or
51655ffd83dbSDimitry Andric     // (N0 + C1) -> offset
51665ffd83dbSDimitry Andric     SRDPtr = N0;
51675ffd83dbSDimitry Andric   }
51685ffd83dbSDimitry Andric 
51695ffd83dbSDimitry Andric   MachineIRBuilder B(*Root.getParent());
51705ffd83dbSDimitry Andric   RSrcReg = buildAddr64RSrc(B, *MRI, TII, SRDPtr);
51715ffd83dbSDimitry Andric   splitIllegalMUBUFOffset(B, SOffset, Offset);
51725ffd83dbSDimitry Andric   return true;
51735ffd83dbSDimitry Andric }
51745ffd83dbSDimitry Andric 
51755ffd83dbSDimitry Andric bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(
51765ffd83dbSDimitry Andric   MachineOperand &Root, Register &RSrcReg, Register &SOffset,
51775ffd83dbSDimitry Andric   int64_t &Offset) const {
5178e8d8bef9SDimitry Andric 
5179e8d8bef9SDimitry Andric   // FIXME: Pattern should not reach here.
5180e8d8bef9SDimitry Andric   if (STI.useFlatForGlobal())
5181e8d8bef9SDimitry Andric     return false;
5182e8d8bef9SDimitry Andric 
51835ffd83dbSDimitry Andric   MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
51845ffd83dbSDimitry Andric   if (shouldUseAddr64(AddrData))
51855ffd83dbSDimitry Andric     return false;
51865ffd83dbSDimitry Andric 
51875ffd83dbSDimitry Andric   // N0 -> offset, or
51885ffd83dbSDimitry Andric   // (N0 + C1) -> offset
51895ffd83dbSDimitry Andric   Register SRDPtr = AddrData.N0;
51905ffd83dbSDimitry Andric   Offset = AddrData.Offset;
51915ffd83dbSDimitry Andric 
51925ffd83dbSDimitry Andric   // TODO: Look through extensions for 32-bit soffset.
51935ffd83dbSDimitry Andric   MachineIRBuilder B(*Root.getParent());
51945ffd83dbSDimitry Andric 
51955ffd83dbSDimitry Andric   RSrcReg = buildOffsetSrc(B, *MRI, TII, SRDPtr);
51965ffd83dbSDimitry Andric   splitIllegalMUBUFOffset(B, SOffset, Offset);
51975ffd83dbSDimitry Andric   return true;
51985ffd83dbSDimitry Andric }
51995ffd83dbSDimitry Andric 
52005ffd83dbSDimitry Andric InstructionSelector::ComplexRendererFns
52015ffd83dbSDimitry Andric AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const {
52025ffd83dbSDimitry Andric   Register VAddr;
52035ffd83dbSDimitry Andric   Register RSrcReg;
52045ffd83dbSDimitry Andric   Register SOffset;
52055ffd83dbSDimitry Andric   int64_t Offset = 0;
52065ffd83dbSDimitry Andric 
52075ffd83dbSDimitry Andric   if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset, Offset))
52085ffd83dbSDimitry Andric     return {};
52095ffd83dbSDimitry Andric 
52105ffd83dbSDimitry Andric   // FIXME: Use defaulted operands for trailing 0s and remove from the complex
52115ffd83dbSDimitry Andric   // pattern.
52125ffd83dbSDimitry Andric   return {{
52135ffd83dbSDimitry Andric       [=](MachineInstrBuilder &MIB) {  // rsrc
52145ffd83dbSDimitry Andric         MIB.addReg(RSrcReg);
52155ffd83dbSDimitry Andric       },
52165ffd83dbSDimitry Andric       [=](MachineInstrBuilder &MIB) { // vaddr
52175ffd83dbSDimitry Andric         MIB.addReg(VAddr);
52185ffd83dbSDimitry Andric       },
52195ffd83dbSDimitry Andric       [=](MachineInstrBuilder &MIB) { // soffset
52205ffd83dbSDimitry Andric         if (SOffset)
52215ffd83dbSDimitry Andric           MIB.addReg(SOffset);
52225f757f3fSDimitry Andric         else if (STI.hasRestrictedSOffset())
52235f757f3fSDimitry Andric           MIB.addReg(AMDGPU::SGPR_NULL);
52245ffd83dbSDimitry Andric         else
52255ffd83dbSDimitry Andric           MIB.addImm(0);
52265ffd83dbSDimitry Andric       },
52275ffd83dbSDimitry Andric       [=](MachineInstrBuilder &MIB) { // offset
52285ffd83dbSDimitry Andric         MIB.addImm(Offset);
52295ffd83dbSDimitry Andric       },
5230fe6060f1SDimitry Andric       addZeroImm, //  cpol
52315ffd83dbSDimitry Andric       addZeroImm, //  tfe
52325ffd83dbSDimitry Andric       addZeroImm  //  swz
52335ffd83dbSDimitry Andric     }};
52345ffd83dbSDimitry Andric }
52355ffd83dbSDimitry Andric 
52365ffd83dbSDimitry Andric InstructionSelector::ComplexRendererFns
52375ffd83dbSDimitry Andric AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
52385ffd83dbSDimitry Andric   Register RSrcReg;
52395ffd83dbSDimitry Andric   Register SOffset;
52405ffd83dbSDimitry Andric   int64_t Offset = 0;
52415ffd83dbSDimitry Andric 
52425ffd83dbSDimitry Andric   if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset, Offset))
52435ffd83dbSDimitry Andric     return {};
52445ffd83dbSDimitry Andric 
52455ffd83dbSDimitry Andric   return {{
52465ffd83dbSDimitry Andric       [=](MachineInstrBuilder &MIB) {  // rsrc
52475ffd83dbSDimitry Andric         MIB.addReg(RSrcReg);
52485ffd83dbSDimitry Andric       },
52495ffd83dbSDimitry Andric       [=](MachineInstrBuilder &MIB) { // soffset
52505ffd83dbSDimitry Andric         if (SOffset)
52515ffd83dbSDimitry Andric           MIB.addReg(SOffset);
52525f757f3fSDimitry Andric         else if (STI.hasRestrictedSOffset())
52535f757f3fSDimitry Andric           MIB.addReg(AMDGPU::SGPR_NULL);
52545ffd83dbSDimitry Andric         else
52555ffd83dbSDimitry Andric           MIB.addImm(0);
52565ffd83dbSDimitry Andric       },
52575ffd83dbSDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }, // offset
5258fe6060f1SDimitry Andric       addZeroImm, //  cpol
52595ffd83dbSDimitry Andric       addZeroImm, //  tfe
5260fe6060f1SDimitry Andric       addZeroImm, //  swz
52615ffd83dbSDimitry Andric     }};
52625ffd83dbSDimitry Andric }
52635ffd83dbSDimitry Andric 
52645f757f3fSDimitry Andric InstructionSelector::ComplexRendererFns
52655f757f3fSDimitry Andric AMDGPUInstructionSelector::selectBUFSOffset(MachineOperand &Root) const {
52665f757f3fSDimitry Andric 
52675f757f3fSDimitry Andric   Register SOffset = Root.getReg();
52685f757f3fSDimitry Andric 
52695f757f3fSDimitry Andric   if (STI.hasRestrictedSOffset() && mi_match(SOffset, *MRI, m_ZeroInt()))
52705f757f3fSDimitry Andric     SOffset = AMDGPU::SGPR_NULL;
52715f757f3fSDimitry Andric 
52725f757f3fSDimitry Andric   return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }}};
52735f757f3fSDimitry Andric }
52745f757f3fSDimitry Andric 
52755ffd83dbSDimitry Andric /// Get an immediate that must be 32 bits, treated as zero extended.
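/// For example, a G_CONSTANT of i32 -1 reads back sign-extended as -1 but is
/// returned here as 0xffffffff; a value that does not fit in 32 bits yields
/// std::nullopt.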
5276bdd1243dSDimitry Andric static std::optional<uint64_t>
5277bdd1243dSDimitry Andric getConstantZext32Val(Register Reg, const MachineRegisterInfo &MRI) {
5278349cc55cSDimitry Andric   // getIConstantVRegVal sexts any values, so see if that matters.
5279bdd1243dSDimitry Andric   std::optional<int64_t> OffsetVal = getIConstantVRegSExtVal(Reg, MRI);
52805ffd83dbSDimitry Andric   if (!OffsetVal || !isInt<32>(*OffsetVal))
5281bdd1243dSDimitry Andric     return std::nullopt;
52825ffd83dbSDimitry Andric   return Lo_32(*OffsetVal);
52835ffd83dbSDimitry Andric }
52845ffd83dbSDimitry Andric 
52855ffd83dbSDimitry Andric InstructionSelector::ComplexRendererFns
52865ffd83dbSDimitry Andric AMDGPUInstructionSelector::selectSMRDBufferImm(MachineOperand &Root) const {
5287bdd1243dSDimitry Andric   std::optional<uint64_t> OffsetVal = getConstantZext32Val(Root.getReg(), *MRI);
52885ffd83dbSDimitry Andric   if (!OffsetVal)
52895ffd83dbSDimitry Andric     return {};
52905ffd83dbSDimitry Andric 
5291bdd1243dSDimitry Andric   std::optional<int64_t> EncodedImm =
52925ffd83dbSDimitry Andric       AMDGPU::getSMRDEncodedOffset(STI, *OffsetVal, true);
52935ffd83dbSDimitry Andric   if (!EncodedImm)
52945ffd83dbSDimitry Andric     return {};
52955ffd83dbSDimitry Andric 
52965ffd83dbSDimitry Andric   return {{ [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }  }};
52975ffd83dbSDimitry Andric }
52985ffd83dbSDimitry Andric 
52995ffd83dbSDimitry Andric InstructionSelector::ComplexRendererFns
53005ffd83dbSDimitry Andric AMDGPUInstructionSelector::selectSMRDBufferImm32(MachineOperand &Root) const {
53015ffd83dbSDimitry Andric   assert(STI.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
53025ffd83dbSDimitry Andric 
5303bdd1243dSDimitry Andric   std::optional<uint64_t> OffsetVal = getConstantZext32Val(Root.getReg(), *MRI);
53045ffd83dbSDimitry Andric   if (!OffsetVal)
53055ffd83dbSDimitry Andric     return {};
53065ffd83dbSDimitry Andric 
5307bdd1243dSDimitry Andric   std::optional<int64_t> EncodedImm =
5308bdd1243dSDimitry Andric       AMDGPU::getSMRDEncodedLiteralOffset32(STI, *OffsetVal);
53095ffd83dbSDimitry Andric   if (!EncodedImm)
53105ffd83dbSDimitry Andric     return {};
53115ffd83dbSDimitry Andric 
53125ffd83dbSDimitry Andric   return {{ [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }  }};
53135ffd83dbSDimitry Andric }
53145ffd83dbSDimitry Andric 
5315bdd1243dSDimitry Andric InstructionSelector::ComplexRendererFns
5316bdd1243dSDimitry Andric AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const {
5317bdd1243dSDimitry Andric   // Match the (soffset + offset) pair as a 32-bit register base and
5318bdd1243dSDimitry Andric   // an immediate offset.
5319bdd1243dSDimitry Andric   Register SOffset;
5320bdd1243dSDimitry Andric   unsigned Offset;
53215f757f3fSDimitry Andric   std::tie(SOffset, Offset) = AMDGPU::getBaseWithConstantOffset(
53225f757f3fSDimitry Andric       *MRI, Root.getReg(), KB, /*CheckNUW*/ true);
5323bdd1243dSDimitry Andric   if (!SOffset)
5324bdd1243dSDimitry Andric     return std::nullopt;
5325bdd1243dSDimitry Andric 
5326bdd1243dSDimitry Andric   std::optional<int64_t> EncodedOffset =
5327bdd1243dSDimitry Andric       AMDGPU::getSMRDEncodedOffset(STI, Offset, /* IsBuffer */ true);
5328bdd1243dSDimitry Andric   if (!EncodedOffset)
5329bdd1243dSDimitry Andric     return std::nullopt;
5330bdd1243dSDimitry Andric 
5331bdd1243dSDimitry Andric   assert(MRI->getType(SOffset) == LLT::scalar(32));
5332bdd1243dSDimitry Andric   return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
5333bdd1243dSDimitry Andric            [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedOffset); }}};
5334bdd1243dSDimitry Andric }
5335bdd1243dSDimitry Andric 
5336bdd1243dSDimitry Andric // Variant of stripBitCast that returns the instruction instead of a
5337bdd1243dSDimitry Andric // MachineOperand.
5338bdd1243dSDimitry Andric static MachineInstr *stripBitCast(MachineInstr *MI, MachineRegisterInfo &MRI) {
5339bdd1243dSDimitry Andric   if (MI->getOpcode() == AMDGPU::G_BITCAST)
5340bdd1243dSDimitry Andric     return getDefIgnoringCopies(MI->getOperand(1).getReg(), MRI);
5341bdd1243dSDimitry Andric   return MI;
5342bdd1243dSDimitry Andric }
5343bdd1243dSDimitry Andric 
5344bdd1243dSDimitry Andric // Figure out if this is really an extract of the high 16 bits of a dword;
5345bdd1243dSDimitry Andric // returns nullptr if it isn't.
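// Two shapes are recognized (possibly behind bitcasts), sketched here as
// hypothetical MIR:
//   %hi:_(s16) = G_TRUNC (G_LSHR %x:_(s32), 16)
//   %hi:_(s16) = G_TRUNC (G_BITCAST (G_SHUFFLE_VECTOR %v, %w, shufflemask(1, 1)))
// returning the instruction that defines %x (respectively %v).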
5346bdd1243dSDimitry Andric static MachineInstr *isExtractHiElt(MachineInstr *Inst,
5347bdd1243dSDimitry Andric                                     MachineRegisterInfo &MRI) {
5348bdd1243dSDimitry Andric   Inst = stripBitCast(Inst, MRI);
5349bdd1243dSDimitry Andric 
5350bdd1243dSDimitry Andric   if (Inst->getOpcode() != AMDGPU::G_TRUNC)
5351bdd1243dSDimitry Andric     return nullptr;
5352bdd1243dSDimitry Andric 
5353bdd1243dSDimitry Andric   MachineInstr *TruncOp =
5354bdd1243dSDimitry Andric       getDefIgnoringCopies(Inst->getOperand(1).getReg(), MRI);
5355bdd1243dSDimitry Andric   TruncOp = stripBitCast(TruncOp, MRI);
5356bdd1243dSDimitry Andric 
5357bdd1243dSDimitry Andric   // G_LSHR x, (G_CONSTANT i32 16)
5358bdd1243dSDimitry Andric   if (TruncOp->getOpcode() == AMDGPU::G_LSHR) {
5359bdd1243dSDimitry Andric     auto SrlAmount = getIConstantVRegValWithLookThrough(
5360bdd1243dSDimitry Andric         TruncOp->getOperand(2).getReg(), MRI);
5361bdd1243dSDimitry Andric     if (SrlAmount && SrlAmount->Value.getZExtValue() == 16) {
5362bdd1243dSDimitry Andric       MachineInstr *SrlOp =
5363bdd1243dSDimitry Andric           getDefIgnoringCopies(TruncOp->getOperand(1).getReg(), MRI);
5364bdd1243dSDimitry Andric       return stripBitCast(SrlOp, MRI);
5365bdd1243dSDimitry Andric     }
5366bdd1243dSDimitry Andric   }
5367bdd1243dSDimitry Andric 
5368bdd1243dSDimitry Andric   // G_SHUFFLE_VECTOR x, y, shufflemask(1, 1|0)
5369bdd1243dSDimitry Andric   //    1, 0 swaps the low/high 16 bits.
5370bdd1243dSDimitry Andric   //    1, 1 sets the high 16 bits to be the same as the low 16.
5371bdd1243dSDimitry Andric   // In either case, it selects the high elements.
5372bdd1243dSDimitry Andric   if (TruncOp->getOpcode() == AMDGPU::G_SHUFFLE_VECTOR) {
5373bdd1243dSDimitry Andric     assert(MRI.getType(TruncOp->getOperand(0).getReg()) ==
5374bdd1243dSDimitry Andric            LLT::fixed_vector(2, 16));
5375bdd1243dSDimitry Andric 
5376bdd1243dSDimitry Andric     ArrayRef<int> Mask = TruncOp->getOperand(3).getShuffleMask();
5377bdd1243dSDimitry Andric     assert(Mask.size() == 2);
5378bdd1243dSDimitry Andric 
5379bdd1243dSDimitry Andric     if (Mask[0] == 1 && Mask[1] <= 1) {
5380bdd1243dSDimitry Andric       MachineInstr *LHS =
5381bdd1243dSDimitry Andric           getDefIgnoringCopies(TruncOp->getOperand(1).getReg(), MRI);
5382bdd1243dSDimitry Andric       return stripBitCast(LHS, MRI);
5383bdd1243dSDimitry Andric     }
5384bdd1243dSDimitry Andric   }
5385bdd1243dSDimitry Andric 
5386bdd1243dSDimitry Andric   return nullptr;
5387bdd1243dSDimitry Andric }
5388bdd1243dSDimitry Andric 
5389bdd1243dSDimitry Andric std::pair<Register, unsigned>
5390bdd1243dSDimitry Andric AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root,
5391bdd1243dSDimitry Andric                                                      bool &Matched) const {
5392bdd1243dSDimitry Andric   Matched = false;
5393bdd1243dSDimitry Andric 
5394bdd1243dSDimitry Andric   Register Src;
5395bdd1243dSDimitry Andric   unsigned Mods;
5396bdd1243dSDimitry Andric   std::tie(Src, Mods) = selectVOP3ModsImpl(Root);
5397bdd1243dSDimitry Andric 
5398bdd1243dSDimitry Andric   MachineInstr *MI = getDefIgnoringCopies(Src, *MRI);
5399bdd1243dSDimitry Andric   if (MI->getOpcode() == AMDGPU::G_FPEXT) {
5400bdd1243dSDimitry Andric     MachineOperand *MO = &MI->getOperand(1);
5401bdd1243dSDimitry Andric     Src = MO->getReg();
5402bdd1243dSDimitry Andric     MI = getDefIgnoringCopies(Src, *MRI);
5403bdd1243dSDimitry Andric 
5404bdd1243dSDimitry Andric     assert(MRI->getType(Src) == LLT::scalar(16));
5405bdd1243dSDimitry Andric 
5406bdd1243dSDimitry Andric     // See through bitcasts.
5407bdd1243dSDimitry Andric     // FIXME: Would be nice to use stripBitCast here.
5408bdd1243dSDimitry Andric     if (MI->getOpcode() == AMDGPU::G_BITCAST) {
5409bdd1243dSDimitry Andric       MO = &MI->getOperand(1);
5410bdd1243dSDimitry Andric       Src = MO->getReg();
5411bdd1243dSDimitry Andric       MI = getDefIgnoringCopies(Src, *MRI);
5412bdd1243dSDimitry Andric     }
5413bdd1243dSDimitry Andric 
5414bdd1243dSDimitry Andric     const auto CheckAbsNeg = [&]() {
5415bdd1243dSDimitry Andric       // Be careful about folding modifiers if we already have an abs. fneg is
5416bdd1243dSDimitry Andric       // applied last, so we don't want to apply an earlier fneg.
5417bdd1243dSDimitry Andric       if ((Mods & SISrcMods::ABS) == 0) {
5418bdd1243dSDimitry Andric         unsigned ModsTmp;
5419bdd1243dSDimitry Andric         std::tie(Src, ModsTmp) = selectVOP3ModsImpl(*MO);
5420bdd1243dSDimitry Andric         MI = getDefIgnoringCopies(Src, *MRI);
5421bdd1243dSDimitry Andric 
5422bdd1243dSDimitry Andric         if ((ModsTmp & SISrcMods::NEG) != 0)
5423bdd1243dSDimitry Andric           Mods ^= SISrcMods::NEG;
5424bdd1243dSDimitry Andric 
5425bdd1243dSDimitry Andric         if ((ModsTmp & SISrcMods::ABS) != 0)
5426bdd1243dSDimitry Andric           Mods |= SISrcMods::ABS;
5427bdd1243dSDimitry Andric       }
5428bdd1243dSDimitry Andric     };
5429bdd1243dSDimitry Andric 
5430bdd1243dSDimitry Andric     CheckAbsNeg();
5431bdd1243dSDimitry Andric 
5432bdd1243dSDimitry Andric     // op_sel/op_sel_hi decide the source type and source.
5433bdd1243dSDimitry Andric     // If the source's op_sel_hi is set, it indicates to do a conversion from
5434bdd1243dSDimitry Andric     // fp16. If the source's op_sel is set, it picks the high half of the
5435bdd1243dSDimitry Andric     // source register.
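    //
    // For example, a plain fpext from an s16 source ends up with only
    // OP_SEL_1 set (convert from fp16, low half), while an fpext of the high
    // 16 bits of a dword additionally sets OP_SEL_0 to select the high half.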
5436bdd1243dSDimitry Andric 
5437bdd1243dSDimitry Andric     Mods |= SISrcMods::OP_SEL_1;
5438bdd1243dSDimitry Andric 
5439bdd1243dSDimitry Andric     if (MachineInstr *ExtractHiEltMI = isExtractHiElt(MI, *MRI)) {
5440bdd1243dSDimitry Andric       Mods |= SISrcMods::OP_SEL_0;
5441bdd1243dSDimitry Andric       MI = ExtractHiEltMI;
5442bdd1243dSDimitry Andric       MO = &MI->getOperand(0);
5443bdd1243dSDimitry Andric       Src = MO->getReg();
5444bdd1243dSDimitry Andric 
5445bdd1243dSDimitry Andric       CheckAbsNeg();
5446bdd1243dSDimitry Andric     }
5447bdd1243dSDimitry Andric 
5448bdd1243dSDimitry Andric     Matched = true;
5449bdd1243dSDimitry Andric   }
5450bdd1243dSDimitry Andric 
5451bdd1243dSDimitry Andric   return {Src, Mods};
5452bdd1243dSDimitry Andric }
5453bdd1243dSDimitry Andric 
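// Variant of selectVOP3PMadMixMods that only produces renderer functions when
// the fp16-extension pattern above was matched; otherwise selection fails.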
5454bdd1243dSDimitry Andric InstructionSelector::ComplexRendererFns
545506c3fb27SDimitry Andric AMDGPUInstructionSelector::selectVOP3PMadMixModsExt(
545606c3fb27SDimitry Andric     MachineOperand &Root) const {
545706c3fb27SDimitry Andric   Register Src;
545806c3fb27SDimitry Andric   unsigned Mods;
545906c3fb27SDimitry Andric   bool Matched;
546006c3fb27SDimitry Andric   std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
546106c3fb27SDimitry Andric   if (!Matched)
546206c3fb27SDimitry Andric     return {};
546306c3fb27SDimitry Andric 
546406c3fb27SDimitry Andric   return {{
546506c3fb27SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
546606c3fb27SDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
546706c3fb27SDimitry Andric   }};
546806c3fb27SDimitry Andric }
546906c3fb27SDimitry Andric 
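// Unconditional variant: always returns the selected source register and
// modifiers, whether or not the fp16-extension pattern was matched.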
547006c3fb27SDimitry Andric InstructionSelector::ComplexRendererFns
5471bdd1243dSDimitry Andric AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const {
5472bdd1243dSDimitry Andric   Register Src;
5473bdd1243dSDimitry Andric   unsigned Mods;
5474bdd1243dSDimitry Andric   bool Matched;
5475bdd1243dSDimitry Andric   std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
5476bdd1243dSDimitry Andric 
5477bdd1243dSDimitry Andric   return {{
5478bdd1243dSDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
5479bdd1243dSDimitry Andric       [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
5480bdd1243dSDimitry Andric   }};
5481bdd1243dSDimitry Andric }
5482bdd1243dSDimitry Andric 
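// Select llvm.amdgcn.s.barrier.signal.isfirst(.var). The .var form passes the
// barrier id in M0, the plain form takes an immediate; the "isfirst" result
// is copied out of SCC into the destination SGPR.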
54835f757f3fSDimitry Andric bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
54845f757f3fSDimitry Andric     MachineInstr &I, Intrinsic::ID IntrID) const {
54855f757f3fSDimitry Andric   MachineBasicBlock *MBB = I.getParent();
54865f757f3fSDimitry Andric   const DebugLoc &DL = I.getDebugLoc();
54875f757f3fSDimitry Andric   Register CCReg = I.getOperand(0).getReg();
54885f757f3fSDimitry Andric 
54895f757f3fSDimitry Andric   bool HasM0 = IntrID == Intrinsic::amdgcn_s_barrier_signal_isfirst_var;
54905f757f3fSDimitry Andric 
54915f757f3fSDimitry Andric   if (HasM0) {
54925f757f3fSDimitry Andric     auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
54935f757f3fSDimitry Andric                        .addReg(I.getOperand(2).getReg());
54945f757f3fSDimitry Andric     BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0));
54955f757f3fSDimitry Andric     if (!constrainSelectedInstRegOperands(*CopyMIB, TII, TRI, RBI))
54965f757f3fSDimitry Andric       return false;
54975f757f3fSDimitry Andric   } else {
54985f757f3fSDimitry Andric     BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
54995f757f3fSDimitry Andric         .addImm(I.getOperand(2).getImm());
55005f757f3fSDimitry Andric   }
55015f757f3fSDimitry Andric 
55025f757f3fSDimitry Andric   BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), CCReg).addReg(AMDGPU::SCC);
55035f757f3fSDimitry Andric 
55045f757f3fSDimitry Andric   I.eraseFromParent();
55055f757f3fSDimitry Andric   return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass,
55065f757f3fSDimitry Andric                                       *MRI);
55075f757f3fSDimitry Andric }
55085f757f3fSDimitry Andric 
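// Map a named-barrier intrinsic to its MachineInstr opcode, choosing the _IMM
// form when the barrier id is an inline constant and the _M0 form when the id
// has to be passed in M0.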
55095f757f3fSDimitry Andric static unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) {
55105f757f3fSDimitry Andric   if (HasInlineConst) {
55115f757f3fSDimitry Andric     switch (IntrID) {
55125f757f3fSDimitry Andric     default:
55135f757f3fSDimitry Andric       llvm_unreachable("not a named barrier op");
55145f757f3fSDimitry Andric     case Intrinsic::amdgcn_s_barrier_init:
55155f757f3fSDimitry Andric       return AMDGPU::S_BARRIER_INIT_IMM;
55165f757f3fSDimitry Andric     case Intrinsic::amdgcn_s_barrier_join:
55175f757f3fSDimitry Andric       return AMDGPU::S_BARRIER_JOIN_IMM;
55185f757f3fSDimitry Andric     case Intrinsic::amdgcn_s_wakeup_barrier:
55195f757f3fSDimitry Andric       return AMDGPU::S_WAKEUP_BARRIER_IMM;
55205f757f3fSDimitry Andric     case Intrinsic::amdgcn_s_get_barrier_state:
55215f757f3fSDimitry Andric       return AMDGPU::S_GET_BARRIER_STATE_IMM;
55225f757f3fSDimitry Andric     }
55235f757f3fSDimitry Andric   } else {
55245f757f3fSDimitry Andric     switch (IntrID) {
55255f757f3fSDimitry Andric     default:
55265f757f3fSDimitry Andric       llvm_unreachable("not a named barrier op");
55275f757f3fSDimitry Andric     case Intrinsic::amdgcn_s_barrier_init:
55285f757f3fSDimitry Andric       return AMDGPU::S_BARRIER_INIT_M0;
55295f757f3fSDimitry Andric     case Intrinsic::amdgcn_s_barrier_join:
55305f757f3fSDimitry Andric       return AMDGPU::S_BARRIER_JOIN_M0;
55315f757f3fSDimitry Andric     case Intrinsic::amdgcn_s_wakeup_barrier:
55325f757f3fSDimitry Andric       return AMDGPU::S_WAKEUP_BARRIER_M0;
55335f757f3fSDimitry Andric     case Intrinsic::amdgcn_s_get_barrier_state:
55345f757f3fSDimitry Andric       return AMDGPU::S_GET_BARRIER_STATE_M0;
55355f757f3fSDimitry Andric     }
55365f757f3fSDimitry Andric   }
55375f757f3fSDimitry Andric }
55385f757f3fSDimitry Andric 
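// Select the named-barrier intrinsics (s.barrier.init/join, s.wakeup.barrier,
// s.get.barrier.state). Non-constant barrier ids, and the member count for
// S_BARRIER_INIT, are packed into M0; inline-constant ids use the _IMM forms.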
55395f757f3fSDimitry Andric bool AMDGPUInstructionSelector::selectNamedBarrierInst(
55405f757f3fSDimitry Andric     MachineInstr &I, Intrinsic::ID IntrID) const {
55415f757f3fSDimitry Andric   MachineBasicBlock *MBB = I.getParent();
55425f757f3fSDimitry Andric   const DebugLoc &DL = I.getDebugLoc();
55435f757f3fSDimitry Andric   MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_barrier_state
55445f757f3fSDimitry Andric                              ? I.getOperand(2)
55455f757f3fSDimitry Andric                              : I.getOperand(1);
55465f757f3fSDimitry Andric   std::optional<int64_t> BarValImm =
55475f757f3fSDimitry Andric       getIConstantVRegSExtVal(BarOp.getReg(), *MRI);
55485f757f3fSDimitry Andric   Register M0Val;
55495f757f3fSDimitry Andric   Register TmpReg0;
55505f757f3fSDimitry Andric 
55515f757f3fSDimitry Andric   // For S_BARRIER_INIT, the member count is always read from M0[16:22].
55525f757f3fSDimitry Andric   if (IntrID == Intrinsic::amdgcn_s_barrier_init) {
55535f757f3fSDimitry Andric     Register MemberCount = I.getOperand(2).getReg();
55545f757f3fSDimitry Andric     TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
55555f757f3fSDimitry Andric     // TODO: This should be expanded during legalization so that the S_LSHL
55565f757f3fSDimitry Andric     // and S_OR can be constant-folded.
55575f757f3fSDimitry Andric     BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_LSHL_B32), TmpReg0)
55585f757f3fSDimitry Andric         .addImm(16)
55595f757f3fSDimitry Andric         .addReg(MemberCount);
55605f757f3fSDimitry Andric     M0Val = TmpReg0;
55615f757f3fSDimitry Andric   }
55625f757f3fSDimitry Andric 
55635f757f3fSDimitry Andric   // If not inlinable, reference the barrier id depending on the instruction.
55645f757f3fSDimitry Andric   if (!BarValImm) {
55655f757f3fSDimitry Andric     if (IntrID == Intrinsic::amdgcn_s_barrier_init) {
55665f757f3fSDimitry Andric       // If the barrier id is not an inlinable constant, it must be passed in
55675f757f3fSDimitry Andric       // M0[4:0]. OR it with the member count so both are included in M0 for
55685f757f3fSDimitry Andric       // S_BARRIER_INIT.
55695f757f3fSDimitry Andric       Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
55705f757f3fSDimitry Andric       BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_OR_B32), TmpReg1)
55715f757f3fSDimitry Andric           .addReg(BarOp.getReg())
55725f757f3fSDimitry Andric           .addReg(TmpReg0);
55735f757f3fSDimitry Andric       M0Val = TmpReg1;
55745f757f3fSDimitry Andric     } else {
55755f757f3fSDimitry Andric       M0Val = BarOp.getReg();
55765f757f3fSDimitry Andric     }
55775f757f3fSDimitry Andric   }
55785f757f3fSDimitry Andric 
55795f757f3fSDimitry Andric   // Build copy to M0 if needed. For S_BARRIER_INIT, M0 is always required.
55805f757f3fSDimitry Andric   if (M0Val) {
55815f757f3fSDimitry Andric     auto CopyMIB =
55825f757f3fSDimitry Andric         BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0).addReg(M0Val);
55835f757f3fSDimitry Andric     constrainSelectedInstRegOperands(*CopyMIB, TII, TRI, RBI);
55845f757f3fSDimitry Andric   }
55855f757f3fSDimitry Andric 
55865f757f3fSDimitry Andric   MachineInstrBuilder MIB;
55875f757f3fSDimitry Andric   unsigned Opc = getNamedBarrierOp(BarValImm.has_value(), IntrID);
55885f757f3fSDimitry Andric   MIB = BuildMI(*MBB, &I, DL, TII.get(Opc));
55895f757f3fSDimitry Andric 
55905f757f3fSDimitry Andric   if (IntrID == Intrinsic::amdgcn_s_get_barrier_state)
55915f757f3fSDimitry Andric     MIB.addDef(I.getOperand(0).getReg());
55925f757f3fSDimitry Andric 
55935f757f3fSDimitry Andric   if (BarValImm)
55945f757f3fSDimitry Andric     MIB.addImm(*BarValImm);
55955f757f3fSDimitry Andric 
55965f757f3fSDimitry Andric   I.eraseFromParent();
55975f757f3fSDimitry Andric   return true;
55985f757f3fSDimitry Andric }
55997a6dacacSDimitry Andric 
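// Select llvm.amdgcn.s.barrier.leave: emit S_BARRIER_LEAVE and copy its SCC
// result into the destination SGPR.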
56005f757f3fSDimitry Andric bool AMDGPUInstructionSelector::selectSBarrierLeave(MachineInstr &I) const {
56015f757f3fSDimitry Andric   MachineBasicBlock *BB = I.getParent();
56025f757f3fSDimitry Andric   const DebugLoc &DL = I.getDebugLoc();
56035f757f3fSDimitry Andric   Register CCReg = I.getOperand(0).getReg();
56045f757f3fSDimitry Andric 
56055f757f3fSDimitry Andric   BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_BARRIER_LEAVE));
56065f757f3fSDimitry Andric   BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg).addReg(AMDGPU::SCC);
56075f757f3fSDimitry Andric 
56085f757f3fSDimitry Andric   I.eraseFromParent();
56095f757f3fSDimitry Andric   return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass,
56105f757f3fSDimitry Andric                                       *MRI);
56115f757f3fSDimitry Andric }
56125f757f3fSDimitry Andric 
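// Custom operand renderers used by the imported selection patterns. Each one
// turns a matched constant (or immediate operand) into the immediate encoding
// the selected instruction expects.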
56138bcb0991SDimitry Andric void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
5614480093f4SDimitry Andric                                                  const MachineInstr &MI,
5615480093f4SDimitry Andric                                                  int OpIdx) const {
5616480093f4SDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
5617480093f4SDimitry Andric          "Expected G_CONSTANT");
56185ffd83dbSDimitry Andric   MIB.addImm(MI.getOperand(1).getCImm()->getSExtValue());
56198bcb0991SDimitry Andric }
5620480093f4SDimitry Andric 
5621480093f4SDimitry Andric void AMDGPUInstructionSelector::renderNegateImm(MachineInstrBuilder &MIB,
5622480093f4SDimitry Andric                                                 const MachineInstr &MI,
5623480093f4SDimitry Andric                                                 int OpIdx) const {
5624480093f4SDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
5625480093f4SDimitry Andric          "Expected G_CONSTANT");
5626480093f4SDimitry Andric   MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());
5627480093f4SDimitry Andric }
5628480093f4SDimitry Andric 
5629480093f4SDimitry Andric void AMDGPUInstructionSelector::renderBitcastImm(MachineInstrBuilder &MIB,
5630480093f4SDimitry Andric                                                  const MachineInstr &MI,
5631480093f4SDimitry Andric                                                  int OpIdx) const {
5632480093f4SDimitry Andric   assert(OpIdx == -1);
5633480093f4SDimitry Andric 
5634480093f4SDimitry Andric   const MachineOperand &Op = MI.getOperand(1);
5635480093f4SDimitry Andric   if (MI.getOpcode() == TargetOpcode::G_FCONSTANT)
5636480093f4SDimitry Andric     MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
5637480093f4SDimitry Andric   else {
5638480093f4SDimitry Andric     assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
5639480093f4SDimitry Andric     MIB.addImm(Op.getCImm()->getSExtValue());
5640480093f4SDimitry Andric   }
5641480093f4SDimitry Andric }
5642480093f4SDimitry Andric 
5643480093f4SDimitry Andric void AMDGPUInstructionSelector::renderPopcntImm(MachineInstrBuilder &MIB,
5644480093f4SDimitry Andric                                                 const MachineInstr &MI,
5645480093f4SDimitry Andric                                                 int OpIdx) const {
5646480093f4SDimitry Andric   assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
5647480093f4SDimitry Andric          "Expected G_CONSTANT");
564806c3fb27SDimitry Andric   MIB.addImm(MI.getOperand(1).getCImm()->getValue().popcount());
5649480093f4SDimitry Andric }
5650480093f4SDimitry Andric 
5651480093f4SDimitry Andric /// This only really exists to satisfy the DAG type-checking machinery, so it
5652480093f4SDimitry Andric /// is a no-op here.
5653480093f4SDimitry Andric void AMDGPUInstructionSelector::renderTruncTImm(MachineInstrBuilder &MIB,
5654480093f4SDimitry Andric                                                 const MachineInstr &MI,
5655480093f4SDimitry Andric                                                 int OpIdx) const {
5656480093f4SDimitry Andric   MIB.addImm(MI.getOperand(OpIdx).getImm());
5657480093f4SDimitry Andric }
5658480093f4SDimitry Andric 
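// Render a matched boolean immediate as the OP_SEL_0 source-modifier bit.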
565906c3fb27SDimitry Andric void AMDGPUInstructionSelector::renderOpSelTImm(MachineInstrBuilder &MIB,
566006c3fb27SDimitry Andric                                                 const MachineInstr &MI,
566106c3fb27SDimitry Andric                                                 int OpIdx) const {
566206c3fb27SDimitry Andric   assert(OpIdx >= 0 && "expected to match an immediate operand");
566306c3fb27SDimitry Andric   MIB.addImm(MI.getOperand(OpIdx).getImm() ? (int64_t)SISrcMods::OP_SEL_0 : 0);
566406c3fb27SDimitry Andric }
566506c3fb27SDimitry Andric 
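// Extract the cache-policy (cpol) bits from a matched cachepolicy immediate,
// masked to the bits that are valid for the current subtarget.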
5666fe6060f1SDimitry Andric void AMDGPUInstructionSelector::renderExtractCPol(MachineInstrBuilder &MIB,
56675ffd83dbSDimitry Andric                                                   const MachineInstr &MI,
56685ffd83dbSDimitry Andric                                                   int OpIdx) const {
56695ffd83dbSDimitry Andric   assert(OpIdx >= 0 && "expected to match an immediate operand");
56705f757f3fSDimitry Andric   MIB.addImm(MI.getOperand(OpIdx).getImm() &
56715f757f3fSDimitry Andric              (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::ALL
56725f757f3fSDimitry Andric                                        : AMDGPU::CPol::ALL_pregfx12));
56735ffd83dbSDimitry Andric }
56745ffd83dbSDimitry Andric 
56755ffd83dbSDimitry Andric void AMDGPUInstructionSelector::renderExtractSWZ(MachineInstrBuilder &MIB,
56765ffd83dbSDimitry Andric                                                  const MachineInstr &MI,
56775ffd83dbSDimitry Andric                                                  int OpIdx) const {
56785ffd83dbSDimitry Andric   assert(OpIdx >= 0 && "expected to match an immediate operand");
56795f757f3fSDimitry Andric   const bool Swizzle = MI.getOperand(OpIdx).getImm() &
56805f757f3fSDimitry Andric                        (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::SWZ
56815f757f3fSDimitry Andric                                                  : AMDGPU::CPol::SWZ_pregfx12);
56825f757f3fSDimitry Andric   MIB.addImm(Swizzle);
56835ffd83dbSDimitry Andric }
56845ffd83dbSDimitry Andric 
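// Like renderExtractCPol, but with the GLC bit forced on in the result
// (e.g. for operations that must return a value).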
56857a6dacacSDimitry Andric void AMDGPUInstructionSelector::renderExtractCpolSetGLC(
56867a6dacacSDimitry Andric     MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
5687fe6060f1SDimitry Andric   assert(OpIdx >= 0 && "expected to match an immediate operand");
56887a6dacacSDimitry Andric   const uint32_t Cpol = MI.getOperand(OpIdx).getImm() &
56897a6dacacSDimitry Andric                         (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::ALL
56907a6dacacSDimitry Andric                                                   : AMDGPU::CPol::ALL_pregfx12);
56917a6dacacSDimitry Andric   MIB.addImm(Cpol | AMDGPU::CPol::GLC);
5692fe6060f1SDimitry Andric }
5693fe6060f1SDimitry Andric 
5694e8d8bef9SDimitry Andric void AMDGPUInstructionSelector::renderFrameIndex(MachineInstrBuilder &MIB,
5695e8d8bef9SDimitry Andric                                                  const MachineInstr &MI,
5696e8d8bef9SDimitry Andric                                                  int OpIdx) const {
56975f757f3fSDimitry Andric   MIB.addFrameIndex(MI.getOperand(1).getIndex());
56985f757f3fSDimitry Andric }
56995f757f3fSDimitry Andric 
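// Render an FP immediate whose absolute value is an exact power of two as its
// signed base-2 exponent.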
57005f757f3fSDimitry Andric void AMDGPUInstructionSelector::renderFPPow2ToExponent(MachineInstrBuilder &MIB,
57015f757f3fSDimitry Andric                                                        const MachineInstr &MI,
57025f757f3fSDimitry Andric                                                        int OpIdx) const {
57035f757f3fSDimitry Andric   const APFloat &APF = MI.getOperand(1).getFPImm()->getValueAPF();
57045f757f3fSDimitry Andric   int ExpVal = APF.getExactLog2Abs();
57055f757f3fSDimitry Andric   assert(ExpVal != INT_MIN);
57065f757f3fSDimitry Andric   MIB.addImm(ExpVal);
5707e8d8bef9SDimitry Andric }
5708e8d8bef9SDimitry Andric 
5709*0fca6ea1SDimitry Andric bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const {
5710*0fca6ea1SDimitry Andric   return TII.isInlineConstant(Imm);
5711480093f4SDimitry Andric }
5712480093f4SDimitry Andric 
5713480093f4SDimitry Andric bool AMDGPUInstructionSelector::isInlineImmediate(const APFloat &Imm) const {
5714480093f4SDimitry Andric   return TII.isInlineConstant(Imm);
5715480093f4SDimitry Andric }
5716