10b57cec5SDimitry Andric //===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric /// \file 90b57cec5SDimitry Andric /// This file implements the targeting of the InstructionSelector class for 100b57cec5SDimitry Andric /// AMDGPU. 110b57cec5SDimitry Andric /// \todo This should be generated by TableGen. 120b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #include "AMDGPUInstructionSelector.h" 15e8d8bef9SDimitry Andric #include "AMDGPU.h" 16480093f4SDimitry Andric #include "AMDGPUGlobalISelUtils.h" 17e8d8bef9SDimitry Andric #include "AMDGPUInstrInfo.h" 180b57cec5SDimitry Andric #include "AMDGPURegisterBankInfo.h" 190b57cec5SDimitry Andric #include "AMDGPUTargetMachine.h" 200b57cec5SDimitry Andric #include "SIMachineFunctionInfo.h" 21fe6060f1SDimitry Andric #include "Utils/AMDGPUBaseInfo.h" 2206c3fb27SDimitry Andric #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h" 238bcb0991SDimitry Andric #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" 245f757f3fSDimitry Andric #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" 250b57cec5SDimitry Andric #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" 26e8d8bef9SDimitry Andric #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" 2781ad6265SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h" 28e8d8bef9SDimitry Andric #include "llvm/IR/DiagnosticInfo.h" 29349cc55cSDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h" 30bdd1243dSDimitry Andric #include <optional> 310b57cec5SDimitry Andric 320b57cec5SDimitry Andric #define DEBUG_TYPE "amdgpu-isel" 330b57cec5SDimitry Andric 340b57cec5SDimitry Andric using namespace llvm; 350b57cec5SDimitry Andric using namespace MIPatternMatch; 360b57cec5SDimitry Andric 370b57cec5SDimitry Andric #define GET_GLOBALISEL_IMPL 380b57cec5SDimitry Andric #define AMDGPUSubtarget GCNSubtarget 390b57cec5SDimitry Andric #include "AMDGPUGenGlobalISel.inc" 400b57cec5SDimitry Andric #undef GET_GLOBALISEL_IMPL 410b57cec5SDimitry Andric #undef AMDGPUSubtarget 420b57cec5SDimitry Andric 430b57cec5SDimitry Andric AMDGPUInstructionSelector::AMDGPUInstructionSelector( 440b57cec5SDimitry Andric const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI, 450b57cec5SDimitry Andric const AMDGPUTargetMachine &TM) 4604eeddc0SDimitry Andric : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM), 470b57cec5SDimitry Andric STI(STI), 480b57cec5SDimitry Andric EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG), 490b57cec5SDimitry Andric #define GET_GLOBALISEL_PREDICATES_INIT 500b57cec5SDimitry Andric #include "AMDGPUGenGlobalISel.inc" 510b57cec5SDimitry Andric #undef GET_GLOBALISEL_PREDICATES_INIT 520b57cec5SDimitry Andric #define GET_GLOBALISEL_TEMPORARIES_INIT 530b57cec5SDimitry Andric #include "AMDGPUGenGlobalISel.inc" 540b57cec5SDimitry Andric #undef GET_GLOBALISEL_TEMPORARIES_INIT 550b57cec5SDimitry Andric { 560b57cec5SDimitry Andric } 570b57cec5SDimitry Andric 580b57cec5SDimitry Andric const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; } 590b57cec5SDimitry Andric 60fe6060f1SDimitry Andric void AMDGPUInstructionSelector::setupMF(MachineFunction &MF, GISelKnownBits *KB, 6106c3fb27SDimitry Andric CodeGenCoverage *CoverageInfo, 62fe6060f1SDimitry Andric ProfileSummaryInfo *PSI, 63fe6060f1SDimitry Andric BlockFrequencyInfo *BFI) { 648bcb0991SDimitry Andric MRI = &MF.getRegInfo(); 65e8d8bef9SDimitry Andric Subtarget = &MF.getSubtarget<GCNSubtarget>(); 66*0fca6ea1SDimitry Andric Subtarget->checkSubtargetFeatures(MF.getFunction()); 67fe6060f1SDimitry Andric InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI); 688bcb0991SDimitry Andric } 698bcb0991SDimitry Andric 705f757f3fSDimitry Andric // Return the wave level SGPR base address if this is a wave address. 715f757f3fSDimitry Andric static Register getWaveAddress(const MachineInstr *Def) { 725f757f3fSDimitry Andric return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS 735f757f3fSDimitry Andric ? Def->getOperand(1).getReg() 745f757f3fSDimitry Andric : Register(); 755f757f3fSDimitry Andric } 765f757f3fSDimitry Andric 770b57cec5SDimitry Andric bool AMDGPUInstructionSelector::isVCC(Register Reg, 780b57cec5SDimitry Andric const MachineRegisterInfo &MRI) const { 79e8d8bef9SDimitry Andric // The verifier is oblivious to s1 being a valid value for wavesize registers. 80e8d8bef9SDimitry Andric if (Reg.isPhysical()) 81e8d8bef9SDimitry Andric return false; 820b57cec5SDimitry Andric 830b57cec5SDimitry Andric auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); 840b57cec5SDimitry Andric const TargetRegisterClass *RC = 850b57cec5SDimitry Andric RegClassOrBank.dyn_cast<const TargetRegisterClass*>(); 860b57cec5SDimitry Andric if (RC) { 870b57cec5SDimitry Andric const LLT Ty = MRI.getType(Reg); 8881ad6265SDimitry Andric if (!Ty.isValid() || Ty.getSizeInBits() != 1) 8981ad6265SDimitry Andric return false; 9081ad6265SDimitry Andric // G_TRUNC s1 result is never vcc. 9181ad6265SDimitry Andric return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC && 9281ad6265SDimitry Andric RC->hasSuperClassEq(TRI.getBoolRC()); 930b57cec5SDimitry Andric } 940b57cec5SDimitry Andric 950b57cec5SDimitry Andric const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>(); 960b57cec5SDimitry Andric return RB->getID() == AMDGPU::VCCRegBankID; 970b57cec5SDimitry Andric } 980b57cec5SDimitry Andric 995ffd83dbSDimitry Andric bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI, 1005ffd83dbSDimitry Andric unsigned NewOpc) const { 1015ffd83dbSDimitry Andric MI.setDesc(TII.get(NewOpc)); 10281ad6265SDimitry Andric MI.removeOperand(1); // Remove intrinsic ID. 1035ffd83dbSDimitry Andric MI.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); 1045ffd83dbSDimitry Andric 1055ffd83dbSDimitry Andric MachineOperand &Dst = MI.getOperand(0); 1065ffd83dbSDimitry Andric MachineOperand &Src = MI.getOperand(1); 1075ffd83dbSDimitry Andric 1085ffd83dbSDimitry Andric // TODO: This should be legalized to s32 if needed 1095ffd83dbSDimitry Andric if (MRI->getType(Dst.getReg()) == LLT::scalar(1)) 1105ffd83dbSDimitry Andric return false; 1115ffd83dbSDimitry Andric 1125ffd83dbSDimitry Andric const TargetRegisterClass *DstRC 1135ffd83dbSDimitry Andric = TRI.getConstrainedRegClassForOperand(Dst, *MRI); 1145ffd83dbSDimitry Andric const TargetRegisterClass *SrcRC 1155ffd83dbSDimitry Andric = TRI.getConstrainedRegClassForOperand(Src, *MRI); 1165ffd83dbSDimitry Andric if (!DstRC || DstRC != SrcRC) 1175ffd83dbSDimitry Andric return false; 1185ffd83dbSDimitry Andric 1195ffd83dbSDimitry Andric return RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI) && 1205ffd83dbSDimitry Andric RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI); 1215ffd83dbSDimitry Andric } 1225ffd83dbSDimitry Andric 1230b57cec5SDimitry Andric bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const { 1240b57cec5SDimitry Andric const DebugLoc &DL = I.getDebugLoc(); 1250b57cec5SDimitry Andric MachineBasicBlock *BB = I.getParent(); 1260b57cec5SDimitry Andric I.setDesc(TII.get(TargetOpcode::COPY)); 1270b57cec5SDimitry Andric 1280b57cec5SDimitry Andric const MachineOperand &Src = I.getOperand(1); 1290b57cec5SDimitry Andric MachineOperand &Dst = I.getOperand(0); 1300b57cec5SDimitry Andric Register DstReg = Dst.getReg(); 1310b57cec5SDimitry Andric Register SrcReg = Src.getReg(); 1320b57cec5SDimitry Andric 1338bcb0991SDimitry Andric if (isVCC(DstReg, *MRI)) { 1340b57cec5SDimitry Andric if (SrcReg == AMDGPU::SCC) { 1350b57cec5SDimitry Andric const TargetRegisterClass *RC 1368bcb0991SDimitry Andric = TRI.getConstrainedRegClassForOperand(Dst, *MRI); 1370b57cec5SDimitry Andric if (!RC) 1380b57cec5SDimitry Andric return true; 1398bcb0991SDimitry Andric return RBI.constrainGenericRegister(DstReg, *RC, *MRI); 1400b57cec5SDimitry Andric } 1410b57cec5SDimitry Andric 1428bcb0991SDimitry Andric if (!isVCC(SrcReg, *MRI)) { 1430b57cec5SDimitry Andric // TODO: Should probably leave the copy and let copyPhysReg expand it. 1448bcb0991SDimitry Andric if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI)) 1450b57cec5SDimitry Andric return false; 1460b57cec5SDimitry Andric 147480093f4SDimitry Andric const TargetRegisterClass *SrcRC 148480093f4SDimitry Andric = TRI.getConstrainedRegClassForOperand(Src, *MRI); 149480093f4SDimitry Andric 150bdd1243dSDimitry Andric std::optional<ValueAndVReg> ConstVal = 151349cc55cSDimitry Andric getIConstantVRegValWithLookThrough(SrcReg, *MRI, true); 152fe6060f1SDimitry Andric if (ConstVal) { 153fe6060f1SDimitry Andric unsigned MovOpc = 154fe6060f1SDimitry Andric STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32; 155fe6060f1SDimitry Andric BuildMI(*BB, &I, DL, TII.get(MovOpc), DstReg) 156fe6060f1SDimitry Andric .addImm(ConstVal->Value.getBoolValue() ? -1 : 0); 157fe6060f1SDimitry Andric } else { 158480093f4SDimitry Andric Register MaskedReg = MRI->createVirtualRegister(SrcRC); 159480093f4SDimitry Andric 160480093f4SDimitry Andric // We can't trust the high bits at this point, so clear them. 161480093f4SDimitry Andric 162480093f4SDimitry Andric // TODO: Skip masking high bits if def is known boolean. 163480093f4SDimitry Andric 1645f757f3fSDimitry Andric bool IsSGPR = TRI.isSGPRClass(SrcRC); 165fe6060f1SDimitry Andric unsigned AndOpc = 1665f757f3fSDimitry Andric IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32; 1675f757f3fSDimitry Andric auto And = BuildMI(*BB, &I, DL, TII.get(AndOpc), MaskedReg) 168480093f4SDimitry Andric .addImm(1) 169480093f4SDimitry Andric .addReg(SrcReg); 1705f757f3fSDimitry Andric if (IsSGPR) 1715f757f3fSDimitry Andric And.setOperandDead(3); // Dead scc 1725f757f3fSDimitry Andric 1730b57cec5SDimitry Andric BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg) 1740b57cec5SDimitry Andric .addImm(0) 175480093f4SDimitry Andric .addReg(MaskedReg); 176fe6060f1SDimitry Andric } 1770b57cec5SDimitry Andric 1788bcb0991SDimitry Andric if (!MRI->getRegClassOrNull(SrcReg)) 179480093f4SDimitry Andric MRI->setRegClass(SrcReg, SrcRC); 1800b57cec5SDimitry Andric I.eraseFromParent(); 1810b57cec5SDimitry Andric return true; 1820b57cec5SDimitry Andric } 1830b57cec5SDimitry Andric 1840b57cec5SDimitry Andric const TargetRegisterClass *RC = 1858bcb0991SDimitry Andric TRI.getConstrainedRegClassForOperand(Dst, *MRI); 1868bcb0991SDimitry Andric if (RC && !RBI.constrainGenericRegister(DstReg, *RC, *MRI)) 1870b57cec5SDimitry Andric return false; 1880b57cec5SDimitry Andric 1890b57cec5SDimitry Andric return true; 1900b57cec5SDimitry Andric } 1910b57cec5SDimitry Andric 1920b57cec5SDimitry Andric for (const MachineOperand &MO : I.operands()) { 193e8d8bef9SDimitry Andric if (MO.getReg().isPhysical()) 1940b57cec5SDimitry Andric continue; 1950b57cec5SDimitry Andric 1960b57cec5SDimitry Andric const TargetRegisterClass *RC = 1978bcb0991SDimitry Andric TRI.getConstrainedRegClassForOperand(MO, *MRI); 1980b57cec5SDimitry Andric if (!RC) 1990b57cec5SDimitry Andric continue; 2008bcb0991SDimitry Andric RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI); 2010b57cec5SDimitry Andric } 2020b57cec5SDimitry Andric return true; 2030b57cec5SDimitry Andric } 2040b57cec5SDimitry Andric 2050b57cec5SDimitry Andric bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const { 2060b57cec5SDimitry Andric const Register DefReg = I.getOperand(0).getReg(); 2078bcb0991SDimitry Andric const LLT DefTy = MRI->getType(DefReg); 2085ffd83dbSDimitry Andric 209*0fca6ea1SDimitry Andric // S1 G_PHIs should not be selected in instruction-select, instead: 210*0fca6ea1SDimitry Andric // - divergent S1 G_PHI should go through lane mask merging algorithm 211*0fca6ea1SDimitry Andric // and be fully inst-selected in AMDGPUGlobalISelDivergenceLowering 212*0fca6ea1SDimitry Andric // - uniform S1 G_PHI should be lowered into S32 G_PHI in AMDGPURegBankSelect 213*0fca6ea1SDimitry Andric if (DefTy == LLT::scalar(1)) 214*0fca6ea1SDimitry Andric return false; 2150b57cec5SDimitry Andric 2160b57cec5SDimitry Andric // TODO: Verify this doesn't have insane operands (i.e. VGPR to SGPR copy) 2170b57cec5SDimitry Andric 2180b57cec5SDimitry Andric const RegClassOrRegBank &RegClassOrBank = 2198bcb0991SDimitry Andric MRI->getRegClassOrRegBank(DefReg); 2200b57cec5SDimitry Andric 2210b57cec5SDimitry Andric const TargetRegisterClass *DefRC 2220b57cec5SDimitry Andric = RegClassOrBank.dyn_cast<const TargetRegisterClass *>(); 2230b57cec5SDimitry Andric if (!DefRC) { 2240b57cec5SDimitry Andric if (!DefTy.isValid()) { 2250b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n"); 2260b57cec5SDimitry Andric return false; 2270b57cec5SDimitry Andric } 2280b57cec5SDimitry Andric 2290b57cec5SDimitry Andric const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>(); 23081ad6265SDimitry Andric DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB); 2310b57cec5SDimitry Andric if (!DefRC) { 2320b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n"); 2330b57cec5SDimitry Andric return false; 2340b57cec5SDimitry Andric } 2350b57cec5SDimitry Andric } 2360b57cec5SDimitry Andric 237480093f4SDimitry Andric // TODO: Verify that all registers have the same bank 2380b57cec5SDimitry Andric I.setDesc(TII.get(TargetOpcode::PHI)); 2398bcb0991SDimitry Andric return RBI.constrainGenericRegister(DefReg, *DefRC, *MRI); 2400b57cec5SDimitry Andric } 2410b57cec5SDimitry Andric 2420b57cec5SDimitry Andric MachineOperand 2430b57cec5SDimitry Andric AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO, 2440b57cec5SDimitry Andric const TargetRegisterClass &SubRC, 2450b57cec5SDimitry Andric unsigned SubIdx) const { 2460b57cec5SDimitry Andric 2470b57cec5SDimitry Andric MachineInstr *MI = MO.getParent(); 2480b57cec5SDimitry Andric MachineBasicBlock *BB = MO.getParent()->getParent(); 2498bcb0991SDimitry Andric Register DstReg = MRI->createVirtualRegister(&SubRC); 2500b57cec5SDimitry Andric 2510b57cec5SDimitry Andric if (MO.isReg()) { 2520b57cec5SDimitry Andric unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx); 2538bcb0991SDimitry Andric Register Reg = MO.getReg(); 2540b57cec5SDimitry Andric BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg) 2550b57cec5SDimitry Andric .addReg(Reg, 0, ComposedSubIdx); 2560b57cec5SDimitry Andric 2570b57cec5SDimitry Andric return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(), 2580b57cec5SDimitry Andric MO.isKill(), MO.isDead(), MO.isUndef(), 2590b57cec5SDimitry Andric MO.isEarlyClobber(), 0, MO.isDebug(), 2600b57cec5SDimitry Andric MO.isInternalRead()); 2610b57cec5SDimitry Andric } 2620b57cec5SDimitry Andric 2630b57cec5SDimitry Andric assert(MO.isImm()); 2640b57cec5SDimitry Andric 2650b57cec5SDimitry Andric APInt Imm(64, MO.getImm()); 2660b57cec5SDimitry Andric 2670b57cec5SDimitry Andric switch (SubIdx) { 2680b57cec5SDimitry Andric default: 2690b57cec5SDimitry Andric llvm_unreachable("do not know to split immediate with this sub index."); 2700b57cec5SDimitry Andric case AMDGPU::sub0: 2710b57cec5SDimitry Andric return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue()); 2720b57cec5SDimitry Andric case AMDGPU::sub1: 2730b57cec5SDimitry Andric return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue()); 2740b57cec5SDimitry Andric } 2750b57cec5SDimitry Andric } 2760b57cec5SDimitry Andric 2770b57cec5SDimitry Andric static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) { 2780b57cec5SDimitry Andric switch (Opc) { 2790b57cec5SDimitry Andric case AMDGPU::G_AND: 2800b57cec5SDimitry Andric return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32; 2810b57cec5SDimitry Andric case AMDGPU::G_OR: 2820b57cec5SDimitry Andric return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32; 2830b57cec5SDimitry Andric case AMDGPU::G_XOR: 2840b57cec5SDimitry Andric return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32; 2850b57cec5SDimitry Andric default: 2860b57cec5SDimitry Andric llvm_unreachable("not a bit op"); 2870b57cec5SDimitry Andric } 2880b57cec5SDimitry Andric } 2890b57cec5SDimitry Andric 2900b57cec5SDimitry Andric bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const { 291e8d8bef9SDimitry Andric Register DstReg = I.getOperand(0).getReg(); 2928bcb0991SDimitry Andric unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI); 2930b57cec5SDimitry Andric 2948bcb0991SDimitry Andric const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI); 295e8d8bef9SDimitry Andric if (DstRB->getID() != AMDGPU::SGPRRegBankID && 296e8d8bef9SDimitry Andric DstRB->getID() != AMDGPU::VCCRegBankID) 297e8d8bef9SDimitry Andric return false; 2980b57cec5SDimitry Andric 299e8d8bef9SDimitry Andric bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID && 300e8d8bef9SDimitry Andric STI.isWave64()); 301e8d8bef9SDimitry Andric I.setDesc(TII.get(getLogicalBitOpcode(I.getOpcode(), Is64))); 3020b57cec5SDimitry Andric 303480093f4SDimitry Andric // Dead implicit-def of scc 304480093f4SDimitry Andric I.addOperand(MachineOperand::CreateReg(AMDGPU::SCC, true, // isDef 305480093f4SDimitry Andric true, // isImp 306480093f4SDimitry Andric false, // isKill 307480093f4SDimitry Andric true)); // isDead 3088bcb0991SDimitry Andric return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3090b57cec5SDimitry Andric } 3100b57cec5SDimitry Andric 3110b57cec5SDimitry Andric bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const { 3120b57cec5SDimitry Andric MachineBasicBlock *BB = I.getParent(); 3130b57cec5SDimitry Andric MachineFunction *MF = BB->getParent(); 3140b57cec5SDimitry Andric Register DstReg = I.getOperand(0).getReg(); 3150b57cec5SDimitry Andric const DebugLoc &DL = I.getDebugLoc(); 3165ffd83dbSDimitry Andric LLT Ty = MRI->getType(DstReg); 3175ffd83dbSDimitry Andric if (Ty.isVector()) 3185ffd83dbSDimitry Andric return false; 3195ffd83dbSDimitry Andric 3205ffd83dbSDimitry Andric unsigned Size = Ty.getSizeInBits(); 3218bcb0991SDimitry Andric const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI); 3220b57cec5SDimitry Andric const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID; 3230b57cec5SDimitry Andric const bool Sub = I.getOpcode() == TargetOpcode::G_SUB; 3240b57cec5SDimitry Andric 3250b57cec5SDimitry Andric if (Size == 32) { 3260b57cec5SDimitry Andric if (IsSALU) { 3270b57cec5SDimitry Andric const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32; 3280b57cec5SDimitry Andric MachineInstr *Add = 3290b57cec5SDimitry Andric BuildMI(*BB, &I, DL, TII.get(Opc), DstReg) 3300b57cec5SDimitry Andric .add(I.getOperand(1)) 3315f757f3fSDimitry Andric .add(I.getOperand(2)) 3325f757f3fSDimitry Andric .setOperandDead(3); // Dead scc 3330b57cec5SDimitry Andric I.eraseFromParent(); 3340b57cec5SDimitry Andric return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI); 3350b57cec5SDimitry Andric } 3360b57cec5SDimitry Andric 3370b57cec5SDimitry Andric if (STI.hasAddNoCarry()) { 3380b57cec5SDimitry Andric const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64; 3390b57cec5SDimitry Andric I.setDesc(TII.get(Opc)); 3400b57cec5SDimitry Andric I.addOperand(*MF, MachineOperand::CreateImm(0)); 3410b57cec5SDimitry Andric I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); 3420b57cec5SDimitry Andric return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3430b57cec5SDimitry Andric } 3440b57cec5SDimitry Andric 345e8d8bef9SDimitry Andric const unsigned Opc = Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64; 3460b57cec5SDimitry Andric 3478bcb0991SDimitry Andric Register UnusedCarry = MRI->createVirtualRegister(TRI.getWaveMaskRegClass()); 3480b57cec5SDimitry Andric MachineInstr *Add 3490b57cec5SDimitry Andric = BuildMI(*BB, &I, DL, TII.get(Opc), DstReg) 3500b57cec5SDimitry Andric .addDef(UnusedCarry, RegState::Dead) 3510b57cec5SDimitry Andric .add(I.getOperand(1)) 3520b57cec5SDimitry Andric .add(I.getOperand(2)) 3530b57cec5SDimitry Andric .addImm(0); 3540b57cec5SDimitry Andric I.eraseFromParent(); 3550b57cec5SDimitry Andric return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI); 3560b57cec5SDimitry Andric } 3570b57cec5SDimitry Andric 3580b57cec5SDimitry Andric assert(!Sub && "illegal sub should not reach here"); 3590b57cec5SDimitry Andric 3600b57cec5SDimitry Andric const TargetRegisterClass &RC 3610b57cec5SDimitry Andric = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass; 3620b57cec5SDimitry Andric const TargetRegisterClass &HalfRC 3630b57cec5SDimitry Andric = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass; 3640b57cec5SDimitry Andric 3650b57cec5SDimitry Andric MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0)); 3660b57cec5SDimitry Andric MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0)); 3670b57cec5SDimitry Andric MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1)); 3680b57cec5SDimitry Andric MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1)); 3690b57cec5SDimitry Andric 3708bcb0991SDimitry Andric Register DstLo = MRI->createVirtualRegister(&HalfRC); 3718bcb0991SDimitry Andric Register DstHi = MRI->createVirtualRegister(&HalfRC); 3720b57cec5SDimitry Andric 3730b57cec5SDimitry Andric if (IsSALU) { 3740b57cec5SDimitry Andric BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo) 3750b57cec5SDimitry Andric .add(Lo1) 3760b57cec5SDimitry Andric .add(Lo2); 3770b57cec5SDimitry Andric BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi) 3780b57cec5SDimitry Andric .add(Hi1) 3795f757f3fSDimitry Andric .add(Hi2) 3805f757f3fSDimitry Andric .setOperandDead(3); // Dead scc 3810b57cec5SDimitry Andric } else { 3820b57cec5SDimitry Andric const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass(); 3838bcb0991SDimitry Andric Register CarryReg = MRI->createVirtualRegister(CarryRC); 384e8d8bef9SDimitry Andric BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo) 3850b57cec5SDimitry Andric .addDef(CarryReg) 3860b57cec5SDimitry Andric .add(Lo1) 3870b57cec5SDimitry Andric .add(Lo2) 3880b57cec5SDimitry Andric .addImm(0); 3890b57cec5SDimitry Andric MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi) 3908bcb0991SDimitry Andric .addDef(MRI->createVirtualRegister(CarryRC), RegState::Dead) 3910b57cec5SDimitry Andric .add(Hi1) 3920b57cec5SDimitry Andric .add(Hi2) 3930b57cec5SDimitry Andric .addReg(CarryReg, RegState::Kill) 3940b57cec5SDimitry Andric .addImm(0); 3950b57cec5SDimitry Andric 3960b57cec5SDimitry Andric if (!constrainSelectedInstRegOperands(*Addc, TII, TRI, RBI)) 3970b57cec5SDimitry Andric return false; 3980b57cec5SDimitry Andric } 3990b57cec5SDimitry Andric 4000b57cec5SDimitry Andric BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg) 4010b57cec5SDimitry Andric .addReg(DstLo) 4020b57cec5SDimitry Andric .addImm(AMDGPU::sub0) 4030b57cec5SDimitry Andric .addReg(DstHi) 4040b57cec5SDimitry Andric .addImm(AMDGPU::sub1); 4050b57cec5SDimitry Andric 4060b57cec5SDimitry Andric 4078bcb0991SDimitry Andric if (!RBI.constrainGenericRegister(DstReg, RC, *MRI)) 4088bcb0991SDimitry Andric return false; 4098bcb0991SDimitry Andric 4108bcb0991SDimitry Andric I.eraseFromParent(); 4118bcb0991SDimitry Andric return true; 4128bcb0991SDimitry Andric } 4138bcb0991SDimitry Andric 414480093f4SDimitry Andric bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE( 415480093f4SDimitry Andric MachineInstr &I) const { 4168bcb0991SDimitry Andric MachineBasicBlock *BB = I.getParent(); 4178bcb0991SDimitry Andric MachineFunction *MF = BB->getParent(); 4188bcb0991SDimitry Andric const DebugLoc &DL = I.getDebugLoc(); 4198bcb0991SDimitry Andric Register Dst0Reg = I.getOperand(0).getReg(); 4208bcb0991SDimitry Andric Register Dst1Reg = I.getOperand(1).getReg(); 421480093f4SDimitry Andric const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO || 422480093f4SDimitry Andric I.getOpcode() == AMDGPU::G_UADDE; 423480093f4SDimitry Andric const bool HasCarryIn = I.getOpcode() == AMDGPU::G_UADDE || 424480093f4SDimitry Andric I.getOpcode() == AMDGPU::G_USUBE; 4258bcb0991SDimitry Andric 426480093f4SDimitry Andric if (isVCC(Dst1Reg, *MRI)) { 427e8d8bef9SDimitry Andric unsigned NoCarryOpc = 428e8d8bef9SDimitry Andric IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64; 429480093f4SDimitry Andric unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64; 430480093f4SDimitry Andric I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc)); 4318bcb0991SDimitry Andric I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); 4328bcb0991SDimitry Andric I.addOperand(*MF, MachineOperand::CreateImm(0)); 4338bcb0991SDimitry Andric return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 4348bcb0991SDimitry Andric } 4358bcb0991SDimitry Andric 4368bcb0991SDimitry Andric Register Src0Reg = I.getOperand(2).getReg(); 4378bcb0991SDimitry Andric Register Src1Reg = I.getOperand(3).getReg(); 438480093f4SDimitry Andric 439480093f4SDimitry Andric if (HasCarryIn) { 440480093f4SDimitry Andric BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC) 441480093f4SDimitry Andric .addReg(I.getOperand(4).getReg()); 442480093f4SDimitry Andric } 443480093f4SDimitry Andric 444480093f4SDimitry Andric unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32; 445480093f4SDimitry Andric unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32; 446480093f4SDimitry Andric 4475f757f3fSDimitry Andric auto CarryInst = BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg) 4488bcb0991SDimitry Andric .add(I.getOperand(2)) 4498bcb0991SDimitry Andric .add(I.getOperand(3)); 4505f757f3fSDimitry Andric 4515f757f3fSDimitry Andric if (MRI->use_nodbg_empty(Dst1Reg)) { 4525f757f3fSDimitry Andric CarryInst.setOperandDead(3); // Dead scc 4535f757f3fSDimitry Andric } else { 4548bcb0991SDimitry Andric BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg) 4558bcb0991SDimitry Andric .addReg(AMDGPU::SCC); 456480093f4SDimitry Andric if (!MRI->getRegClassOrNull(Dst1Reg)) 457480093f4SDimitry Andric MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass); 4585f757f3fSDimitry Andric } 4598bcb0991SDimitry Andric 460480093f4SDimitry Andric if (!RBI.constrainGenericRegister(Dst0Reg, AMDGPU::SReg_32RegClass, *MRI) || 461480093f4SDimitry Andric !RBI.constrainGenericRegister(Src0Reg, AMDGPU::SReg_32RegClass, *MRI) || 462480093f4SDimitry Andric !RBI.constrainGenericRegister(Src1Reg, AMDGPU::SReg_32RegClass, *MRI)) 463480093f4SDimitry Andric return false; 464480093f4SDimitry Andric 465480093f4SDimitry Andric if (HasCarryIn && 466480093f4SDimitry Andric !RBI.constrainGenericRegister(I.getOperand(4).getReg(), 467480093f4SDimitry Andric AMDGPU::SReg_32RegClass, *MRI)) 4680b57cec5SDimitry Andric return false; 4690b57cec5SDimitry Andric 4700b57cec5SDimitry Andric I.eraseFromParent(); 4710b57cec5SDimitry Andric return true; 4720b57cec5SDimitry Andric } 4730b57cec5SDimitry Andric 47481ad6265SDimitry Andric bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32( 47581ad6265SDimitry Andric MachineInstr &I) const { 47681ad6265SDimitry Andric MachineBasicBlock *BB = I.getParent(); 47781ad6265SDimitry Andric MachineFunction *MF = BB->getParent(); 47881ad6265SDimitry Andric const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32; 47981ad6265SDimitry Andric 48081ad6265SDimitry Andric unsigned Opc; 481bdd1243dSDimitry Andric if (Subtarget->hasMADIntraFwdBug()) 48281ad6265SDimitry Andric Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64 48381ad6265SDimitry Andric : AMDGPU::V_MAD_I64_I32_gfx11_e64; 48481ad6265SDimitry Andric else 48581ad6265SDimitry Andric Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64; 48681ad6265SDimitry Andric I.setDesc(TII.get(Opc)); 48781ad6265SDimitry Andric I.addOperand(*MF, MachineOperand::CreateImm(0)); 48881ad6265SDimitry Andric I.addImplicitDefUseOperands(*MF); 48981ad6265SDimitry Andric return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 49081ad6265SDimitry Andric } 49181ad6265SDimitry Andric 4925ffd83dbSDimitry Andric // TODO: We should probably legalize these to only using 32-bit results. 4930b57cec5SDimitry Andric bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const { 4940b57cec5SDimitry Andric MachineBasicBlock *BB = I.getParent(); 495480093f4SDimitry Andric Register DstReg = I.getOperand(0).getReg(); 496480093f4SDimitry Andric Register SrcReg = I.getOperand(1).getReg(); 497480093f4SDimitry Andric LLT DstTy = MRI->getType(DstReg); 498480093f4SDimitry Andric LLT SrcTy = MRI->getType(SrcReg); 499480093f4SDimitry Andric const unsigned SrcSize = SrcTy.getSizeInBits(); 5005ffd83dbSDimitry Andric unsigned DstSize = DstTy.getSizeInBits(); 501480093f4SDimitry Andric 502480093f4SDimitry Andric // TODO: Should handle any multiple of 32 offset. 5038bcb0991SDimitry Andric unsigned Offset = I.getOperand(2).getImm(); 5045ffd83dbSDimitry Andric if (Offset % 32 != 0 || DstSize > 128) 5055ffd83dbSDimitry Andric return false; 5065ffd83dbSDimitry Andric 5075ffd83dbSDimitry Andric // 16-bit operations really use 32-bit registers. 5085ffd83dbSDimitry Andric // FIXME: Probably should not allow 16-bit G_EXTRACT results. 5095ffd83dbSDimitry Andric if (DstSize == 16) 5105ffd83dbSDimitry Andric DstSize = 32; 5115ffd83dbSDimitry Andric 5125ffd83dbSDimitry Andric const TargetRegisterClass *DstRC = 5135ffd83dbSDimitry Andric TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI); 5145ffd83dbSDimitry Andric if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) 5158bcb0991SDimitry Andric return false; 5168bcb0991SDimitry Andric 517480093f4SDimitry Andric const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI); 518480093f4SDimitry Andric const TargetRegisterClass *SrcRC = 51981ad6265SDimitry Andric TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank); 520480093f4SDimitry Andric if (!SrcRC) 521480093f4SDimitry Andric return false; 5225ffd83dbSDimitry Andric unsigned SubReg = SIRegisterInfo::getSubRegFromChannel(Offset / 32, 5235ffd83dbSDimitry Andric DstSize / 32); 5245ffd83dbSDimitry Andric SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg); 5255ffd83dbSDimitry Andric if (!SrcRC) 5265ffd83dbSDimitry Andric return false; 527480093f4SDimitry Andric 5285ffd83dbSDimitry Andric SrcReg = constrainOperandRegClass(*MF, TRI, *MRI, TII, RBI, I, 5295ffd83dbSDimitry Andric *SrcRC, I.getOperand(1)); 5300b57cec5SDimitry Andric const DebugLoc &DL = I.getDebugLoc(); 5315ffd83dbSDimitry Andric BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg) 5325ffd83dbSDimitry Andric .addReg(SrcReg, 0, SubReg); 5330b57cec5SDimitry Andric 5340b57cec5SDimitry Andric I.eraseFromParent(); 5350b57cec5SDimitry Andric return true; 5360b57cec5SDimitry Andric } 5370b57cec5SDimitry Andric 5380b57cec5SDimitry Andric bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const { 5390b57cec5SDimitry Andric MachineBasicBlock *BB = MI.getParent(); 5400b57cec5SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 5418bcb0991SDimitry Andric LLT DstTy = MRI->getType(DstReg); 5428bcb0991SDimitry Andric LLT SrcTy = MRI->getType(MI.getOperand(1).getReg()); 5430b57cec5SDimitry Andric 5440b57cec5SDimitry Andric const unsigned SrcSize = SrcTy.getSizeInBits(); 5450b57cec5SDimitry Andric if (SrcSize < 32) 546480093f4SDimitry Andric return selectImpl(MI, *CoverageInfo); 5470b57cec5SDimitry Andric 5480b57cec5SDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 5498bcb0991SDimitry Andric const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI); 5500b57cec5SDimitry Andric const unsigned DstSize = DstTy.getSizeInBits(); 5510b57cec5SDimitry Andric const TargetRegisterClass *DstRC = 55281ad6265SDimitry Andric TRI.getRegClassForSizeOnBank(DstSize, *DstBank); 5530b57cec5SDimitry Andric if (!DstRC) 5540b57cec5SDimitry Andric return false; 5550b57cec5SDimitry Andric 5560b57cec5SDimitry Andric ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8); 5570b57cec5SDimitry Andric MachineInstrBuilder MIB = 5580b57cec5SDimitry Andric BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg); 5590b57cec5SDimitry Andric for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) { 5600b57cec5SDimitry Andric MachineOperand &Src = MI.getOperand(I + 1); 5610b57cec5SDimitry Andric MIB.addReg(Src.getReg(), getUndefRegState(Src.isUndef())); 5620b57cec5SDimitry Andric MIB.addImm(SubRegs[I]); 5630b57cec5SDimitry Andric 5640b57cec5SDimitry Andric const TargetRegisterClass *SrcRC 5658bcb0991SDimitry Andric = TRI.getConstrainedRegClassForOperand(Src, *MRI); 5668bcb0991SDimitry Andric if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI)) 5670b57cec5SDimitry Andric return false; 5680b57cec5SDimitry Andric } 5690b57cec5SDimitry Andric 5708bcb0991SDimitry Andric if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) 5710b57cec5SDimitry Andric return false; 5720b57cec5SDimitry Andric 5730b57cec5SDimitry Andric MI.eraseFromParent(); 5740b57cec5SDimitry Andric return true; 5750b57cec5SDimitry Andric } 5760b57cec5SDimitry Andric 5770b57cec5SDimitry Andric bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const { 5780b57cec5SDimitry Andric MachineBasicBlock *BB = MI.getParent(); 5790b57cec5SDimitry Andric const int NumDst = MI.getNumOperands() - 1; 5800b57cec5SDimitry Andric 5810b57cec5SDimitry Andric MachineOperand &Src = MI.getOperand(NumDst); 5820b57cec5SDimitry Andric 5830b57cec5SDimitry Andric Register SrcReg = Src.getReg(); 5840b57cec5SDimitry Andric Register DstReg0 = MI.getOperand(0).getReg(); 5858bcb0991SDimitry Andric LLT DstTy = MRI->getType(DstReg0); 5868bcb0991SDimitry Andric LLT SrcTy = MRI->getType(SrcReg); 5870b57cec5SDimitry Andric 5880b57cec5SDimitry Andric const unsigned DstSize = DstTy.getSizeInBits(); 5890b57cec5SDimitry Andric const unsigned SrcSize = SrcTy.getSizeInBits(); 5900b57cec5SDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 5918bcb0991SDimitry Andric const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI); 5920b57cec5SDimitry Andric 5930b57cec5SDimitry Andric const TargetRegisterClass *SrcRC = 59481ad6265SDimitry Andric TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank); 5958bcb0991SDimitry Andric if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI)) 5960b57cec5SDimitry Andric return false; 5970b57cec5SDimitry Andric 5980b57cec5SDimitry Andric // Note we could have mixed SGPR and VGPR destination banks for an SGPR 5990b57cec5SDimitry Andric // source, and this relies on the fact that the same subregister indices are 6000b57cec5SDimitry Andric // used for both. 6010b57cec5SDimitry Andric ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8); 6020b57cec5SDimitry Andric for (int I = 0, E = NumDst; I != E; ++I) { 6030b57cec5SDimitry Andric MachineOperand &Dst = MI.getOperand(I); 6040b57cec5SDimitry Andric BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg()) 605e8d8bef9SDimitry Andric .addReg(SrcReg, 0, SubRegs[I]); 606e8d8bef9SDimitry Andric 607e8d8bef9SDimitry Andric // Make sure the subregister index is valid for the source register. 608e8d8bef9SDimitry Andric SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]); 609e8d8bef9SDimitry Andric if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI)) 610e8d8bef9SDimitry Andric return false; 6110b57cec5SDimitry Andric 6120b57cec5SDimitry Andric const TargetRegisterClass *DstRC = 6138bcb0991SDimitry Andric TRI.getConstrainedRegClassForOperand(Dst, *MRI); 6148bcb0991SDimitry Andric if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI)) 6150b57cec5SDimitry Andric return false; 6160b57cec5SDimitry Andric } 6170b57cec5SDimitry Andric 6180b57cec5SDimitry Andric MI.eraseFromParent(); 6190b57cec5SDimitry Andric return true; 6200b57cec5SDimitry Andric } 6210b57cec5SDimitry Andric 622bdd1243dSDimitry Andric bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const { 623bdd1243dSDimitry Andric assert(MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC || 624bdd1243dSDimitry Andric MI.getOpcode() == AMDGPU::G_BUILD_VECTOR); 6255ffd83dbSDimitry Andric 6265ffd83dbSDimitry Andric Register Src0 = MI.getOperand(1).getReg(); 6275ffd83dbSDimitry Andric Register Src1 = MI.getOperand(2).getReg(); 628bdd1243dSDimitry Andric LLT SrcTy = MRI->getType(Src0); 629bdd1243dSDimitry Andric const unsigned SrcSize = SrcTy.getSizeInBits(); 630bdd1243dSDimitry Andric 631bdd1243dSDimitry Andric // BUILD_VECTOR with >=32 bits source is handled by MERGE_VALUE. 632bdd1243dSDimitry Andric if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) { 633bdd1243dSDimitry Andric return selectG_MERGE_VALUES(MI); 634bdd1243dSDimitry Andric } 635bdd1243dSDimitry Andric 636bdd1243dSDimitry Andric // Selection logic below is for V2S16 only. 637bdd1243dSDimitry Andric // For G_BUILD_VECTOR_TRUNC, additionally check that the operands are s32. 638bdd1243dSDimitry Andric Register Dst = MI.getOperand(0).getReg(); 639bdd1243dSDimitry Andric if (MRI->getType(Dst) != LLT::fixed_vector(2, 16) || 640bdd1243dSDimitry Andric (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC && 641bdd1243dSDimitry Andric SrcTy != LLT::scalar(32))) 642bdd1243dSDimitry Andric return selectImpl(MI, *CoverageInfo); 643bdd1243dSDimitry Andric 644bdd1243dSDimitry Andric const RegisterBank *DstBank = RBI.getRegBank(Dst, *MRI, TRI); 645bdd1243dSDimitry Andric if (DstBank->getID() == AMDGPU::AGPRRegBankID) 6465ffd83dbSDimitry Andric return false; 6475ffd83dbSDimitry Andric 648bdd1243dSDimitry Andric assert(DstBank->getID() == AMDGPU::SGPRRegBankID || 649bdd1243dSDimitry Andric DstBank->getID() == AMDGPU::VGPRRegBankID); 650bdd1243dSDimitry Andric const bool IsVector = DstBank->getID() == AMDGPU::VGPRRegBankID; 651bdd1243dSDimitry Andric 6525ffd83dbSDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 6535ffd83dbSDimitry Andric MachineBasicBlock *BB = MI.getParent(); 6545ffd83dbSDimitry Andric 655bdd1243dSDimitry Andric // First, before trying TableGen patterns, check if both sources are 656bdd1243dSDimitry Andric // constants. In those cases, we can trivially compute the final constant 657bdd1243dSDimitry Andric // and emit a simple move. 658349cc55cSDimitry Andric auto ConstSrc1 = getAnyConstantVRegValWithLookThrough(Src1, *MRI, true, true); 659e8d8bef9SDimitry Andric if (ConstSrc1) { 660e8d8bef9SDimitry Andric auto ConstSrc0 = 661349cc55cSDimitry Andric getAnyConstantVRegValWithLookThrough(Src0, *MRI, true, true); 662e8d8bef9SDimitry Andric if (ConstSrc0) { 663e8d8bef9SDimitry Andric const int64_t K0 = ConstSrc0->Value.getSExtValue(); 664e8d8bef9SDimitry Andric const int64_t K1 = ConstSrc1->Value.getSExtValue(); 665e8d8bef9SDimitry Andric uint32_t Lo16 = static_cast<uint32_t>(K0) & 0xffff; 666e8d8bef9SDimitry Andric uint32_t Hi16 = static_cast<uint32_t>(K1) & 0xffff; 667bdd1243dSDimitry Andric uint32_t Imm = Lo16 | (Hi16 << 16); 668e8d8bef9SDimitry Andric 669bdd1243dSDimitry Andric // VALU 670bdd1243dSDimitry Andric if (IsVector) { 671bdd1243dSDimitry Andric BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOV_B32_e32), Dst).addImm(Imm); 672bdd1243dSDimitry Andric MI.eraseFromParent(); 673bdd1243dSDimitry Andric return RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI); 674bdd1243dSDimitry Andric } 675bdd1243dSDimitry Andric 676bdd1243dSDimitry Andric // SALU 677bdd1243dSDimitry Andric BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), Dst).addImm(Imm); 678e8d8bef9SDimitry Andric MI.eraseFromParent(); 679e8d8bef9SDimitry Andric return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI); 680e8d8bef9SDimitry Andric } 681e8d8bef9SDimitry Andric } 682e8d8bef9SDimitry Andric 683bdd1243dSDimitry Andric // Now try TableGen patterns. 684bdd1243dSDimitry Andric if (selectImpl(MI, *CoverageInfo)) 685bdd1243dSDimitry Andric return true; 686bdd1243dSDimitry Andric 6875ffd83dbSDimitry Andric // TODO: This should probably be a combine somewhere 688bdd1243dSDimitry Andric // (build_vector $src0, undef) -> copy $src0 6895ffd83dbSDimitry Andric MachineInstr *Src1Def = getDefIgnoringCopies(Src1, *MRI); 690bdd1243dSDimitry Andric if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) { 6915ffd83dbSDimitry Andric MI.setDesc(TII.get(AMDGPU::COPY)); 69281ad6265SDimitry Andric MI.removeOperand(2); 693bdd1243dSDimitry Andric const auto &RC = 694bdd1243dSDimitry Andric IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass; 695bdd1243dSDimitry Andric return RBI.constrainGenericRegister(Dst, RC, *MRI) && 696bdd1243dSDimitry Andric RBI.constrainGenericRegister(Src0, RC, *MRI); 697bdd1243dSDimitry Andric } 698bdd1243dSDimitry Andric 699bdd1243dSDimitry Andric // TODO: Can be improved? 700bdd1243dSDimitry Andric if (IsVector) { 701bdd1243dSDimitry Andric Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); 702bdd1243dSDimitry Andric auto MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg) 703bdd1243dSDimitry Andric .addImm(0xFFFF) 704bdd1243dSDimitry Andric .addReg(Src0); 705bdd1243dSDimitry Andric if (!constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI)) 706bdd1243dSDimitry Andric return false; 707bdd1243dSDimitry Andric 708bdd1243dSDimitry Andric MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst) 709bdd1243dSDimitry Andric .addReg(Src1) 710bdd1243dSDimitry Andric .addImm(16) 711bdd1243dSDimitry Andric .addReg(TmpReg); 712bdd1243dSDimitry Andric if (!constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI)) 713bdd1243dSDimitry Andric return false; 714bdd1243dSDimitry Andric 715bdd1243dSDimitry Andric MI.eraseFromParent(); 716bdd1243dSDimitry Andric return true; 7175ffd83dbSDimitry Andric } 7185ffd83dbSDimitry Andric 7195ffd83dbSDimitry Andric Register ShiftSrc0; 7205ffd83dbSDimitry Andric Register ShiftSrc1; 7215ffd83dbSDimitry Andric 7225ffd83dbSDimitry Andric // With multiple uses of the shift, this will duplicate the shift and 7235ffd83dbSDimitry Andric // increase register pressure. 7245ffd83dbSDimitry Andric // 725bdd1243dSDimitry Andric // (build_vector (lshr_oneuse $src0, 16), (lshr_oneuse $src1, 16) 7265ffd83dbSDimitry Andric // => (S_PACK_HH_B32_B16 $src0, $src1) 727bdd1243dSDimitry Andric // (build_vector (lshr_oneuse SReg_32:$src0, 16), $src1) 72881ad6265SDimitry Andric // => (S_PACK_HL_B32_B16 $src0, $src1) 729bdd1243dSDimitry Andric // (build_vector $src0, (lshr_oneuse SReg_32:$src1, 16)) 7305ffd83dbSDimitry Andric // => (S_PACK_LH_B32_B16 $src0, $src1) 731bdd1243dSDimitry Andric // (build_vector $src0, $src1) 7325ffd83dbSDimitry Andric // => (S_PACK_LL_B32_B16 $src0, $src1) 7335ffd83dbSDimitry Andric 7345ffd83dbSDimitry Andric bool Shift0 = mi_match( 735e8d8bef9SDimitry Andric Src0, *MRI, m_OneUse(m_GLShr(m_Reg(ShiftSrc0), m_SpecificICst(16)))); 7365ffd83dbSDimitry Andric 7375ffd83dbSDimitry Andric bool Shift1 = mi_match( 738e8d8bef9SDimitry Andric Src1, *MRI, m_OneUse(m_GLShr(m_Reg(ShiftSrc1), m_SpecificICst(16)))); 7395ffd83dbSDimitry Andric 7405ffd83dbSDimitry Andric unsigned Opc = AMDGPU::S_PACK_LL_B32_B16; 7415ffd83dbSDimitry Andric if (Shift0 && Shift1) { 7425ffd83dbSDimitry Andric Opc = AMDGPU::S_PACK_HH_B32_B16; 7435ffd83dbSDimitry Andric MI.getOperand(1).setReg(ShiftSrc0); 7445ffd83dbSDimitry Andric MI.getOperand(2).setReg(ShiftSrc1); 7455ffd83dbSDimitry Andric } else if (Shift1) { 7465ffd83dbSDimitry Andric Opc = AMDGPU::S_PACK_LH_B32_B16; 7475ffd83dbSDimitry Andric MI.getOperand(2).setReg(ShiftSrc1); 74881ad6265SDimitry Andric } else if (Shift0) { 749bdd1243dSDimitry Andric auto ConstSrc1 = 750bdd1243dSDimitry Andric getAnyConstantVRegValWithLookThrough(Src1, *MRI, true, true); 75181ad6265SDimitry Andric if (ConstSrc1 && ConstSrc1->Value == 0) { 7525ffd83dbSDimitry Andric // build_vector_trunc (lshr $src0, 16), 0 -> s_lshr_b32 $src0, 16 7535ffd83dbSDimitry Andric auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst) 7545ffd83dbSDimitry Andric .addReg(ShiftSrc0) 7555f757f3fSDimitry Andric .addImm(16) 7565f757f3fSDimitry Andric .setOperandDead(3); // Dead scc 7575ffd83dbSDimitry Andric 7585ffd83dbSDimitry Andric MI.eraseFromParent(); 7595ffd83dbSDimitry Andric return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); 7605ffd83dbSDimitry Andric } 76181ad6265SDimitry Andric if (STI.hasSPackHL()) { 76281ad6265SDimitry Andric Opc = AMDGPU::S_PACK_HL_B32_B16; 76381ad6265SDimitry Andric MI.getOperand(1).setReg(ShiftSrc0); 76481ad6265SDimitry Andric } 76581ad6265SDimitry Andric } 7665ffd83dbSDimitry Andric 7675ffd83dbSDimitry Andric MI.setDesc(TII.get(Opc)); 7685ffd83dbSDimitry Andric return constrainSelectedInstRegOperands(MI, TII, TRI, RBI); 7695ffd83dbSDimitry Andric } 7705ffd83dbSDimitry Andric 7710b57cec5SDimitry Andric bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const { 7720b57cec5SDimitry Andric const MachineOperand &MO = I.getOperand(0); 7730b57cec5SDimitry Andric 7740b57cec5SDimitry Andric // FIXME: Interface for getConstrainedRegClassForOperand needs work. The 7750b57cec5SDimitry Andric // regbank check here is to know why getConstrainedRegClassForOperand failed. 7768bcb0991SDimitry Andric const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, *MRI); 7778bcb0991SDimitry Andric if ((!RC && !MRI->getRegBankOrNull(MO.getReg())) || 7788bcb0991SDimitry Andric (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI))) { 7790b57cec5SDimitry Andric I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF)); 7800b57cec5SDimitry Andric return true; 7810b57cec5SDimitry Andric } 7820b57cec5SDimitry Andric 7830b57cec5SDimitry Andric return false; 7840b57cec5SDimitry Andric } 7850b57cec5SDimitry Andric 7860b57cec5SDimitry Andric bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const { 7870b57cec5SDimitry Andric MachineBasicBlock *BB = I.getParent(); 7888bcb0991SDimitry Andric 7898bcb0991SDimitry Andric Register DstReg = I.getOperand(0).getReg(); 7908bcb0991SDimitry Andric Register Src0Reg = I.getOperand(1).getReg(); 7918bcb0991SDimitry Andric Register Src1Reg = I.getOperand(2).getReg(); 7928bcb0991SDimitry Andric LLT Src1Ty = MRI->getType(Src1Reg); 7938bcb0991SDimitry Andric 7948bcb0991SDimitry Andric unsigned DstSize = MRI->getType(DstReg).getSizeInBits(); 7958bcb0991SDimitry Andric unsigned InsSize = Src1Ty.getSizeInBits(); 7968bcb0991SDimitry Andric 7978bcb0991SDimitry Andric int64_t Offset = I.getOperand(3).getImm(); 7985ffd83dbSDimitry Andric 7995ffd83dbSDimitry Andric // FIXME: These cases should have been illegal and unnecessary to check here. 8005ffd83dbSDimitry Andric if (Offset % 32 != 0 || InsSize % 32 != 0) 8018bcb0991SDimitry Andric return false; 8028bcb0991SDimitry Andric 803e8d8bef9SDimitry Andric // Currently not handled by getSubRegFromChannel. 804e8d8bef9SDimitry Andric if (InsSize > 128) 805e8d8bef9SDimitry Andric return false; 806e8d8bef9SDimitry Andric 8078bcb0991SDimitry Andric unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32, InsSize / 32); 8088bcb0991SDimitry Andric if (SubReg == AMDGPU::NoSubRegister) 8098bcb0991SDimitry Andric return false; 8108bcb0991SDimitry Andric 8118bcb0991SDimitry Andric const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI); 8128bcb0991SDimitry Andric const TargetRegisterClass *DstRC = 81381ad6265SDimitry Andric TRI.getRegClassForSizeOnBank(DstSize, *DstBank); 8148bcb0991SDimitry Andric if (!DstRC) 8158bcb0991SDimitry Andric return false; 8168bcb0991SDimitry Andric 8178bcb0991SDimitry Andric const RegisterBank *Src0Bank = RBI.getRegBank(Src0Reg, *MRI, TRI); 8188bcb0991SDimitry Andric const RegisterBank *Src1Bank = RBI.getRegBank(Src1Reg, *MRI, TRI); 8198bcb0991SDimitry Andric const TargetRegisterClass *Src0RC = 82081ad6265SDimitry Andric TRI.getRegClassForSizeOnBank(DstSize, *Src0Bank); 8218bcb0991SDimitry Andric const TargetRegisterClass *Src1RC = 82281ad6265SDimitry Andric TRI.getRegClassForSizeOnBank(InsSize, *Src1Bank); 8238bcb0991SDimitry Andric 8248bcb0991SDimitry Andric // Deal with weird cases where the class only partially supports the subreg 8258bcb0991SDimitry Andric // index. 8268bcb0991SDimitry Andric Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg); 8275ffd83dbSDimitry Andric if (!Src0RC || !Src1RC) 8288bcb0991SDimitry Andric return false; 8298bcb0991SDimitry Andric 8308bcb0991SDimitry Andric if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) || 8318bcb0991SDimitry Andric !RBI.constrainGenericRegister(Src0Reg, *Src0RC, *MRI) || 8328bcb0991SDimitry Andric !RBI.constrainGenericRegister(Src1Reg, *Src1RC, *MRI)) 8338bcb0991SDimitry Andric return false; 8348bcb0991SDimitry Andric 8358bcb0991SDimitry Andric const DebugLoc &DL = I.getDebugLoc(); 8368bcb0991SDimitry Andric BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg) 8378bcb0991SDimitry Andric .addReg(Src0Reg) 8388bcb0991SDimitry Andric .addReg(Src1Reg) 8390b57cec5SDimitry Andric .addImm(SubReg); 8400b57cec5SDimitry Andric 8410b57cec5SDimitry Andric I.eraseFromParent(); 8420b57cec5SDimitry Andric return true; 8430b57cec5SDimitry Andric } 8440b57cec5SDimitry Andric 845fe6060f1SDimitry Andric bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(MachineInstr &MI) const { 846fe6060f1SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 847fe6060f1SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 848fe6060f1SDimitry Andric Register OffsetReg = MI.getOperand(2).getReg(); 849fe6060f1SDimitry Andric Register WidthReg = MI.getOperand(3).getReg(); 850fe6060f1SDimitry Andric 851fe6060f1SDimitry Andric assert(RBI.getRegBank(DstReg, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID && 852fe6060f1SDimitry Andric "scalar BFX instructions are expanded in regbankselect"); 853fe6060f1SDimitry Andric assert(MRI->getType(MI.getOperand(0).getReg()).getSizeInBits() == 32 && 854fe6060f1SDimitry Andric "64-bit vector BFX instructions are expanded in regbankselect"); 855fe6060f1SDimitry Andric 856fe6060f1SDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 857fe6060f1SDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 858fe6060f1SDimitry Andric 859fe6060f1SDimitry Andric bool IsSigned = MI.getOpcode() == TargetOpcode::G_SBFX; 860fe6060f1SDimitry Andric unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64; 861fe6060f1SDimitry Andric auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc), DstReg) 862fe6060f1SDimitry Andric .addReg(SrcReg) 863fe6060f1SDimitry Andric .addReg(OffsetReg) 864fe6060f1SDimitry Andric .addReg(WidthReg); 865fe6060f1SDimitry Andric MI.eraseFromParent(); 866fe6060f1SDimitry Andric return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); 867fe6060f1SDimitry Andric } 868fe6060f1SDimitry Andric 8695ffd83dbSDimitry Andric bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const { 8705ffd83dbSDimitry Andric if (STI.getLDSBankCount() != 16) 8715ffd83dbSDimitry Andric return selectImpl(MI, *CoverageInfo); 8725ffd83dbSDimitry Andric 8735ffd83dbSDimitry Andric Register Dst = MI.getOperand(0).getReg(); 8745ffd83dbSDimitry Andric Register Src0 = MI.getOperand(2).getReg(); 8755ffd83dbSDimitry Andric Register M0Val = MI.getOperand(6).getReg(); 8765ffd83dbSDimitry Andric if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI) || 8775ffd83dbSDimitry Andric !RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI) || 8785ffd83dbSDimitry Andric !RBI.constrainGenericRegister(Src0, AMDGPU::VGPR_32RegClass, *MRI)) 8795ffd83dbSDimitry Andric return false; 8805ffd83dbSDimitry Andric 8815ffd83dbSDimitry Andric // This requires 2 instructions. It is possible to write a pattern to support 8825ffd83dbSDimitry Andric // this, but the generated isel emitter doesn't correctly deal with multiple 8835ffd83dbSDimitry Andric // output instructions using the same physical register input. The copy to m0 8845ffd83dbSDimitry Andric // is incorrectly placed before the second instruction. 8855ffd83dbSDimitry Andric // 8865ffd83dbSDimitry Andric // TODO: Match source modifiers. 8875ffd83dbSDimitry Andric 8885ffd83dbSDimitry Andric Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); 8895ffd83dbSDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 8905ffd83dbSDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 8915ffd83dbSDimitry Andric 8925ffd83dbSDimitry Andric BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0) 8935ffd83dbSDimitry Andric .addReg(M0Val); 8945ffd83dbSDimitry Andric BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov) 8955ffd83dbSDimitry Andric .addImm(2) 8965ffd83dbSDimitry Andric .addImm(MI.getOperand(4).getImm()) // $attr 8975ffd83dbSDimitry Andric .addImm(MI.getOperand(3).getImm()); // $attrchan 8985ffd83dbSDimitry Andric 8995ffd83dbSDimitry Andric BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_P1LV_F16), Dst) 9005ffd83dbSDimitry Andric .addImm(0) // $src0_modifiers 9015ffd83dbSDimitry Andric .addReg(Src0) // $src0 9025ffd83dbSDimitry Andric .addImm(MI.getOperand(4).getImm()) // $attr 9035ffd83dbSDimitry Andric .addImm(MI.getOperand(3).getImm()) // $attrchan 9045ffd83dbSDimitry Andric .addImm(0) // $src2_modifiers 9055ffd83dbSDimitry Andric .addReg(InterpMov) // $src2 - 2 f16 values selected by high 9065ffd83dbSDimitry Andric .addImm(MI.getOperand(5).getImm()) // $high 9075ffd83dbSDimitry Andric .addImm(0) // $clamp 9085ffd83dbSDimitry Andric .addImm(0); // $omod 9095ffd83dbSDimitry Andric 9105ffd83dbSDimitry Andric MI.eraseFromParent(); 9115ffd83dbSDimitry Andric return true; 9125ffd83dbSDimitry Andric } 9135ffd83dbSDimitry Andric 914e8d8bef9SDimitry Andric // Writelane is special in that it can use SGPR and M0 (which would normally 915e8d8bef9SDimitry Andric // count as using the constant bus twice - but in this case it is allowed since 916e8d8bef9SDimitry Andric // the lane selector doesn't count as a use of the constant bus). However, it is 917e8d8bef9SDimitry Andric // still required to abide by the 1 SGPR rule. Fix this up if we might have 918e8d8bef9SDimitry Andric // multiple SGPRs. 919e8d8bef9SDimitry Andric bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const { 920e8d8bef9SDimitry Andric // With a constant bus limit of at least 2, there's no issue. 921e8d8bef9SDimitry Andric if (STI.getConstantBusLimit(AMDGPU::V_WRITELANE_B32) > 1) 922e8d8bef9SDimitry Andric return selectImpl(MI, *CoverageInfo); 923e8d8bef9SDimitry Andric 924e8d8bef9SDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 925e8d8bef9SDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 926e8d8bef9SDimitry Andric Register VDst = MI.getOperand(0).getReg(); 927e8d8bef9SDimitry Andric Register Val = MI.getOperand(2).getReg(); 928e8d8bef9SDimitry Andric Register LaneSelect = MI.getOperand(3).getReg(); 929e8d8bef9SDimitry Andric Register VDstIn = MI.getOperand(4).getReg(); 930e8d8bef9SDimitry Andric 931e8d8bef9SDimitry Andric auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst); 932e8d8bef9SDimitry Andric 933bdd1243dSDimitry Andric std::optional<ValueAndVReg> ConstSelect = 934349cc55cSDimitry Andric getIConstantVRegValWithLookThrough(LaneSelect, *MRI); 935e8d8bef9SDimitry Andric if (ConstSelect) { 936e8d8bef9SDimitry Andric // The selector has to be an inline immediate, so we can use whatever for 937e8d8bef9SDimitry Andric // the other operands. 938e8d8bef9SDimitry Andric MIB.addReg(Val); 939e8d8bef9SDimitry Andric MIB.addImm(ConstSelect->Value.getSExtValue() & 940e8d8bef9SDimitry Andric maskTrailingOnes<uint64_t>(STI.getWavefrontSizeLog2())); 941e8d8bef9SDimitry Andric } else { 942bdd1243dSDimitry Andric std::optional<ValueAndVReg> ConstVal = 943349cc55cSDimitry Andric getIConstantVRegValWithLookThrough(Val, *MRI); 944e8d8bef9SDimitry Andric 945e8d8bef9SDimitry Andric // If the value written is an inline immediate, we can get away without a 946e8d8bef9SDimitry Andric // copy to m0. 947e8d8bef9SDimitry Andric if (ConstVal && AMDGPU::isInlinableLiteral32(ConstVal->Value.getSExtValue(), 948e8d8bef9SDimitry Andric STI.hasInv2PiInlineImm())) { 949e8d8bef9SDimitry Andric MIB.addImm(ConstVal->Value.getSExtValue()); 950e8d8bef9SDimitry Andric MIB.addReg(LaneSelect); 951e8d8bef9SDimitry Andric } else { 952e8d8bef9SDimitry Andric MIB.addReg(Val); 953e8d8bef9SDimitry Andric 954e8d8bef9SDimitry Andric // If the lane selector was originally in a VGPR and copied with 955e8d8bef9SDimitry Andric // readfirstlane, there's a hazard to read the same SGPR from the 956e8d8bef9SDimitry Andric // VALU. Constrain to a different SGPR to help avoid needing a nop later. 957e8d8bef9SDimitry Andric RBI.constrainGenericRegister(LaneSelect, AMDGPU::SReg_32_XM0RegClass, *MRI); 958e8d8bef9SDimitry Andric 959e8d8bef9SDimitry Andric BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::COPY), AMDGPU::M0) 960e8d8bef9SDimitry Andric .addReg(LaneSelect); 961e8d8bef9SDimitry Andric MIB.addReg(AMDGPU::M0); 962e8d8bef9SDimitry Andric } 963e8d8bef9SDimitry Andric } 964e8d8bef9SDimitry Andric 965e8d8bef9SDimitry Andric MIB.addReg(VDstIn); 966e8d8bef9SDimitry Andric 967e8d8bef9SDimitry Andric MI.eraseFromParent(); 968e8d8bef9SDimitry Andric return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); 969e8d8bef9SDimitry Andric } 970e8d8bef9SDimitry Andric 9715ffd83dbSDimitry Andric // We need to handle this here because tablegen doesn't support matching 9725ffd83dbSDimitry Andric // instructions with multiple outputs. 9735ffd83dbSDimitry Andric bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const { 9745ffd83dbSDimitry Andric Register Dst0 = MI.getOperand(0).getReg(); 9755ffd83dbSDimitry Andric Register Dst1 = MI.getOperand(1).getReg(); 9765ffd83dbSDimitry Andric 9775ffd83dbSDimitry Andric LLT Ty = MRI->getType(Dst0); 9785ffd83dbSDimitry Andric unsigned Opc; 9795ffd83dbSDimitry Andric if (Ty == LLT::scalar(32)) 980e8d8bef9SDimitry Andric Opc = AMDGPU::V_DIV_SCALE_F32_e64; 9815ffd83dbSDimitry Andric else if (Ty == LLT::scalar(64)) 982e8d8bef9SDimitry Andric Opc = AMDGPU::V_DIV_SCALE_F64_e64; 9835ffd83dbSDimitry Andric else 9845ffd83dbSDimitry Andric return false; 9855ffd83dbSDimitry Andric 986e8d8bef9SDimitry Andric // TODO: Match source modifiers. 987e8d8bef9SDimitry Andric 9885ffd83dbSDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 9895ffd83dbSDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 9905ffd83dbSDimitry Andric 9915ffd83dbSDimitry Andric Register Numer = MI.getOperand(3).getReg(); 9925ffd83dbSDimitry Andric Register Denom = MI.getOperand(4).getReg(); 9935ffd83dbSDimitry Andric unsigned ChooseDenom = MI.getOperand(5).getImm(); 9945ffd83dbSDimitry Andric 9955ffd83dbSDimitry Andric Register Src0 = ChooseDenom != 0 ? Numer : Denom; 9965ffd83dbSDimitry Andric 9975ffd83dbSDimitry Andric auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc), Dst0) 9985ffd83dbSDimitry Andric .addDef(Dst1) 999e8d8bef9SDimitry Andric .addImm(0) // $src0_modifiers 1000e8d8bef9SDimitry Andric .addUse(Src0) // $src0 1001e8d8bef9SDimitry Andric .addImm(0) // $src1_modifiers 1002e8d8bef9SDimitry Andric .addUse(Denom) // $src1 1003e8d8bef9SDimitry Andric .addImm(0) // $src2_modifiers 1004e8d8bef9SDimitry Andric .addUse(Numer) // $src2 1005e8d8bef9SDimitry Andric .addImm(0) // $clamp 1006e8d8bef9SDimitry Andric .addImm(0); // $omod 10075ffd83dbSDimitry Andric 10085ffd83dbSDimitry Andric MI.eraseFromParent(); 10095ffd83dbSDimitry Andric return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); 10105ffd83dbSDimitry Andric } 10115ffd83dbSDimitry Andric 10128bcb0991SDimitry Andric bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const { 1013*0fca6ea1SDimitry Andric Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID(); 10140b57cec5SDimitry Andric switch (IntrinsicID) { 10150b57cec5SDimitry Andric case Intrinsic::amdgcn_if_break: { 10160b57cec5SDimitry Andric MachineBasicBlock *BB = I.getParent(); 10170b57cec5SDimitry Andric 1018349cc55cSDimitry Andric // FIXME: Manually selecting to avoid dealing with the SReg_1 trick 10190b57cec5SDimitry Andric // SelectionDAG uses for wave32 vs wave64. 10200b57cec5SDimitry Andric BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK)) 10210b57cec5SDimitry Andric .add(I.getOperand(0)) 10220b57cec5SDimitry Andric .add(I.getOperand(2)) 10230b57cec5SDimitry Andric .add(I.getOperand(3)); 10240b57cec5SDimitry Andric 10250b57cec5SDimitry Andric Register DstReg = I.getOperand(0).getReg(); 10260b57cec5SDimitry Andric Register Src0Reg = I.getOperand(2).getReg(); 10270b57cec5SDimitry Andric Register Src1Reg = I.getOperand(3).getReg(); 10280b57cec5SDimitry Andric 10290b57cec5SDimitry Andric I.eraseFromParent(); 10300b57cec5SDimitry Andric 10318bcb0991SDimitry Andric for (Register Reg : { DstReg, Src0Reg, Src1Reg }) 10328bcb0991SDimitry Andric MRI->setRegClass(Reg, TRI.getWaveMaskRegClass()); 10330b57cec5SDimitry Andric 10340b57cec5SDimitry Andric return true; 10350b57cec5SDimitry Andric } 10365ffd83dbSDimitry Andric case Intrinsic::amdgcn_interp_p1_f16: 10375ffd83dbSDimitry Andric return selectInterpP1F16(I); 10385ffd83dbSDimitry Andric case Intrinsic::amdgcn_wqm: 10395ffd83dbSDimitry Andric return constrainCopyLikeIntrin(I, AMDGPU::WQM); 10405ffd83dbSDimitry Andric case Intrinsic::amdgcn_softwqm: 10415ffd83dbSDimitry Andric return constrainCopyLikeIntrin(I, AMDGPU::SOFT_WQM); 1042fe6060f1SDimitry Andric case Intrinsic::amdgcn_strict_wwm: 10435ffd83dbSDimitry Andric case Intrinsic::amdgcn_wwm: 1044fe6060f1SDimitry Andric return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WWM); 1045fe6060f1SDimitry Andric case Intrinsic::amdgcn_strict_wqm: 1046fe6060f1SDimitry Andric return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WQM); 1047e8d8bef9SDimitry Andric case Intrinsic::amdgcn_writelane: 1048e8d8bef9SDimitry Andric return selectWritelane(I); 10495ffd83dbSDimitry Andric case Intrinsic::amdgcn_div_scale: 10505ffd83dbSDimitry Andric return selectDivScale(I); 10515ffd83dbSDimitry Andric case Intrinsic::amdgcn_icmp: 1052bdd1243dSDimitry Andric case Intrinsic::amdgcn_fcmp: 1053bdd1243dSDimitry Andric if (selectImpl(I, *CoverageInfo)) 1054bdd1243dSDimitry Andric return true; 1055bdd1243dSDimitry Andric return selectIntrinsicCmp(I); 10565ffd83dbSDimitry Andric case Intrinsic::amdgcn_ballot: 10575ffd83dbSDimitry Andric return selectBallot(I); 1058e8d8bef9SDimitry Andric case Intrinsic::amdgcn_reloc_constant: 1059e8d8bef9SDimitry Andric return selectRelocConstant(I); 1060e8d8bef9SDimitry Andric case Intrinsic::amdgcn_groupstaticsize: 1061e8d8bef9SDimitry Andric return selectGroupStaticSize(I); 1062e8d8bef9SDimitry Andric case Intrinsic::returnaddress: 1063e8d8bef9SDimitry Andric return selectReturnAddress(I); 106481ad6265SDimitry Andric case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16: 106581ad6265SDimitry Andric case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16: 106681ad6265SDimitry Andric case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16: 106781ad6265SDimitry Andric case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16: 106881ad6265SDimitry Andric case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8: 106981ad6265SDimitry Andric case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8: 1070fcaf7f86SDimitry Andric case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8: 1071fcaf7f86SDimitry Andric case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8: 1072fcaf7f86SDimitry Andric case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8: 1073fcaf7f86SDimitry Andric case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8: 1074fcaf7f86SDimitry Andric case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8: 1075fcaf7f86SDimitry Andric case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8: 1076fcaf7f86SDimitry Andric case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8: 1077fcaf7f86SDimitry Andric case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8: 107881ad6265SDimitry Andric return selectSMFMACIntrin(I); 10790b57cec5SDimitry Andric default: 10808bcb0991SDimitry Andric return selectImpl(I, *CoverageInfo); 10810b57cec5SDimitry Andric } 10820b57cec5SDimitry Andric } 10830b57cec5SDimitry Andric 1084bdd1243dSDimitry Andric static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size, 1085bdd1243dSDimitry Andric const GCNSubtarget &ST) { 1086bdd1243dSDimitry Andric if (Size != 16 && Size != 32 && Size != 64) 10870b57cec5SDimitry Andric return -1; 1088bdd1243dSDimitry Andric 1089bdd1243dSDimitry Andric if (Size == 16 && !ST.has16BitInsts()) 1090bdd1243dSDimitry Andric return -1; 1091bdd1243dSDimitry Andric 1092bdd1243dSDimitry Andric const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc, unsigned S32Opc, 1093bdd1243dSDimitry Andric unsigned S64Opc) { 1094bdd1243dSDimitry Andric if (Size == 16) 1095bdd1243dSDimitry Andric return ST.hasTrue16BitInsts() ? TrueS16Opc : S16Opc; 1096bdd1243dSDimitry Andric if (Size == 32) 1097bdd1243dSDimitry Andric return S32Opc; 1098bdd1243dSDimitry Andric return S64Opc; 1099bdd1243dSDimitry Andric }; 1100bdd1243dSDimitry Andric 11010b57cec5SDimitry Andric switch (P) { 11020b57cec5SDimitry Andric default: 11030b57cec5SDimitry Andric llvm_unreachable("Unknown condition code!"); 11040b57cec5SDimitry Andric case CmpInst::ICMP_NE: 1105bdd1243dSDimitry Andric return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64, 1106bdd1243dSDimitry Andric AMDGPU::V_CMP_NE_U32_e64, AMDGPU::V_CMP_NE_U64_e64); 11070b57cec5SDimitry Andric case CmpInst::ICMP_EQ: 1108bdd1243dSDimitry Andric return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64, 1109bdd1243dSDimitry Andric AMDGPU::V_CMP_EQ_U32_e64, AMDGPU::V_CMP_EQ_U64_e64); 11100b57cec5SDimitry Andric case CmpInst::ICMP_SGT: 1111bdd1243dSDimitry Andric return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64, 1112bdd1243dSDimitry Andric AMDGPU::V_CMP_GT_I32_e64, AMDGPU::V_CMP_GT_I64_e64); 11130b57cec5SDimitry Andric case CmpInst::ICMP_SGE: 1114bdd1243dSDimitry Andric return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64, 1115bdd1243dSDimitry Andric AMDGPU::V_CMP_GE_I32_e64, AMDGPU::V_CMP_GE_I64_e64); 11160b57cec5SDimitry Andric case CmpInst::ICMP_SLT: 1117bdd1243dSDimitry Andric return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64, 1118bdd1243dSDimitry Andric AMDGPU::V_CMP_LT_I32_e64, AMDGPU::V_CMP_LT_I64_e64); 11190b57cec5SDimitry Andric case CmpInst::ICMP_SLE: 1120bdd1243dSDimitry Andric return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64, 1121bdd1243dSDimitry Andric AMDGPU::V_CMP_LE_I32_e64, AMDGPU::V_CMP_LE_I64_e64); 11220b57cec5SDimitry Andric case CmpInst::ICMP_UGT: 1123bdd1243dSDimitry Andric return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64, 1124bdd1243dSDimitry Andric AMDGPU::V_CMP_GT_U32_e64, AMDGPU::V_CMP_GT_U64_e64); 11250b57cec5SDimitry Andric case CmpInst::ICMP_UGE: 1126bdd1243dSDimitry Andric return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64, 1127bdd1243dSDimitry Andric AMDGPU::V_CMP_GE_U32_e64, AMDGPU::V_CMP_GE_U64_e64); 11280b57cec5SDimitry Andric case CmpInst::ICMP_ULT: 1129bdd1243dSDimitry Andric return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64, 1130bdd1243dSDimitry Andric AMDGPU::V_CMP_LT_U32_e64, AMDGPU::V_CMP_LT_U64_e64); 11310b57cec5SDimitry Andric case CmpInst::ICMP_ULE: 1132bdd1243dSDimitry Andric return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64, 1133bdd1243dSDimitry Andric AMDGPU::V_CMP_LE_U32_e64, AMDGPU::V_CMP_LE_U64_e64); 1134bdd1243dSDimitry Andric 1135bdd1243dSDimitry Andric case CmpInst::FCMP_OEQ: 1136bdd1243dSDimitry Andric return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64, 1137bdd1243dSDimitry Andric AMDGPU::V_CMP_EQ_F32_e64, AMDGPU::V_CMP_EQ_F64_e64); 1138bdd1243dSDimitry Andric case CmpInst::FCMP_OGT: 1139bdd1243dSDimitry Andric return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64, 1140bdd1243dSDimitry Andric AMDGPU::V_CMP_GT_F32_e64, AMDGPU::V_CMP_GT_F64_e64); 1141bdd1243dSDimitry Andric case CmpInst::FCMP_OGE: 1142bdd1243dSDimitry Andric return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64, 1143bdd1243dSDimitry Andric AMDGPU::V_CMP_GE_F32_e64, AMDGPU::V_CMP_GE_F64_e64); 1144bdd1243dSDimitry Andric case CmpInst::FCMP_OLT: 1145bdd1243dSDimitry Andric return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64, 1146bdd1243dSDimitry Andric AMDGPU::V_CMP_LT_F32_e64, AMDGPU::V_CMP_LT_F64_e64); 1147bdd1243dSDimitry Andric case CmpInst::FCMP_OLE: 1148bdd1243dSDimitry Andric return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64, 1149bdd1243dSDimitry Andric AMDGPU::V_CMP_LE_F32_e64, AMDGPU::V_CMP_LE_F64_e64); 1150bdd1243dSDimitry Andric case CmpInst::FCMP_ONE: 1151bdd1243dSDimitry Andric return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64, 1152bdd1243dSDimitry Andric AMDGPU::V_CMP_NEQ_F32_e64, AMDGPU::V_CMP_NEQ_F64_e64); 1153bdd1243dSDimitry Andric case CmpInst::FCMP_ORD: 1154bdd1243dSDimitry Andric return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64, 1155bdd1243dSDimitry Andric AMDGPU::V_CMP_O_F32_e64, AMDGPU::V_CMP_O_F64_e64); 1156bdd1243dSDimitry Andric case CmpInst::FCMP_UNO: 1157bdd1243dSDimitry Andric return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64, 1158bdd1243dSDimitry Andric AMDGPU::V_CMP_U_F32_e64, AMDGPU::V_CMP_U_F64_e64); 1159bdd1243dSDimitry Andric case CmpInst::FCMP_UEQ: 1160bdd1243dSDimitry Andric return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64, 1161bdd1243dSDimitry Andric AMDGPU::V_CMP_NLG_F32_e64, AMDGPU::V_CMP_NLG_F64_e64); 1162bdd1243dSDimitry Andric case CmpInst::FCMP_UGT: 1163bdd1243dSDimitry Andric return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64, 1164bdd1243dSDimitry Andric AMDGPU::V_CMP_NLE_F32_e64, AMDGPU::V_CMP_NLE_F64_e64); 1165bdd1243dSDimitry Andric case CmpInst::FCMP_UGE: 1166bdd1243dSDimitry Andric return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64, 1167bdd1243dSDimitry Andric AMDGPU::V_CMP_NLT_F32_e64, AMDGPU::V_CMP_NLT_F64_e64); 1168bdd1243dSDimitry Andric case CmpInst::FCMP_ULT: 1169bdd1243dSDimitry Andric return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64, 1170bdd1243dSDimitry Andric AMDGPU::V_CMP_NGE_F32_e64, AMDGPU::V_CMP_NGE_F64_e64); 1171bdd1243dSDimitry Andric case CmpInst::FCMP_ULE: 1172bdd1243dSDimitry Andric return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64, 1173bdd1243dSDimitry Andric AMDGPU::V_CMP_NGT_F32_e64, AMDGPU::V_CMP_NGT_F64_e64); 1174bdd1243dSDimitry Andric case CmpInst::FCMP_UNE: 1175bdd1243dSDimitry Andric return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64, 1176bdd1243dSDimitry Andric AMDGPU::V_CMP_NEQ_F32_e64, AMDGPU::V_CMP_NEQ_F64_e64); 1177bdd1243dSDimitry Andric case CmpInst::FCMP_TRUE: 1178bdd1243dSDimitry Andric return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64, 1179bdd1243dSDimitry Andric AMDGPU::V_CMP_TRU_F32_e64, AMDGPU::V_CMP_TRU_F64_e64); 1180bdd1243dSDimitry Andric case CmpInst::FCMP_FALSE: 1181bdd1243dSDimitry Andric return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64, 1182bdd1243dSDimitry Andric AMDGPU::V_CMP_F_F32_e64, AMDGPU::V_CMP_F_F64_e64); 11830b57cec5SDimitry Andric } 11840b57cec5SDimitry Andric } 11850b57cec5SDimitry Andric 11860b57cec5SDimitry Andric int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P, 11870b57cec5SDimitry Andric unsigned Size) const { 11880b57cec5SDimitry Andric if (Size == 64) { 11890b57cec5SDimitry Andric if (!STI.hasScalarCompareEq64()) 11900b57cec5SDimitry Andric return -1; 11910b57cec5SDimitry Andric 11920b57cec5SDimitry Andric switch (P) { 11930b57cec5SDimitry Andric case CmpInst::ICMP_NE: 11940b57cec5SDimitry Andric return AMDGPU::S_CMP_LG_U64; 11950b57cec5SDimitry Andric case CmpInst::ICMP_EQ: 11960b57cec5SDimitry Andric return AMDGPU::S_CMP_EQ_U64; 11970b57cec5SDimitry Andric default: 11980b57cec5SDimitry Andric return -1; 11990b57cec5SDimitry Andric } 12000b57cec5SDimitry Andric } 12010b57cec5SDimitry Andric 12025f757f3fSDimitry Andric if (Size == 32) { 12030b57cec5SDimitry Andric switch (P) { 12040b57cec5SDimitry Andric case CmpInst::ICMP_NE: 12050b57cec5SDimitry Andric return AMDGPU::S_CMP_LG_U32; 12060b57cec5SDimitry Andric case CmpInst::ICMP_EQ: 12070b57cec5SDimitry Andric return AMDGPU::S_CMP_EQ_U32; 12080b57cec5SDimitry Andric case CmpInst::ICMP_SGT: 12090b57cec5SDimitry Andric return AMDGPU::S_CMP_GT_I32; 12100b57cec5SDimitry Andric case CmpInst::ICMP_SGE: 12110b57cec5SDimitry Andric return AMDGPU::S_CMP_GE_I32; 12120b57cec5SDimitry Andric case CmpInst::ICMP_SLT: 12130b57cec5SDimitry Andric return AMDGPU::S_CMP_LT_I32; 12140b57cec5SDimitry Andric case CmpInst::ICMP_SLE: 12150b57cec5SDimitry Andric return AMDGPU::S_CMP_LE_I32; 12160b57cec5SDimitry Andric case CmpInst::ICMP_UGT: 12170b57cec5SDimitry Andric return AMDGPU::S_CMP_GT_U32; 12180b57cec5SDimitry Andric case CmpInst::ICMP_UGE: 12190b57cec5SDimitry Andric return AMDGPU::S_CMP_GE_U32; 12200b57cec5SDimitry Andric case CmpInst::ICMP_ULT: 12210b57cec5SDimitry Andric return AMDGPU::S_CMP_LT_U32; 12220b57cec5SDimitry Andric case CmpInst::ICMP_ULE: 12230b57cec5SDimitry Andric return AMDGPU::S_CMP_LE_U32; 12245f757f3fSDimitry Andric case CmpInst::FCMP_OEQ: 12255f757f3fSDimitry Andric return AMDGPU::S_CMP_EQ_F32; 12265f757f3fSDimitry Andric case CmpInst::FCMP_OGT: 12275f757f3fSDimitry Andric return AMDGPU::S_CMP_GT_F32; 12285f757f3fSDimitry Andric case CmpInst::FCMP_OGE: 12295f757f3fSDimitry Andric return AMDGPU::S_CMP_GE_F32; 12305f757f3fSDimitry Andric case CmpInst::FCMP_OLT: 12315f757f3fSDimitry Andric return AMDGPU::S_CMP_LT_F32; 12325f757f3fSDimitry Andric case CmpInst::FCMP_OLE: 12335f757f3fSDimitry Andric return AMDGPU::S_CMP_LE_F32; 12345f757f3fSDimitry Andric case CmpInst::FCMP_ONE: 12355f757f3fSDimitry Andric return AMDGPU::S_CMP_LG_F32; 12365f757f3fSDimitry Andric case CmpInst::FCMP_ORD: 12375f757f3fSDimitry Andric return AMDGPU::S_CMP_O_F32; 12385f757f3fSDimitry Andric case CmpInst::FCMP_UNO: 12395f757f3fSDimitry Andric return AMDGPU::S_CMP_U_F32; 12405f757f3fSDimitry Andric case CmpInst::FCMP_UEQ: 12415f757f3fSDimitry Andric return AMDGPU::S_CMP_NLG_F32; 12425f757f3fSDimitry Andric case CmpInst::FCMP_UGT: 12435f757f3fSDimitry Andric return AMDGPU::S_CMP_NLE_F32; 12445f757f3fSDimitry Andric case CmpInst::FCMP_UGE: 12455f757f3fSDimitry Andric return AMDGPU::S_CMP_NLT_F32; 12465f757f3fSDimitry Andric case CmpInst::FCMP_ULT: 12475f757f3fSDimitry Andric return AMDGPU::S_CMP_NGE_F32; 12485f757f3fSDimitry Andric case CmpInst::FCMP_ULE: 12495f757f3fSDimitry Andric return AMDGPU::S_CMP_NGT_F32; 12505f757f3fSDimitry Andric case CmpInst::FCMP_UNE: 12515f757f3fSDimitry Andric return AMDGPU::S_CMP_NEQ_F32; 12520b57cec5SDimitry Andric default: 12530b57cec5SDimitry Andric llvm_unreachable("Unknown condition code!"); 12540b57cec5SDimitry Andric } 12550b57cec5SDimitry Andric } 12560b57cec5SDimitry Andric 12575f757f3fSDimitry Andric if (Size == 16) { 12585f757f3fSDimitry Andric if (!STI.hasSALUFloatInsts()) 12595f757f3fSDimitry Andric return -1; 12605f757f3fSDimitry Andric 12615f757f3fSDimitry Andric switch (P) { 12625f757f3fSDimitry Andric case CmpInst::FCMP_OEQ: 12635f757f3fSDimitry Andric return AMDGPU::S_CMP_EQ_F16; 12645f757f3fSDimitry Andric case CmpInst::FCMP_OGT: 12655f757f3fSDimitry Andric return AMDGPU::S_CMP_GT_F16; 12665f757f3fSDimitry Andric case CmpInst::FCMP_OGE: 12675f757f3fSDimitry Andric return AMDGPU::S_CMP_GE_F16; 12685f757f3fSDimitry Andric case CmpInst::FCMP_OLT: 12695f757f3fSDimitry Andric return AMDGPU::S_CMP_LT_F16; 12705f757f3fSDimitry Andric case CmpInst::FCMP_OLE: 12715f757f3fSDimitry Andric return AMDGPU::S_CMP_LE_F16; 12725f757f3fSDimitry Andric case CmpInst::FCMP_ONE: 12735f757f3fSDimitry Andric return AMDGPU::S_CMP_LG_F16; 12745f757f3fSDimitry Andric case CmpInst::FCMP_ORD: 12755f757f3fSDimitry Andric return AMDGPU::S_CMP_O_F16; 12765f757f3fSDimitry Andric case CmpInst::FCMP_UNO: 12775f757f3fSDimitry Andric return AMDGPU::S_CMP_U_F16; 12785f757f3fSDimitry Andric case CmpInst::FCMP_UEQ: 12795f757f3fSDimitry Andric return AMDGPU::S_CMP_NLG_F16; 12805f757f3fSDimitry Andric case CmpInst::FCMP_UGT: 12815f757f3fSDimitry Andric return AMDGPU::S_CMP_NLE_F16; 12825f757f3fSDimitry Andric case CmpInst::FCMP_UGE: 12835f757f3fSDimitry Andric return AMDGPU::S_CMP_NLT_F16; 12845f757f3fSDimitry Andric case CmpInst::FCMP_ULT: 12855f757f3fSDimitry Andric return AMDGPU::S_CMP_NGE_F16; 12865f757f3fSDimitry Andric case CmpInst::FCMP_ULE: 12875f757f3fSDimitry Andric return AMDGPU::S_CMP_NGT_F16; 12885f757f3fSDimitry Andric case CmpInst::FCMP_UNE: 12895f757f3fSDimitry Andric return AMDGPU::S_CMP_NEQ_F16; 12905f757f3fSDimitry Andric default: 12915f757f3fSDimitry Andric llvm_unreachable("Unknown condition code!"); 12925f757f3fSDimitry Andric } 12935f757f3fSDimitry Andric } 12945f757f3fSDimitry Andric 12955f757f3fSDimitry Andric return -1; 12965f757f3fSDimitry Andric } 12975f757f3fSDimitry Andric 12985f757f3fSDimitry Andric bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const { 12995f757f3fSDimitry Andric 13000b57cec5SDimitry Andric MachineBasicBlock *BB = I.getParent(); 13010b57cec5SDimitry Andric const DebugLoc &DL = I.getDebugLoc(); 13020b57cec5SDimitry Andric 13038bcb0991SDimitry Andric Register SrcReg = I.getOperand(2).getReg(); 13048bcb0991SDimitry Andric unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI); 13050b57cec5SDimitry Andric 13060b57cec5SDimitry Andric auto Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate(); 13070b57cec5SDimitry Andric 13088bcb0991SDimitry Andric Register CCReg = I.getOperand(0).getReg(); 1309480093f4SDimitry Andric if (!isVCC(CCReg, *MRI)) { 13100b57cec5SDimitry Andric int Opcode = getS_CMPOpcode(Pred, Size); 13110b57cec5SDimitry Andric if (Opcode == -1) 13120b57cec5SDimitry Andric return false; 13130b57cec5SDimitry Andric MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode)) 13140b57cec5SDimitry Andric .add(I.getOperand(2)) 13150b57cec5SDimitry Andric .add(I.getOperand(3)); 13160b57cec5SDimitry Andric BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg) 13170b57cec5SDimitry Andric .addReg(AMDGPU::SCC); 13180b57cec5SDimitry Andric bool Ret = 13190b57cec5SDimitry Andric constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI) && 13208bcb0991SDimitry Andric RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, *MRI); 13210b57cec5SDimitry Andric I.eraseFromParent(); 13220b57cec5SDimitry Andric return Ret; 13230b57cec5SDimitry Andric } 13240b57cec5SDimitry Andric 13255f757f3fSDimitry Andric if (I.getOpcode() == AMDGPU::G_FCMP) 13265f757f3fSDimitry Andric return false; 13275f757f3fSDimitry Andric 1328bdd1243dSDimitry Andric int Opcode = getV_CMPOpcode(Pred, Size, *Subtarget); 13290b57cec5SDimitry Andric if (Opcode == -1) 13300b57cec5SDimitry Andric return false; 13310b57cec5SDimitry Andric 13320b57cec5SDimitry Andric MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), 13330b57cec5SDimitry Andric I.getOperand(0).getReg()) 13340b57cec5SDimitry Andric .add(I.getOperand(2)) 13350b57cec5SDimitry Andric .add(I.getOperand(3)); 13360b57cec5SDimitry Andric RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(), 13378bcb0991SDimitry Andric *TRI.getBoolRC(), *MRI); 13380b57cec5SDimitry Andric bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI); 13390b57cec5SDimitry Andric I.eraseFromParent(); 13400b57cec5SDimitry Andric return Ret; 13410b57cec5SDimitry Andric } 13420b57cec5SDimitry Andric 1343bdd1243dSDimitry Andric bool AMDGPUInstructionSelector::selectIntrinsicCmp(MachineInstr &I) const { 13445ffd83dbSDimitry Andric Register Dst = I.getOperand(0).getReg(); 13455ffd83dbSDimitry Andric if (isVCC(Dst, *MRI)) 13465ffd83dbSDimitry Andric return false; 13475ffd83dbSDimitry Andric 1348bdd1243dSDimitry Andric LLT DstTy = MRI->getType(Dst); 1349bdd1243dSDimitry Andric if (DstTy.getSizeInBits() != STI.getWavefrontSize()) 13505ffd83dbSDimitry Andric return false; 13515ffd83dbSDimitry Andric 13525ffd83dbSDimitry Andric MachineBasicBlock *BB = I.getParent(); 13535ffd83dbSDimitry Andric const DebugLoc &DL = I.getDebugLoc(); 13545ffd83dbSDimitry Andric Register SrcReg = I.getOperand(2).getReg(); 13555ffd83dbSDimitry Andric unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI); 135604eeddc0SDimitry Andric 1357bdd1243dSDimitry Andric // i1 inputs are not supported in GlobalISel. 1358bdd1243dSDimitry Andric if (Size == 1) 135904eeddc0SDimitry Andric return false; 1360bdd1243dSDimitry Andric 1361bdd1243dSDimitry Andric auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(4).getImm()); 1362bdd1243dSDimitry Andric if (!CmpInst::isIntPredicate(Pred) && !CmpInst::isFPPredicate(Pred)) { 1363bdd1243dSDimitry Andric BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst); 136404eeddc0SDimitry Andric I.eraseFromParent(); 1365bdd1243dSDimitry Andric return RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI); 136604eeddc0SDimitry Andric } 13675ffd83dbSDimitry Andric 1368bdd1243dSDimitry Andric const int Opcode = getV_CMPOpcode(Pred, Size, *Subtarget); 13695ffd83dbSDimitry Andric if (Opcode == -1) 13705ffd83dbSDimitry Andric return false; 13715ffd83dbSDimitry Andric 137206c3fb27SDimitry Andric MachineInstrBuilder SelectedMI; 1373bdd1243dSDimitry Andric MachineOperand &LHS = I.getOperand(2); 1374bdd1243dSDimitry Andric MachineOperand &RHS = I.getOperand(3); 1375bdd1243dSDimitry Andric auto [Src0, Src0Mods] = selectVOP3ModsImpl(LHS); 1376bdd1243dSDimitry Andric auto [Src1, Src1Mods] = selectVOP3ModsImpl(RHS); 1377bdd1243dSDimitry Andric Register Src0Reg = 1378bdd1243dSDimitry Andric copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &I, /*ForceVGPR*/ true); 1379bdd1243dSDimitry Andric Register Src1Reg = 1380bdd1243dSDimitry Andric copyToVGPRIfSrcFolded(Src1, Src1Mods, RHS, &I, /*ForceVGPR*/ true); 138106c3fb27SDimitry Andric SelectedMI = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst); 138206c3fb27SDimitry Andric if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src0_modifiers)) 138306c3fb27SDimitry Andric SelectedMI.addImm(Src0Mods); 138406c3fb27SDimitry Andric SelectedMI.addReg(Src0Reg); 138506c3fb27SDimitry Andric if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src1_modifiers)) 138606c3fb27SDimitry Andric SelectedMI.addImm(Src1Mods); 138706c3fb27SDimitry Andric SelectedMI.addReg(Src1Reg); 138806c3fb27SDimitry Andric if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::clamp)) 138906c3fb27SDimitry Andric SelectedMI.addImm(0); // clamp 139006c3fb27SDimitry Andric if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::op_sel)) 139106c3fb27SDimitry Andric SelectedMI.addImm(0); // op_sel 1392bdd1243dSDimitry Andric 1393bdd1243dSDimitry Andric RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI); 1394bdd1243dSDimitry Andric if (!constrainSelectedInstRegOperands(*SelectedMI, TII, TRI, RBI)) 1395bdd1243dSDimitry Andric return false; 1396bdd1243dSDimitry Andric 13975ffd83dbSDimitry Andric I.eraseFromParent(); 1398bdd1243dSDimitry Andric return true; 13990b57cec5SDimitry Andric } 14000b57cec5SDimitry Andric 14015ffd83dbSDimitry Andric bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const { 14025ffd83dbSDimitry Andric MachineBasicBlock *BB = I.getParent(); 14035ffd83dbSDimitry Andric const DebugLoc &DL = I.getDebugLoc(); 14045ffd83dbSDimitry Andric Register DstReg = I.getOperand(0).getReg(); 14055ffd83dbSDimitry Andric const unsigned Size = MRI->getType(DstReg).getSizeInBits(); 14065ffd83dbSDimitry Andric const bool Is64 = Size == 64; 140706c3fb27SDimitry Andric const bool IsWave32 = (STI.getWavefrontSize() == 32); 14080b57cec5SDimitry Andric 140906c3fb27SDimitry Andric // In the common case, the return type matches the wave size. 141006c3fb27SDimitry Andric // However we also support emitting i64 ballots in wave32 mode. 141106c3fb27SDimitry Andric if (Size != STI.getWavefrontSize() && (!Is64 || !IsWave32)) 14125ffd83dbSDimitry Andric return false; 14138bcb0991SDimitry Andric 1414bdd1243dSDimitry Andric std::optional<ValueAndVReg> Arg = 1415349cc55cSDimitry Andric getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), *MRI); 14168bcb0991SDimitry Andric 141706c3fb27SDimitry Andric const auto BuildCopy = [&](Register SrcReg) { 141806c3fb27SDimitry Andric if (Size == STI.getWavefrontSize()) { 141906c3fb27SDimitry Andric BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg) 142006c3fb27SDimitry Andric .addReg(SrcReg); 142106c3fb27SDimitry Andric return; 142206c3fb27SDimitry Andric } 142306c3fb27SDimitry Andric 142406c3fb27SDimitry Andric // If emitting a i64 ballot in wave32, fill the upper bits with zeroes. 142506c3fb27SDimitry Andric Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); 142606c3fb27SDimitry Andric BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), HiReg).addImm(0); 142706c3fb27SDimitry Andric BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg) 142806c3fb27SDimitry Andric .addReg(SrcReg) 142906c3fb27SDimitry Andric .addImm(AMDGPU::sub0) 143006c3fb27SDimitry Andric .addReg(HiReg) 143106c3fb27SDimitry Andric .addImm(AMDGPU::sub1); 143206c3fb27SDimitry Andric }; 143306c3fb27SDimitry Andric 143481ad6265SDimitry Andric if (Arg) { 1435bdd1243dSDimitry Andric const int64_t Value = Arg->Value.getSExtValue(); 14365ffd83dbSDimitry Andric if (Value == 0) { 14375ffd83dbSDimitry Andric unsigned Opcode = Is64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32; 14385ffd83dbSDimitry Andric BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg).addImm(0); 143906c3fb27SDimitry Andric } else if (Value == -1) // all ones 144006c3fb27SDimitry Andric BuildCopy(IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC); 144106c3fb27SDimitry Andric else 14425ffd83dbSDimitry Andric return false; 144306c3fb27SDimitry Andric } else 144406c3fb27SDimitry Andric BuildCopy(I.getOperand(2).getReg()); 144506c3fb27SDimitry Andric 144606c3fb27SDimitry Andric I.eraseFromParent(); 144706c3fb27SDimitry Andric return true; 14488bcb0991SDimitry Andric } 14498bcb0991SDimitry Andric 1450e8d8bef9SDimitry Andric bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const { 1451e8d8bef9SDimitry Andric Register DstReg = I.getOperand(0).getReg(); 1452e8d8bef9SDimitry Andric const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI); 145381ad6265SDimitry Andric const TargetRegisterClass *DstRC = TRI.getRegClassForSizeOnBank(32, *DstBank); 1454e8d8bef9SDimitry Andric if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) 1455e8d8bef9SDimitry Andric return false; 1456e8d8bef9SDimitry Andric 1457e8d8bef9SDimitry Andric const bool IsVALU = DstBank->getID() == AMDGPU::VGPRRegBankID; 1458e8d8bef9SDimitry Andric 1459e8d8bef9SDimitry Andric Module *M = MF->getFunction().getParent(); 1460e8d8bef9SDimitry Andric const MDNode *Metadata = I.getOperand(2).getMetadata(); 1461e8d8bef9SDimitry Andric auto SymbolName = cast<MDString>(Metadata->getOperand(0))->getString(); 1462e8d8bef9SDimitry Andric auto RelocSymbol = cast<GlobalVariable>( 1463e8d8bef9SDimitry Andric M->getOrInsertGlobal(SymbolName, Type::getInt32Ty(M->getContext()))); 1464e8d8bef9SDimitry Andric 1465e8d8bef9SDimitry Andric MachineBasicBlock *BB = I.getParent(); 1466e8d8bef9SDimitry Andric BuildMI(*BB, &I, I.getDebugLoc(), 1467e8d8bef9SDimitry Andric TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg) 1468e8d8bef9SDimitry Andric .addGlobalAddress(RelocSymbol, 0, SIInstrInfo::MO_ABS32_LO); 1469e8d8bef9SDimitry Andric 1470e8d8bef9SDimitry Andric I.eraseFromParent(); 1471e8d8bef9SDimitry Andric return true; 1472e8d8bef9SDimitry Andric } 1473e8d8bef9SDimitry Andric 1474e8d8bef9SDimitry Andric bool AMDGPUInstructionSelector::selectGroupStaticSize(MachineInstr &I) const { 1475e8d8bef9SDimitry Andric Triple::OSType OS = MF->getTarget().getTargetTriple().getOS(); 1476e8d8bef9SDimitry Andric 1477e8d8bef9SDimitry Andric Register DstReg = I.getOperand(0).getReg(); 1478e8d8bef9SDimitry Andric const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI); 1479e8d8bef9SDimitry Andric unsigned Mov = DstRB->getID() == AMDGPU::SGPRRegBankID ? 1480e8d8bef9SDimitry Andric AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; 1481e8d8bef9SDimitry Andric 1482e8d8bef9SDimitry Andric MachineBasicBlock *MBB = I.getParent(); 1483e8d8bef9SDimitry Andric const DebugLoc &DL = I.getDebugLoc(); 1484e8d8bef9SDimitry Andric 1485e8d8bef9SDimitry Andric auto MIB = BuildMI(*MBB, &I, DL, TII.get(Mov), DstReg); 1486e8d8bef9SDimitry Andric 1487e8d8bef9SDimitry Andric if (OS == Triple::AMDHSA || OS == Triple::AMDPAL) { 1488e8d8bef9SDimitry Andric const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); 1489e8d8bef9SDimitry Andric MIB.addImm(MFI->getLDSSize()); 1490e8d8bef9SDimitry Andric } else { 1491e8d8bef9SDimitry Andric Module *M = MF->getFunction().getParent(); 1492e8d8bef9SDimitry Andric const GlobalValue *GV 1493e8d8bef9SDimitry Andric = Intrinsic::getDeclaration(M, Intrinsic::amdgcn_groupstaticsize); 1494e8d8bef9SDimitry Andric MIB.addGlobalAddress(GV, 0, SIInstrInfo::MO_ABS32_LO); 1495e8d8bef9SDimitry Andric } 1496e8d8bef9SDimitry Andric 1497e8d8bef9SDimitry Andric I.eraseFromParent(); 1498e8d8bef9SDimitry Andric return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); 1499e8d8bef9SDimitry Andric } 1500e8d8bef9SDimitry Andric 1501e8d8bef9SDimitry Andric bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const { 1502e8d8bef9SDimitry Andric MachineBasicBlock *MBB = I.getParent(); 1503e8d8bef9SDimitry Andric MachineFunction &MF = *MBB->getParent(); 1504e8d8bef9SDimitry Andric const DebugLoc &DL = I.getDebugLoc(); 1505e8d8bef9SDimitry Andric 1506e8d8bef9SDimitry Andric MachineOperand &Dst = I.getOperand(0); 1507e8d8bef9SDimitry Andric Register DstReg = Dst.getReg(); 1508e8d8bef9SDimitry Andric unsigned Depth = I.getOperand(2).getImm(); 1509e8d8bef9SDimitry Andric 1510e8d8bef9SDimitry Andric const TargetRegisterClass *RC 1511e8d8bef9SDimitry Andric = TRI.getConstrainedRegClassForOperand(Dst, *MRI); 1512e8d8bef9SDimitry Andric if (!RC->hasSubClassEq(&AMDGPU::SGPR_64RegClass) || 1513e8d8bef9SDimitry Andric !RBI.constrainGenericRegister(DstReg, *RC, *MRI)) 1514e8d8bef9SDimitry Andric return false; 1515e8d8bef9SDimitry Andric 1516e8d8bef9SDimitry Andric // Check for kernel and shader functions 1517e8d8bef9SDimitry Andric if (Depth != 0 || 1518e8d8bef9SDimitry Andric MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) { 1519e8d8bef9SDimitry Andric BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_MOV_B64), DstReg) 1520e8d8bef9SDimitry Andric .addImm(0); 1521e8d8bef9SDimitry Andric I.eraseFromParent(); 1522e8d8bef9SDimitry Andric return true; 1523e8d8bef9SDimitry Andric } 1524e8d8bef9SDimitry Andric 1525e8d8bef9SDimitry Andric MachineFrameInfo &MFI = MF.getFrameInfo(); 1526e8d8bef9SDimitry Andric // There is a call to @llvm.returnaddress in this function 1527e8d8bef9SDimitry Andric MFI.setReturnAddressIsTaken(true); 1528e8d8bef9SDimitry Andric 1529e8d8bef9SDimitry Andric // Get the return address reg and mark it as an implicit live-in 1530e8d8bef9SDimitry Andric Register ReturnAddrReg = TRI.getReturnAddressReg(MF); 1531e8d8bef9SDimitry Andric Register LiveIn = getFunctionLiveInPhysReg(MF, TII, ReturnAddrReg, 153204eeddc0SDimitry Andric AMDGPU::SReg_64RegClass, DL); 1533e8d8bef9SDimitry Andric BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), DstReg) 1534e8d8bef9SDimitry Andric .addReg(LiveIn); 1535e8d8bef9SDimitry Andric I.eraseFromParent(); 1536e8d8bef9SDimitry Andric return true; 1537e8d8bef9SDimitry Andric } 1538e8d8bef9SDimitry Andric 15395ffd83dbSDimitry Andric bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const { 1540349cc55cSDimitry Andric // FIXME: Manually selecting to avoid dealing with the SReg_1 trick 15415ffd83dbSDimitry Andric // SelectionDAG uses for wave32 vs wave64. 15425ffd83dbSDimitry Andric MachineBasicBlock *BB = MI.getParent(); 15435ffd83dbSDimitry Andric BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF)) 15445ffd83dbSDimitry Andric .add(MI.getOperand(1)); 15458bcb0991SDimitry Andric 15465ffd83dbSDimitry Andric Register Reg = MI.getOperand(1).getReg(); 15478bcb0991SDimitry Andric MI.eraseFromParent(); 15488bcb0991SDimitry Andric 15495ffd83dbSDimitry Andric if (!MRI->getRegClassOrNull(Reg)) 15505ffd83dbSDimitry Andric MRI->setRegClass(Reg, TRI.getWaveMaskRegClass()); 15515ffd83dbSDimitry Andric return true; 15528bcb0991SDimitry Andric } 15538bcb0991SDimitry Andric 1554480093f4SDimitry Andric bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic( 1555480093f4SDimitry Andric MachineInstr &MI, Intrinsic::ID IntrID) const { 1556480093f4SDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 1557480093f4SDimitry Andric MachineFunction *MF = MBB->getParent(); 1558480093f4SDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 1559480093f4SDimitry Andric 1560480093f4SDimitry Andric unsigned IndexOperand = MI.getOperand(7).getImm(); 1561480093f4SDimitry Andric bool WaveRelease = MI.getOperand(8).getImm() != 0; 1562480093f4SDimitry Andric bool WaveDone = MI.getOperand(9).getImm() != 0; 1563480093f4SDimitry Andric 1564480093f4SDimitry Andric if (WaveDone && !WaveRelease) 1565480093f4SDimitry Andric report_fatal_error("ds_ordered_count: wave_done requires wave_release"); 1566480093f4SDimitry Andric 1567480093f4SDimitry Andric unsigned OrderedCountIndex = IndexOperand & 0x3f; 1568480093f4SDimitry Andric IndexOperand &= ~0x3f; 1569480093f4SDimitry Andric unsigned CountDw = 0; 1570480093f4SDimitry Andric 1571480093f4SDimitry Andric if (STI.getGeneration() >= AMDGPUSubtarget::GFX10) { 1572480093f4SDimitry Andric CountDw = (IndexOperand >> 24) & 0xf; 1573480093f4SDimitry Andric IndexOperand &= ~(0xf << 24); 1574480093f4SDimitry Andric 1575480093f4SDimitry Andric if (CountDw < 1 || CountDw > 4) { 1576480093f4SDimitry Andric report_fatal_error( 1577480093f4SDimitry Andric "ds_ordered_count: dword count must be between 1 and 4"); 1578480093f4SDimitry Andric } 1579480093f4SDimitry Andric } 1580480093f4SDimitry Andric 1581480093f4SDimitry Andric if (IndexOperand) 1582480093f4SDimitry Andric report_fatal_error("ds_ordered_count: bad index operand"); 1583480093f4SDimitry Andric 1584480093f4SDimitry Andric unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1; 1585e8d8bef9SDimitry Andric unsigned ShaderType = SIInstrInfo::getDSShaderTypeValue(*MF); 1586480093f4SDimitry Andric 1587480093f4SDimitry Andric unsigned Offset0 = OrderedCountIndex << 2; 158881ad6265SDimitry Andric unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4); 1589480093f4SDimitry Andric 1590480093f4SDimitry Andric if (STI.getGeneration() >= AMDGPUSubtarget::GFX10) 1591480093f4SDimitry Andric Offset1 |= (CountDw - 1) << 6; 1592480093f4SDimitry Andric 159381ad6265SDimitry Andric if (STI.getGeneration() < AMDGPUSubtarget::GFX11) 159481ad6265SDimitry Andric Offset1 |= ShaderType << 2; 159581ad6265SDimitry Andric 1596480093f4SDimitry Andric unsigned Offset = Offset0 | (Offset1 << 8); 1597480093f4SDimitry Andric 1598480093f4SDimitry Andric Register M0Val = MI.getOperand(2).getReg(); 1599480093f4SDimitry Andric BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0) 1600480093f4SDimitry Andric .addReg(M0Val); 1601480093f4SDimitry Andric 1602480093f4SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 1603480093f4SDimitry Andric Register ValReg = MI.getOperand(3).getReg(); 1604480093f4SDimitry Andric MachineInstrBuilder DS = 1605480093f4SDimitry Andric BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg) 1606480093f4SDimitry Andric .addReg(ValReg) 1607480093f4SDimitry Andric .addImm(Offset) 1608480093f4SDimitry Andric .cloneMemRefs(MI); 1609480093f4SDimitry Andric 1610480093f4SDimitry Andric if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI)) 1611480093f4SDimitry Andric return false; 1612480093f4SDimitry Andric 1613480093f4SDimitry Andric bool Ret = constrainSelectedInstRegOperands(*DS, TII, TRI, RBI); 1614480093f4SDimitry Andric MI.eraseFromParent(); 1615480093f4SDimitry Andric return Ret; 1616480093f4SDimitry Andric } 1617480093f4SDimitry Andric 16185ffd83dbSDimitry Andric static unsigned gwsIntrinToOpcode(unsigned IntrID) { 16195ffd83dbSDimitry Andric switch (IntrID) { 16205ffd83dbSDimitry Andric case Intrinsic::amdgcn_ds_gws_init: 16215ffd83dbSDimitry Andric return AMDGPU::DS_GWS_INIT; 16225ffd83dbSDimitry Andric case Intrinsic::amdgcn_ds_gws_barrier: 16235ffd83dbSDimitry Andric return AMDGPU::DS_GWS_BARRIER; 16245ffd83dbSDimitry Andric case Intrinsic::amdgcn_ds_gws_sema_v: 16255ffd83dbSDimitry Andric return AMDGPU::DS_GWS_SEMA_V; 16265ffd83dbSDimitry Andric case Intrinsic::amdgcn_ds_gws_sema_br: 16275ffd83dbSDimitry Andric return AMDGPU::DS_GWS_SEMA_BR; 16285ffd83dbSDimitry Andric case Intrinsic::amdgcn_ds_gws_sema_p: 16295ffd83dbSDimitry Andric return AMDGPU::DS_GWS_SEMA_P; 16305ffd83dbSDimitry Andric case Intrinsic::amdgcn_ds_gws_sema_release_all: 16315ffd83dbSDimitry Andric return AMDGPU::DS_GWS_SEMA_RELEASE_ALL; 16325ffd83dbSDimitry Andric default: 16335ffd83dbSDimitry Andric llvm_unreachable("not a gws intrinsic"); 16340b57cec5SDimitry Andric } 16350b57cec5SDimitry Andric } 16360b57cec5SDimitry Andric 16375ffd83dbSDimitry Andric bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI, 16385ffd83dbSDimitry Andric Intrinsic::ID IID) const { 16395f757f3fSDimitry Andric if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all && 16405f757f3fSDimitry Andric !STI.hasGWSSemaReleaseAll())) 16415ffd83dbSDimitry Andric return false; 16420b57cec5SDimitry Andric 16435ffd83dbSDimitry Andric // intrinsic ID, vsrc, offset 16445ffd83dbSDimitry Andric const bool HasVSrc = MI.getNumOperands() == 3; 16455ffd83dbSDimitry Andric assert(HasVSrc || MI.getNumOperands() == 2); 16465ffd83dbSDimitry Andric 16475ffd83dbSDimitry Andric Register BaseOffset = MI.getOperand(HasVSrc ? 2 : 1).getReg(); 16485ffd83dbSDimitry Andric const RegisterBank *OffsetRB = RBI.getRegBank(BaseOffset, *MRI, TRI); 16495ffd83dbSDimitry Andric if (OffsetRB->getID() != AMDGPU::SGPRRegBankID) 16505ffd83dbSDimitry Andric return false; 16515ffd83dbSDimitry Andric 16525ffd83dbSDimitry Andric MachineInstr *OffsetDef = getDefIgnoringCopies(BaseOffset, *MRI); 16535ffd83dbSDimitry Andric unsigned ImmOffset; 16545ffd83dbSDimitry Andric 16555ffd83dbSDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 16565ffd83dbSDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 16575ffd83dbSDimitry Andric 16585ffd83dbSDimitry Andric MachineInstr *Readfirstlane = nullptr; 16595ffd83dbSDimitry Andric 16605ffd83dbSDimitry Andric // If we legalized the VGPR input, strip out the readfirstlane to analyze the 16615ffd83dbSDimitry Andric // incoming offset, in case there's an add of a constant. We'll have to put it 16625ffd83dbSDimitry Andric // back later. 16635ffd83dbSDimitry Andric if (OffsetDef->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) { 16645ffd83dbSDimitry Andric Readfirstlane = OffsetDef; 16655ffd83dbSDimitry Andric BaseOffset = OffsetDef->getOperand(1).getReg(); 16665ffd83dbSDimitry Andric OffsetDef = getDefIgnoringCopies(BaseOffset, *MRI); 16675ffd83dbSDimitry Andric } 16685ffd83dbSDimitry Andric 16695ffd83dbSDimitry Andric if (OffsetDef->getOpcode() == AMDGPU::G_CONSTANT) { 16705ffd83dbSDimitry Andric // If we have a constant offset, try to use the 0 in m0 as the base. 16715ffd83dbSDimitry Andric // TODO: Look into changing the default m0 initialization value. If the 16725ffd83dbSDimitry Andric // default -1 only set the low 16-bits, we could leave it as-is and add 1 to 16735ffd83dbSDimitry Andric // the immediate offset. 16745ffd83dbSDimitry Andric 16755ffd83dbSDimitry Andric ImmOffset = OffsetDef->getOperand(1).getCImm()->getZExtValue(); 16765ffd83dbSDimitry Andric BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0) 16775ffd83dbSDimitry Andric .addImm(0); 16785ffd83dbSDimitry Andric } else { 1679e8d8bef9SDimitry Andric std::tie(BaseOffset, ImmOffset) = 168006c3fb27SDimitry Andric AMDGPU::getBaseWithConstantOffset(*MRI, BaseOffset, KB); 16815ffd83dbSDimitry Andric 16825ffd83dbSDimitry Andric if (Readfirstlane) { 16835ffd83dbSDimitry Andric // We have the constant offset now, so put the readfirstlane back on the 16845ffd83dbSDimitry Andric // variable component. 16855ffd83dbSDimitry Andric if (!RBI.constrainGenericRegister(BaseOffset, AMDGPU::VGPR_32RegClass, *MRI)) 16865ffd83dbSDimitry Andric return false; 16875ffd83dbSDimitry Andric 16885ffd83dbSDimitry Andric Readfirstlane->getOperand(1).setReg(BaseOffset); 16895ffd83dbSDimitry Andric BaseOffset = Readfirstlane->getOperand(0).getReg(); 16905ffd83dbSDimitry Andric } else { 16915ffd83dbSDimitry Andric if (!RBI.constrainGenericRegister(BaseOffset, 16925ffd83dbSDimitry Andric AMDGPU::SReg_32RegClass, *MRI)) 16935ffd83dbSDimitry Andric return false; 16945ffd83dbSDimitry Andric } 16955ffd83dbSDimitry Andric 16965ffd83dbSDimitry Andric Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); 16975ffd83dbSDimitry Andric BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_LSHL_B32), M0Base) 16985ffd83dbSDimitry Andric .addReg(BaseOffset) 16995f757f3fSDimitry Andric .addImm(16) 17005f757f3fSDimitry Andric .setOperandDead(3); // Dead scc 17015ffd83dbSDimitry Andric 17025ffd83dbSDimitry Andric BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0) 17035ffd83dbSDimitry Andric .addReg(M0Base); 17045ffd83dbSDimitry Andric } 17055ffd83dbSDimitry Andric 17065ffd83dbSDimitry Andric // The resource id offset is computed as (<isa opaque base> + M0[21:16] + 17075ffd83dbSDimitry Andric // offset field) % 64. Some versions of the programming guide omit the m0 17085ffd83dbSDimitry Andric // part, or claim it's from offset 0. 17095ffd83dbSDimitry Andric auto MIB = BuildMI(*MBB, &MI, DL, TII.get(gwsIntrinToOpcode(IID))); 17105ffd83dbSDimitry Andric 17115ffd83dbSDimitry Andric if (HasVSrc) { 17125ffd83dbSDimitry Andric Register VSrc = MI.getOperand(1).getReg(); 17135ffd83dbSDimitry Andric MIB.addReg(VSrc); 1714fe6060f1SDimitry Andric 17155ffd83dbSDimitry Andric if (!RBI.constrainGenericRegister(VSrc, AMDGPU::VGPR_32RegClass, *MRI)) 17165ffd83dbSDimitry Andric return false; 17175ffd83dbSDimitry Andric } 17185ffd83dbSDimitry Andric 17195ffd83dbSDimitry Andric MIB.addImm(ImmOffset) 17205ffd83dbSDimitry Andric .cloneMemRefs(MI); 17215ffd83dbSDimitry Andric 172281ad6265SDimitry Andric TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::data0); 172381ad6265SDimitry Andric 17245ffd83dbSDimitry Andric MI.eraseFromParent(); 17250b57cec5SDimitry Andric return true; 17260b57cec5SDimitry Andric } 17275ffd83dbSDimitry Andric 17285ffd83dbSDimitry Andric bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI, 17295ffd83dbSDimitry Andric bool IsAppend) const { 17305ffd83dbSDimitry Andric Register PtrBase = MI.getOperand(2).getReg(); 17315ffd83dbSDimitry Andric LLT PtrTy = MRI->getType(PtrBase); 17325ffd83dbSDimitry Andric bool IsGDS = PtrTy.getAddressSpace() == AMDGPUAS::REGION_ADDRESS; 17335ffd83dbSDimitry Andric 17345ffd83dbSDimitry Andric unsigned Offset; 17355ffd83dbSDimitry Andric std::tie(PtrBase, Offset) = selectDS1Addr1OffsetImpl(MI.getOperand(2)); 17365ffd83dbSDimitry Andric 17375ffd83dbSDimitry Andric // TODO: Should this try to look through readfirstlane like GWS? 1738e8d8bef9SDimitry Andric if (!isDSOffsetLegal(PtrBase, Offset)) { 17395ffd83dbSDimitry Andric PtrBase = MI.getOperand(2).getReg(); 17405ffd83dbSDimitry Andric Offset = 0; 17415ffd83dbSDimitry Andric } 17425ffd83dbSDimitry Andric 17435ffd83dbSDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 17445ffd83dbSDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 17455ffd83dbSDimitry Andric const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME; 17465ffd83dbSDimitry Andric 17475ffd83dbSDimitry Andric BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0) 17485ffd83dbSDimitry Andric .addReg(PtrBase); 1749e8d8bef9SDimitry Andric if (!RBI.constrainGenericRegister(PtrBase, AMDGPU::SReg_32RegClass, *MRI)) 1750e8d8bef9SDimitry Andric return false; 1751e8d8bef9SDimitry Andric 1752e8d8bef9SDimitry Andric auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc), MI.getOperand(0).getReg()) 17535ffd83dbSDimitry Andric .addImm(Offset) 17545ffd83dbSDimitry Andric .addImm(IsGDS ? -1 : 0) 17555ffd83dbSDimitry Andric .cloneMemRefs(MI); 17565ffd83dbSDimitry Andric MI.eraseFromParent(); 1757e8d8bef9SDimitry Andric return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); 1758e8d8bef9SDimitry Andric } 1759e8d8bef9SDimitry Andric 1760e8d8bef9SDimitry Andric bool AMDGPUInstructionSelector::selectSBarrier(MachineInstr &MI) const { 17615f757f3fSDimitry Andric if (TM.getOptLevel() > CodeGenOptLevel::None) { 1762e8d8bef9SDimitry Andric unsigned WGSize = STI.getFlatWorkGroupSizes(MF->getFunction()).second; 1763e8d8bef9SDimitry Andric if (WGSize <= STI.getWavefrontSize()) { 1764e8d8bef9SDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 1765e8d8bef9SDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 1766e8d8bef9SDimitry Andric BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::WAVE_BARRIER)); 1767e8d8bef9SDimitry Andric MI.eraseFromParent(); 17685ffd83dbSDimitry Andric return true; 17695ffd83dbSDimitry Andric } 1770e8d8bef9SDimitry Andric } 17715f757f3fSDimitry Andric 17725f757f3fSDimitry Andric // On GFX12 lower s_barrier into s_barrier_signal_imm and s_barrier_wait 17735f757f3fSDimitry Andric if (STI.hasSplitBarriers()) { 17745f757f3fSDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 17755f757f3fSDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 17765f757f3fSDimitry Andric BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_IMM)) 17775f757f3fSDimitry Andric .addImm(AMDGPU::Barrier::WORKGROUP); 17785f757f3fSDimitry Andric BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_BARRIER_WAIT)) 17795f757f3fSDimitry Andric .addImm(AMDGPU::Barrier::WORKGROUP); 17805f757f3fSDimitry Andric MI.eraseFromParent(); 17815f757f3fSDimitry Andric return true; 17825f757f3fSDimitry Andric } 17835f757f3fSDimitry Andric 1784e8d8bef9SDimitry Andric return selectImpl(MI, *CoverageInfo); 1785e8d8bef9SDimitry Andric } 17865ffd83dbSDimitry Andric 17875ffd83dbSDimitry Andric static bool parseTexFail(uint64_t TexFailCtrl, bool &TFE, bool &LWE, 17885ffd83dbSDimitry Andric bool &IsTexFail) { 17895ffd83dbSDimitry Andric if (TexFailCtrl) 17905ffd83dbSDimitry Andric IsTexFail = true; 17915ffd83dbSDimitry Andric 179204eeddc0SDimitry Andric TFE = (TexFailCtrl & 0x1) ? true : false; 17935ffd83dbSDimitry Andric TexFailCtrl &= ~(uint64_t)0x1; 179404eeddc0SDimitry Andric LWE = (TexFailCtrl & 0x2) ? true : false; 17955ffd83dbSDimitry Andric TexFailCtrl &= ~(uint64_t)0x2; 17965ffd83dbSDimitry Andric 17975ffd83dbSDimitry Andric return TexFailCtrl == 0; 17985ffd83dbSDimitry Andric } 17995ffd83dbSDimitry Andric 18005ffd83dbSDimitry Andric bool AMDGPUInstructionSelector::selectImageIntrinsic( 18015ffd83dbSDimitry Andric MachineInstr &MI, const AMDGPU::ImageDimIntrinsicInfo *Intr) const { 18025ffd83dbSDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 18035ffd83dbSDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 18045ffd83dbSDimitry Andric 18055ffd83dbSDimitry Andric const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 18065ffd83dbSDimitry Andric AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode); 18075ffd83dbSDimitry Andric 18085ffd83dbSDimitry Andric const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim); 18095ffd83dbSDimitry Andric unsigned IntrOpcode = Intr->BaseOpcode; 1810e8d8bef9SDimitry Andric const bool IsGFX10Plus = AMDGPU::isGFX10Plus(STI); 181181ad6265SDimitry Andric const bool IsGFX11Plus = AMDGPU::isGFX11Plus(STI); 18125f757f3fSDimitry Andric const bool IsGFX12Plus = AMDGPU::isGFX12Plus(STI); 18135ffd83dbSDimitry Andric 1814e8d8bef9SDimitry Andric const unsigned ArgOffset = MI.getNumExplicitDefs() + 1; 18155ffd83dbSDimitry Andric 18165ffd83dbSDimitry Andric Register VDataIn, VDataOut; 18175ffd83dbSDimitry Andric LLT VDataTy; 18185ffd83dbSDimitry Andric int NumVDataDwords = -1; 181904eeddc0SDimitry Andric bool IsD16 = MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 || 182004eeddc0SDimitry Andric MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16; 18215ffd83dbSDimitry Andric 18225ffd83dbSDimitry Andric bool Unorm; 1823e8d8bef9SDimitry Andric if (!BaseOpcode->Sampler) 18245ffd83dbSDimitry Andric Unorm = true; 1825e8d8bef9SDimitry Andric else 1826e8d8bef9SDimitry Andric Unorm = MI.getOperand(ArgOffset + Intr->UnormIndex).getImm() != 0; 18275ffd83dbSDimitry Andric 18285ffd83dbSDimitry Andric bool TFE; 18295ffd83dbSDimitry Andric bool LWE; 18305ffd83dbSDimitry Andric bool IsTexFail = false; 1831e8d8bef9SDimitry Andric if (!parseTexFail(MI.getOperand(ArgOffset + Intr->TexFailCtrlIndex).getImm(), 1832e8d8bef9SDimitry Andric TFE, LWE, IsTexFail)) 18335ffd83dbSDimitry Andric return false; 18345ffd83dbSDimitry Andric 1835e8d8bef9SDimitry Andric const int Flags = MI.getOperand(ArgOffset + Intr->NumArgs).getImm(); 18365ffd83dbSDimitry Andric const bool IsA16 = (Flags & 1) != 0; 18375ffd83dbSDimitry Andric const bool IsG16 = (Flags & 2) != 0; 18385ffd83dbSDimitry Andric 1839fe6060f1SDimitry Andric // A16 implies 16 bit gradients if subtarget doesn't support G16 1840fe6060f1SDimitry Andric if (IsA16 && !STI.hasG16() && !IsG16) 18415ffd83dbSDimitry Andric return false; 18425ffd83dbSDimitry Andric 18435ffd83dbSDimitry Andric unsigned DMask = 0; 18445ffd83dbSDimitry Andric unsigned DMaskLanes = 0; 18455ffd83dbSDimitry Andric 18465ffd83dbSDimitry Andric if (BaseOpcode->Atomic) { 18475ffd83dbSDimitry Andric VDataOut = MI.getOperand(0).getReg(); 18485ffd83dbSDimitry Andric VDataIn = MI.getOperand(2).getReg(); 18495ffd83dbSDimitry Andric LLT Ty = MRI->getType(VDataIn); 18505ffd83dbSDimitry Andric 18515ffd83dbSDimitry Andric // Be careful to allow atomic swap on 16-bit element vectors. 18525ffd83dbSDimitry Andric const bool Is64Bit = BaseOpcode->AtomicX2 ? 18535ffd83dbSDimitry Andric Ty.getSizeInBits() == 128 : 18545ffd83dbSDimitry Andric Ty.getSizeInBits() == 64; 18555ffd83dbSDimitry Andric 18565ffd83dbSDimitry Andric if (BaseOpcode->AtomicX2) { 18575ffd83dbSDimitry Andric assert(MI.getOperand(3).getReg() == AMDGPU::NoRegister); 18585ffd83dbSDimitry Andric 18595ffd83dbSDimitry Andric DMask = Is64Bit ? 0xf : 0x3; 18605ffd83dbSDimitry Andric NumVDataDwords = Is64Bit ? 4 : 2; 18615ffd83dbSDimitry Andric } else { 18625ffd83dbSDimitry Andric DMask = Is64Bit ? 0x3 : 0x1; 18635ffd83dbSDimitry Andric NumVDataDwords = Is64Bit ? 2 : 1; 18645ffd83dbSDimitry Andric } 18655ffd83dbSDimitry Andric } else { 1866e8d8bef9SDimitry Andric DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm(); 1867bdd1243dSDimitry Andric DMaskLanes = BaseOpcode->Gather4 ? 4 : llvm::popcount(DMask); 18685ffd83dbSDimitry Andric 18695ffd83dbSDimitry Andric if (BaseOpcode->Store) { 18705ffd83dbSDimitry Andric VDataIn = MI.getOperand(1).getReg(); 18715ffd83dbSDimitry Andric VDataTy = MRI->getType(VDataIn); 18725ffd83dbSDimitry Andric NumVDataDwords = (VDataTy.getSizeInBits() + 31) / 32; 1873*0fca6ea1SDimitry Andric } else if (BaseOpcode->NoReturn) { 1874*0fca6ea1SDimitry Andric NumVDataDwords = 0; 18755ffd83dbSDimitry Andric } else { 18765ffd83dbSDimitry Andric VDataOut = MI.getOperand(0).getReg(); 18775ffd83dbSDimitry Andric VDataTy = MRI->getType(VDataOut); 18785ffd83dbSDimitry Andric NumVDataDwords = DMaskLanes; 18795ffd83dbSDimitry Andric 18805ffd83dbSDimitry Andric if (IsD16 && !STI.hasUnpackedD16VMem()) 18815ffd83dbSDimitry Andric NumVDataDwords = (DMaskLanes + 1) / 2; 18825ffd83dbSDimitry Andric } 18835ffd83dbSDimitry Andric } 18845ffd83dbSDimitry Andric 18855ffd83dbSDimitry Andric // Set G16 opcode 188606c3fb27SDimitry Andric if (Subtarget->hasG16() && IsG16) { 18875ffd83dbSDimitry Andric const AMDGPU::MIMGG16MappingInfo *G16MappingInfo = 18885ffd83dbSDimitry Andric AMDGPU::getMIMGG16MappingInfo(Intr->BaseOpcode); 18895ffd83dbSDimitry Andric assert(G16MappingInfo); 18905ffd83dbSDimitry Andric IntrOpcode = G16MappingInfo->G16; // set opcode to variant with _g16 18915ffd83dbSDimitry Andric } 18925ffd83dbSDimitry Andric 18935ffd83dbSDimitry Andric // TODO: Check this in verifier. 18945ffd83dbSDimitry Andric assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this"); 18955ffd83dbSDimitry Andric 1896fe6060f1SDimitry Andric unsigned CPol = MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm(); 1897fe6060f1SDimitry Andric if (BaseOpcode->Atomic) 1898fe6060f1SDimitry Andric CPol |= AMDGPU::CPol::GLC; // TODO no-return optimization 18997a6dacacSDimitry Andric if (CPol & ~((IsGFX12Plus ? AMDGPU::CPol::ALL : AMDGPU::CPol::ALL_pregfx12) | 19007a6dacacSDimitry Andric AMDGPU::CPol::VOLATILE)) 19015ffd83dbSDimitry Andric return false; 19025ffd83dbSDimitry Andric 19035ffd83dbSDimitry Andric int NumVAddrRegs = 0; 19045ffd83dbSDimitry Andric int NumVAddrDwords = 0; 1905e8d8bef9SDimitry Andric for (unsigned I = Intr->VAddrStart; I < Intr->VAddrEnd; I++) { 19065ffd83dbSDimitry Andric // Skip the $noregs and 0s inserted during legalization. 1907e8d8bef9SDimitry Andric MachineOperand &AddrOp = MI.getOperand(ArgOffset + I); 19085ffd83dbSDimitry Andric if (!AddrOp.isReg()) 19095ffd83dbSDimitry Andric continue; // XXX - Break? 19105ffd83dbSDimitry Andric 19115ffd83dbSDimitry Andric Register Addr = AddrOp.getReg(); 19125ffd83dbSDimitry Andric if (!Addr) 19135ffd83dbSDimitry Andric break; 19145ffd83dbSDimitry Andric 19155ffd83dbSDimitry Andric ++NumVAddrRegs; 19165ffd83dbSDimitry Andric NumVAddrDwords += (MRI->getType(Addr).getSizeInBits() + 31) / 32; 19175ffd83dbSDimitry Andric } 19185ffd83dbSDimitry Andric 19195ffd83dbSDimitry Andric // The legalizer preprocessed the intrinsic arguments. If we aren't using 192081ad6265SDimitry Andric // NSA, these should have been packed into a single value in the first 19215ffd83dbSDimitry Andric // address register 192206c3fb27SDimitry Andric const bool UseNSA = 192306c3fb27SDimitry Andric NumVAddrRegs != 1 && 192406c3fb27SDimitry Andric (STI.hasPartialNSAEncoding() ? NumVAddrDwords >= NumVAddrRegs 192506c3fb27SDimitry Andric : NumVAddrDwords == NumVAddrRegs); 19265ffd83dbSDimitry Andric if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) { 19275ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << "Trying to use NSA on non-NSA target\n"); 19285ffd83dbSDimitry Andric return false; 19295ffd83dbSDimitry Andric } 19305ffd83dbSDimitry Andric 19315ffd83dbSDimitry Andric if (IsTexFail) 19325ffd83dbSDimitry Andric ++NumVDataDwords; 19335ffd83dbSDimitry Andric 19345ffd83dbSDimitry Andric int Opcode = -1; 19355f757f3fSDimitry Andric if (IsGFX12Plus) { 19365f757f3fSDimitry Andric Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx12, 19375f757f3fSDimitry Andric NumVDataDwords, NumVAddrDwords); 19385f757f3fSDimitry Andric } else if (IsGFX11Plus) { 193981ad6265SDimitry Andric Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, 194081ad6265SDimitry Andric UseNSA ? AMDGPU::MIMGEncGfx11NSA 194181ad6265SDimitry Andric : AMDGPU::MIMGEncGfx11Default, 194281ad6265SDimitry Andric NumVDataDwords, NumVAddrDwords); 194381ad6265SDimitry Andric } else if (IsGFX10Plus) { 19445ffd83dbSDimitry Andric Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, 19455ffd83dbSDimitry Andric UseNSA ? AMDGPU::MIMGEncGfx10NSA 19465ffd83dbSDimitry Andric : AMDGPU::MIMGEncGfx10Default, 19475ffd83dbSDimitry Andric NumVDataDwords, NumVAddrDwords); 19485ffd83dbSDimitry Andric } else { 194981ad6265SDimitry Andric if (Subtarget->hasGFX90AInsts()) { 195081ad6265SDimitry Andric Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx90a, 195181ad6265SDimitry Andric NumVDataDwords, NumVAddrDwords); 195281ad6265SDimitry Andric if (Opcode == -1) { 195381ad6265SDimitry Andric LLVM_DEBUG( 195481ad6265SDimitry Andric dbgs() 195581ad6265SDimitry Andric << "requested image instruction is not supported on this GPU\n"); 195681ad6265SDimitry Andric return false; 195781ad6265SDimitry Andric } 195881ad6265SDimitry Andric } 195981ad6265SDimitry Andric if (Opcode == -1 && 196081ad6265SDimitry Andric STI.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) 19615ffd83dbSDimitry Andric Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx8, 19625ffd83dbSDimitry Andric NumVDataDwords, NumVAddrDwords); 19635ffd83dbSDimitry Andric if (Opcode == -1) 19645ffd83dbSDimitry Andric Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx6, 19655ffd83dbSDimitry Andric NumVDataDwords, NumVAddrDwords); 19665ffd83dbSDimitry Andric } 196706c3fb27SDimitry Andric if (Opcode == -1) 196806c3fb27SDimitry Andric return false; 19695ffd83dbSDimitry Andric 19705ffd83dbSDimitry Andric auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opcode)) 19715ffd83dbSDimitry Andric .cloneMemRefs(MI); 19725ffd83dbSDimitry Andric 19735ffd83dbSDimitry Andric if (VDataOut) { 19745ffd83dbSDimitry Andric if (BaseOpcode->AtomicX2) { 19755ffd83dbSDimitry Andric const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64; 19765ffd83dbSDimitry Andric 19775ffd83dbSDimitry Andric Register TmpReg = MRI->createVirtualRegister( 19785ffd83dbSDimitry Andric Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass); 19795ffd83dbSDimitry Andric unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0; 19805ffd83dbSDimitry Andric 19815ffd83dbSDimitry Andric MIB.addDef(TmpReg); 1982fe6060f1SDimitry Andric if (!MRI->use_empty(VDataOut)) { 19835ffd83dbSDimitry Andric BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), VDataOut) 19845ffd83dbSDimitry Andric .addReg(TmpReg, RegState::Kill, SubReg); 1985fe6060f1SDimitry Andric } 19865ffd83dbSDimitry Andric 19875ffd83dbSDimitry Andric } else { 19885ffd83dbSDimitry Andric MIB.addDef(VDataOut); // vdata output 19895ffd83dbSDimitry Andric } 19905ffd83dbSDimitry Andric } 19915ffd83dbSDimitry Andric 19925ffd83dbSDimitry Andric if (VDataIn) 19935ffd83dbSDimitry Andric MIB.addReg(VDataIn); // vdata input 19945ffd83dbSDimitry Andric 1995e8d8bef9SDimitry Andric for (int I = 0; I != NumVAddrRegs; ++I) { 1996e8d8bef9SDimitry Andric MachineOperand &SrcOp = MI.getOperand(ArgOffset + Intr->VAddrStart + I); 19975ffd83dbSDimitry Andric if (SrcOp.isReg()) { 19985ffd83dbSDimitry Andric assert(SrcOp.getReg() != 0); 19995ffd83dbSDimitry Andric MIB.addReg(SrcOp.getReg()); 20005ffd83dbSDimitry Andric } 20015ffd83dbSDimitry Andric } 20025ffd83dbSDimitry Andric 2003e8d8bef9SDimitry Andric MIB.addReg(MI.getOperand(ArgOffset + Intr->RsrcIndex).getReg()); 20045ffd83dbSDimitry Andric if (BaseOpcode->Sampler) 2005e8d8bef9SDimitry Andric MIB.addReg(MI.getOperand(ArgOffset + Intr->SampIndex).getReg()); 20065ffd83dbSDimitry Andric 20075ffd83dbSDimitry Andric MIB.addImm(DMask); // dmask 20085ffd83dbSDimitry Andric 2009e8d8bef9SDimitry Andric if (IsGFX10Plus) 20105ffd83dbSDimitry Andric MIB.addImm(DimInfo->Encoding); 20115f757f3fSDimitry Andric if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::unorm)) 20125ffd83dbSDimitry Andric MIB.addImm(Unorm); 20135ffd83dbSDimitry Andric 2014fe6060f1SDimitry Andric MIB.addImm(CPol); 20155ffd83dbSDimitry Andric MIB.addImm(IsA16 && // a16 or r128 20165ffd83dbSDimitry Andric STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0); 2017e8d8bef9SDimitry Andric if (IsGFX10Plus) 20185ffd83dbSDimitry Andric MIB.addImm(IsA16 ? -1 : 0); 20195ffd83dbSDimitry Andric 202081ad6265SDimitry Andric if (!Subtarget->hasGFX90AInsts()) { 20215ffd83dbSDimitry Andric MIB.addImm(TFE); // tfe 202281ad6265SDimitry Andric } else if (TFE) { 202381ad6265SDimitry Andric LLVM_DEBUG(dbgs() << "TFE is not supported on this GPU\n"); 202481ad6265SDimitry Andric return false; 202581ad6265SDimitry Andric } 202681ad6265SDimitry Andric 20275f757f3fSDimitry Andric if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::lwe)) 20285ffd83dbSDimitry Andric MIB.addImm(LWE); // lwe 2029e8d8bef9SDimitry Andric if (!IsGFX10Plus) 20305ffd83dbSDimitry Andric MIB.addImm(DimInfo->DA ? -1 : 0); 20315ffd83dbSDimitry Andric if (BaseOpcode->HasD16) 20325ffd83dbSDimitry Andric MIB.addImm(IsD16 ? -1 : 0); 20335ffd83dbSDimitry Andric 20345ffd83dbSDimitry Andric MI.eraseFromParent(); 203581ad6265SDimitry Andric constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); 203681ad6265SDimitry Andric TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::vaddr); 203781ad6265SDimitry Andric return true; 20385ffd83dbSDimitry Andric } 20395ffd83dbSDimitry Andric 2040bdd1243dSDimitry Andric // We need to handle this here because tablegen doesn't support matching 2041bdd1243dSDimitry Andric // instructions with multiple outputs. 2042bdd1243dSDimitry Andric bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic( 2043bdd1243dSDimitry Andric MachineInstr &MI) const { 2044bdd1243dSDimitry Andric Register Dst0 = MI.getOperand(0).getReg(); 2045bdd1243dSDimitry Andric Register Dst1 = MI.getOperand(1).getReg(); 2046bdd1243dSDimitry Andric 2047bdd1243dSDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 2048bdd1243dSDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 2049bdd1243dSDimitry Andric 2050bdd1243dSDimitry Andric Register Addr = MI.getOperand(3).getReg(); 2051bdd1243dSDimitry Andric Register Data0 = MI.getOperand(4).getReg(); 2052bdd1243dSDimitry Andric Register Data1 = MI.getOperand(5).getReg(); 2053bdd1243dSDimitry Andric unsigned Offset = MI.getOperand(6).getImm(); 2054bdd1243dSDimitry Andric 2055bdd1243dSDimitry Andric auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_BVH_STACK_RTN_B32), Dst0) 2056bdd1243dSDimitry Andric .addDef(Dst1) 2057bdd1243dSDimitry Andric .addUse(Addr) 2058bdd1243dSDimitry Andric .addUse(Data0) 2059bdd1243dSDimitry Andric .addUse(Data1) 2060bdd1243dSDimitry Andric .addImm(Offset) 2061bdd1243dSDimitry Andric .cloneMemRefs(MI); 2062bdd1243dSDimitry Andric 2063bdd1243dSDimitry Andric MI.eraseFromParent(); 2064bdd1243dSDimitry Andric return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); 2065bdd1243dSDimitry Andric } 2066bdd1243dSDimitry Andric 20675ffd83dbSDimitry Andric bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS( 20685ffd83dbSDimitry Andric MachineInstr &I) const { 2069*0fca6ea1SDimitry Andric Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID(); 20705ffd83dbSDimitry Andric switch (IntrinsicID) { 20715ffd83dbSDimitry Andric case Intrinsic::amdgcn_end_cf: 20725ffd83dbSDimitry Andric return selectEndCfIntrinsic(I); 2073480093f4SDimitry Andric case Intrinsic::amdgcn_ds_ordered_add: 2074480093f4SDimitry Andric case Intrinsic::amdgcn_ds_ordered_swap: 2075480093f4SDimitry Andric return selectDSOrderedIntrinsic(I, IntrinsicID); 20765ffd83dbSDimitry Andric case Intrinsic::amdgcn_ds_gws_init: 20775ffd83dbSDimitry Andric case Intrinsic::amdgcn_ds_gws_barrier: 20785ffd83dbSDimitry Andric case Intrinsic::amdgcn_ds_gws_sema_v: 20795ffd83dbSDimitry Andric case Intrinsic::amdgcn_ds_gws_sema_br: 20805ffd83dbSDimitry Andric case Intrinsic::amdgcn_ds_gws_sema_p: 20815ffd83dbSDimitry Andric case Intrinsic::amdgcn_ds_gws_sema_release_all: 20825ffd83dbSDimitry Andric return selectDSGWSIntrinsic(I, IntrinsicID); 20835ffd83dbSDimitry Andric case Intrinsic::amdgcn_ds_append: 20845ffd83dbSDimitry Andric return selectDSAppendConsume(I, true); 20855ffd83dbSDimitry Andric case Intrinsic::amdgcn_ds_consume: 20865ffd83dbSDimitry Andric return selectDSAppendConsume(I, false); 2087e8d8bef9SDimitry Andric case Intrinsic::amdgcn_s_barrier: 2088e8d8bef9SDimitry Andric return selectSBarrier(I); 208981ad6265SDimitry Andric case Intrinsic::amdgcn_raw_buffer_load_lds: 209006c3fb27SDimitry Andric case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: 209181ad6265SDimitry Andric case Intrinsic::amdgcn_struct_buffer_load_lds: 209206c3fb27SDimitry Andric case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: 209381ad6265SDimitry Andric return selectBufferLoadLds(I); 209481ad6265SDimitry Andric case Intrinsic::amdgcn_global_load_lds: 209581ad6265SDimitry Andric return selectGlobalLoadLds(I); 209681ad6265SDimitry Andric case Intrinsic::amdgcn_exp_compr: 209781ad6265SDimitry Andric if (!STI.hasCompressedExport()) { 209881ad6265SDimitry Andric Function &F = I.getMF()->getFunction(); 209981ad6265SDimitry Andric DiagnosticInfoUnsupported NoFpRet( 210081ad6265SDimitry Andric F, "intrinsic not supported on subtarget", I.getDebugLoc(), DS_Error); 210181ad6265SDimitry Andric F.getContext().diagnose(NoFpRet); 210281ad6265SDimitry Andric return false; 210381ad6265SDimitry Andric } 210481ad6265SDimitry Andric break; 2105bdd1243dSDimitry Andric case Intrinsic::amdgcn_ds_bvh_stack_rtn: 2106bdd1243dSDimitry Andric return selectDSBvhStackIntrinsic(I); 21075f757f3fSDimitry Andric case Intrinsic::amdgcn_s_barrier_init: 21085f757f3fSDimitry Andric case Intrinsic::amdgcn_s_barrier_join: 21095f757f3fSDimitry Andric case Intrinsic::amdgcn_s_wakeup_barrier: 21105f757f3fSDimitry Andric case Intrinsic::amdgcn_s_get_barrier_state: 21115f757f3fSDimitry Andric return selectNamedBarrierInst(I, IntrinsicID); 21125f757f3fSDimitry Andric case Intrinsic::amdgcn_s_barrier_signal_isfirst: 21135f757f3fSDimitry Andric case Intrinsic::amdgcn_s_barrier_signal_isfirst_var: 21145f757f3fSDimitry Andric return selectSBarrierSignalIsfirst(I, IntrinsicID); 21155f757f3fSDimitry Andric case Intrinsic::amdgcn_s_barrier_leave: 21165f757f3fSDimitry Andric return selectSBarrierLeave(I); 211781ad6265SDimitry Andric } 21188bcb0991SDimitry Andric return selectImpl(I, *CoverageInfo); 21190b57cec5SDimitry Andric } 21200b57cec5SDimitry Andric 21210b57cec5SDimitry Andric bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const { 21225ffd83dbSDimitry Andric if (selectImpl(I, *CoverageInfo)) 21235ffd83dbSDimitry Andric return true; 21245ffd83dbSDimitry Andric 21250b57cec5SDimitry Andric MachineBasicBlock *BB = I.getParent(); 21260b57cec5SDimitry Andric const DebugLoc &DL = I.getDebugLoc(); 21270b57cec5SDimitry Andric 21288bcb0991SDimitry Andric Register DstReg = I.getOperand(0).getReg(); 21298bcb0991SDimitry Andric unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI); 21300b57cec5SDimitry Andric assert(Size <= 32 || Size == 64); 21310b57cec5SDimitry Andric const MachineOperand &CCOp = I.getOperand(1); 21328bcb0991SDimitry Andric Register CCReg = CCOp.getReg(); 2133480093f4SDimitry Andric if (!isVCC(CCReg, *MRI)) { 21340b57cec5SDimitry Andric unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 : 21350b57cec5SDimitry Andric AMDGPU::S_CSELECT_B32; 21360b57cec5SDimitry Andric MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC) 21370b57cec5SDimitry Andric .addReg(CCReg); 21380b57cec5SDimitry Andric 21390b57cec5SDimitry Andric // The generic constrainSelectedInstRegOperands doesn't work for the scc register 21400b57cec5SDimitry Andric // bank, because it does not cover the register class that we used to represent 21410b57cec5SDimitry Andric // for it. So we need to manually set the register class here. 21428bcb0991SDimitry Andric if (!MRI->getRegClassOrNull(CCReg)) 21438bcb0991SDimitry Andric MRI->setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, *MRI)); 21440b57cec5SDimitry Andric MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg) 21450b57cec5SDimitry Andric .add(I.getOperand(2)) 21460b57cec5SDimitry Andric .add(I.getOperand(3)); 21470b57cec5SDimitry Andric 2148349cc55cSDimitry Andric bool Ret = false; 2149349cc55cSDimitry Andric Ret |= constrainSelectedInstRegOperands(*Select, TII, TRI, RBI); 2150349cc55cSDimitry Andric Ret |= constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI); 21510b57cec5SDimitry Andric I.eraseFromParent(); 21520b57cec5SDimitry Andric return Ret; 21530b57cec5SDimitry Andric } 21540b57cec5SDimitry Andric 21550b57cec5SDimitry Andric // Wide VGPR select should have been split in RegBankSelect. 21560b57cec5SDimitry Andric if (Size > 32) 21570b57cec5SDimitry Andric return false; 21580b57cec5SDimitry Andric 21590b57cec5SDimitry Andric MachineInstr *Select = 21600b57cec5SDimitry Andric BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg) 21610b57cec5SDimitry Andric .addImm(0) 21620b57cec5SDimitry Andric .add(I.getOperand(3)) 21630b57cec5SDimitry Andric .addImm(0) 21640b57cec5SDimitry Andric .add(I.getOperand(2)) 21650b57cec5SDimitry Andric .add(I.getOperand(1)); 21660b57cec5SDimitry Andric 21670b57cec5SDimitry Andric bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI); 21680b57cec5SDimitry Andric I.eraseFromParent(); 21690b57cec5SDimitry Andric return Ret; 21700b57cec5SDimitry Andric } 21710b57cec5SDimitry Andric 21720b57cec5SDimitry Andric static int sizeToSubRegIndex(unsigned Size) { 21730b57cec5SDimitry Andric switch (Size) { 21740b57cec5SDimitry Andric case 32: 21750b57cec5SDimitry Andric return AMDGPU::sub0; 21760b57cec5SDimitry Andric case 64: 21770b57cec5SDimitry Andric return AMDGPU::sub0_sub1; 21780b57cec5SDimitry Andric case 96: 21790b57cec5SDimitry Andric return AMDGPU::sub0_sub1_sub2; 21800b57cec5SDimitry Andric case 128: 21810b57cec5SDimitry Andric return AMDGPU::sub0_sub1_sub2_sub3; 21820b57cec5SDimitry Andric case 256: 21830b57cec5SDimitry Andric return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7; 21840b57cec5SDimitry Andric default: 21850b57cec5SDimitry Andric if (Size < 32) 21860b57cec5SDimitry Andric return AMDGPU::sub0; 21870b57cec5SDimitry Andric if (Size > 256) 21880b57cec5SDimitry Andric return -1; 218906c3fb27SDimitry Andric return sizeToSubRegIndex(llvm::bit_ceil(Size)); 21900b57cec5SDimitry Andric } 21910b57cec5SDimitry Andric } 21920b57cec5SDimitry Andric 21930b57cec5SDimitry Andric bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const { 21948bcb0991SDimitry Andric Register DstReg = I.getOperand(0).getReg(); 21958bcb0991SDimitry Andric Register SrcReg = I.getOperand(1).getReg(); 21968bcb0991SDimitry Andric const LLT DstTy = MRI->getType(DstReg); 21978bcb0991SDimitry Andric const LLT SrcTy = MRI->getType(SrcReg); 2198480093f4SDimitry Andric const LLT S1 = LLT::scalar(1); 2199480093f4SDimitry Andric 22008bcb0991SDimitry Andric const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI); 2201480093f4SDimitry Andric const RegisterBank *DstRB; 2202480093f4SDimitry Andric if (DstTy == S1) { 2203480093f4SDimitry Andric // This is a special case. We don't treat s1 for legalization artifacts as 2204480093f4SDimitry Andric // vcc booleans. 2205480093f4SDimitry Andric DstRB = SrcRB; 2206480093f4SDimitry Andric } else { 2207480093f4SDimitry Andric DstRB = RBI.getRegBank(DstReg, *MRI, TRI); 22080b57cec5SDimitry Andric if (SrcRB != DstRB) 22090b57cec5SDimitry Andric return false; 2210480093f4SDimitry Andric } 22110b57cec5SDimitry Andric 22125ffd83dbSDimitry Andric const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID; 22135ffd83dbSDimitry Andric 22140b57cec5SDimitry Andric unsigned DstSize = DstTy.getSizeInBits(); 22150b57cec5SDimitry Andric unsigned SrcSize = SrcTy.getSizeInBits(); 22160b57cec5SDimitry Andric 221781ad6265SDimitry Andric const TargetRegisterClass *SrcRC = 221881ad6265SDimitry Andric TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB); 221981ad6265SDimitry Andric const TargetRegisterClass *DstRC = 222081ad6265SDimitry Andric TRI.getRegClassForSizeOnBank(DstSize, *DstRB); 22215ffd83dbSDimitry Andric if (!SrcRC || !DstRC) 22225ffd83dbSDimitry Andric return false; 22235ffd83dbSDimitry Andric 22245ffd83dbSDimitry Andric if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) || 22255ffd83dbSDimitry Andric !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) { 22265ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC\n"); 22275ffd83dbSDimitry Andric return false; 22285ffd83dbSDimitry Andric } 22295ffd83dbSDimitry Andric 2230fe6060f1SDimitry Andric if (DstTy == LLT::fixed_vector(2, 16) && SrcTy == LLT::fixed_vector(2, 32)) { 22315ffd83dbSDimitry Andric MachineBasicBlock *MBB = I.getParent(); 22325ffd83dbSDimitry Andric const DebugLoc &DL = I.getDebugLoc(); 22335ffd83dbSDimitry Andric 22345ffd83dbSDimitry Andric Register LoReg = MRI->createVirtualRegister(DstRC); 22355ffd83dbSDimitry Andric Register HiReg = MRI->createVirtualRegister(DstRC); 22365ffd83dbSDimitry Andric BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), LoReg) 22375ffd83dbSDimitry Andric .addReg(SrcReg, 0, AMDGPU::sub0); 22385ffd83dbSDimitry Andric BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), HiReg) 22395ffd83dbSDimitry Andric .addReg(SrcReg, 0, AMDGPU::sub1); 22405ffd83dbSDimitry Andric 22415ffd83dbSDimitry Andric if (IsVALU && STI.hasSDWA()) { 22425ffd83dbSDimitry Andric // Write the low 16-bits of the high element into the high 16-bits of the 22435ffd83dbSDimitry Andric // low element. 22445ffd83dbSDimitry Andric MachineInstr *MovSDWA = 22455ffd83dbSDimitry Andric BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg) 22465ffd83dbSDimitry Andric .addImm(0) // $src0_modifiers 22475ffd83dbSDimitry Andric .addReg(HiReg) // $src0 22485ffd83dbSDimitry Andric .addImm(0) // $clamp 22495ffd83dbSDimitry Andric .addImm(AMDGPU::SDWA::WORD_1) // $dst_sel 22505ffd83dbSDimitry Andric .addImm(AMDGPU::SDWA::UNUSED_PRESERVE) // $dst_unused 22515ffd83dbSDimitry Andric .addImm(AMDGPU::SDWA::WORD_0) // $src0_sel 22525ffd83dbSDimitry Andric .addReg(LoReg, RegState::Implicit); 22535ffd83dbSDimitry Andric MovSDWA->tieOperands(0, MovSDWA->getNumOperands() - 1); 22545ffd83dbSDimitry Andric } else { 22555ffd83dbSDimitry Andric Register TmpReg0 = MRI->createVirtualRegister(DstRC); 22565ffd83dbSDimitry Andric Register TmpReg1 = MRI->createVirtualRegister(DstRC); 22575ffd83dbSDimitry Andric Register ImmReg = MRI->createVirtualRegister(DstRC); 22585ffd83dbSDimitry Andric if (IsVALU) { 22595ffd83dbSDimitry Andric BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0) 22605ffd83dbSDimitry Andric .addImm(16) 22615ffd83dbSDimitry Andric .addReg(HiReg); 22625ffd83dbSDimitry Andric } else { 22635ffd83dbSDimitry Andric BuildMI(*MBB, I, DL, TII.get(AMDGPU::S_LSHL_B32), TmpReg0) 22645ffd83dbSDimitry Andric .addReg(HiReg) 22655f757f3fSDimitry Andric .addImm(16) 22665f757f3fSDimitry Andric .setOperandDead(3); // Dead scc 22675ffd83dbSDimitry Andric } 22685ffd83dbSDimitry Andric 22695ffd83dbSDimitry Andric unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32; 22705ffd83dbSDimitry Andric unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32; 22715ffd83dbSDimitry Andric unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32; 22725ffd83dbSDimitry Andric 22735ffd83dbSDimitry Andric BuildMI(*MBB, I, DL, TII.get(MovOpc), ImmReg) 22745ffd83dbSDimitry Andric .addImm(0xffff); 22755f757f3fSDimitry Andric auto And = BuildMI(*MBB, I, DL, TII.get(AndOpc), TmpReg1) 22765ffd83dbSDimitry Andric .addReg(LoReg) 22775ffd83dbSDimitry Andric .addReg(ImmReg); 22785f757f3fSDimitry Andric auto Or = BuildMI(*MBB, I, DL, TII.get(OrOpc), DstReg) 22795ffd83dbSDimitry Andric .addReg(TmpReg0) 22805ffd83dbSDimitry Andric .addReg(TmpReg1); 22815f757f3fSDimitry Andric 22825f757f3fSDimitry Andric if (!IsVALU) { 22835f757f3fSDimitry Andric And.setOperandDead(3); // Dead scc 22845f757f3fSDimitry Andric Or.setOperandDead(3); // Dead scc 22855f757f3fSDimitry Andric } 22865ffd83dbSDimitry Andric } 22875ffd83dbSDimitry Andric 22885ffd83dbSDimitry Andric I.eraseFromParent(); 22895ffd83dbSDimitry Andric return true; 22905ffd83dbSDimitry Andric } 22915ffd83dbSDimitry Andric 22925ffd83dbSDimitry Andric if (!DstTy.isScalar()) 22935ffd83dbSDimitry Andric return false; 22940b57cec5SDimitry Andric 22950b57cec5SDimitry Andric if (SrcSize > 32) { 22960b57cec5SDimitry Andric int SubRegIdx = sizeToSubRegIndex(DstSize); 22970b57cec5SDimitry Andric if (SubRegIdx == -1) 22980b57cec5SDimitry Andric return false; 22990b57cec5SDimitry Andric 23000b57cec5SDimitry Andric // Deal with weird cases where the class only partially supports the subreg 23010b57cec5SDimitry Andric // index. 23025ffd83dbSDimitry Andric const TargetRegisterClass *SrcWithSubRC 23035ffd83dbSDimitry Andric = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx); 23045ffd83dbSDimitry Andric if (!SrcWithSubRC) 23050b57cec5SDimitry Andric return false; 23060b57cec5SDimitry Andric 23075ffd83dbSDimitry Andric if (SrcWithSubRC != SrcRC) { 23085ffd83dbSDimitry Andric if (!RBI.constrainGenericRegister(SrcReg, *SrcWithSubRC, *MRI)) 23095ffd83dbSDimitry Andric return false; 23100b57cec5SDimitry Andric } 23110b57cec5SDimitry Andric 23125ffd83dbSDimitry Andric I.getOperand(1).setSubReg(SubRegIdx); 23130b57cec5SDimitry Andric } 23140b57cec5SDimitry Andric 23150b57cec5SDimitry Andric I.setDesc(TII.get(TargetOpcode::COPY)); 23160b57cec5SDimitry Andric return true; 23170b57cec5SDimitry Andric } 23180b57cec5SDimitry Andric 23190b57cec5SDimitry Andric /// \returns true if a bitmask for \p Size bits will be an inline immediate. 23200b57cec5SDimitry Andric static bool shouldUseAndMask(unsigned Size, unsigned &Mask) { 23210b57cec5SDimitry Andric Mask = maskTrailingOnes<unsigned>(Size); 23220b57cec5SDimitry Andric int SignedMask = static_cast<int>(Mask); 23230b57cec5SDimitry Andric return SignedMask >= -16 && SignedMask <= 64; 23240b57cec5SDimitry Andric } 23250b57cec5SDimitry Andric 2326480093f4SDimitry Andric // Like RegisterBankInfo::getRegBank, but don't assume vcc for s1. 2327480093f4SDimitry Andric const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank( 2328480093f4SDimitry Andric Register Reg, const MachineRegisterInfo &MRI, 2329480093f4SDimitry Andric const TargetRegisterInfo &TRI) const { 2330480093f4SDimitry Andric const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); 2331480093f4SDimitry Andric if (auto *RB = RegClassOrBank.dyn_cast<const RegisterBank *>()) 2332480093f4SDimitry Andric return RB; 2333480093f4SDimitry Andric 2334480093f4SDimitry Andric // Ignore the type, since we don't use vcc in artifacts. 2335480093f4SDimitry Andric if (auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>()) 2336480093f4SDimitry Andric return &RBI.getRegBankFromRegClass(*RC, LLT()); 2337480093f4SDimitry Andric return nullptr; 2338480093f4SDimitry Andric } 2339480093f4SDimitry Andric 23400b57cec5SDimitry Andric bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const { 23415ffd83dbSDimitry Andric bool InReg = I.getOpcode() == AMDGPU::G_SEXT_INREG; 23425ffd83dbSDimitry Andric bool Signed = I.getOpcode() == AMDGPU::G_SEXT || InReg; 23430b57cec5SDimitry Andric const DebugLoc &DL = I.getDebugLoc(); 23440b57cec5SDimitry Andric MachineBasicBlock &MBB = *I.getParent(); 23458bcb0991SDimitry Andric const Register DstReg = I.getOperand(0).getReg(); 23468bcb0991SDimitry Andric const Register SrcReg = I.getOperand(1).getReg(); 23470b57cec5SDimitry Andric 23488bcb0991SDimitry Andric const LLT DstTy = MRI->getType(DstReg); 23498bcb0991SDimitry Andric const LLT SrcTy = MRI->getType(SrcReg); 23505ffd83dbSDimitry Andric const unsigned SrcSize = I.getOpcode() == AMDGPU::G_SEXT_INREG ? 23515ffd83dbSDimitry Andric I.getOperand(2).getImm() : SrcTy.getSizeInBits(); 23520b57cec5SDimitry Andric const unsigned DstSize = DstTy.getSizeInBits(); 23530b57cec5SDimitry Andric if (!DstTy.isScalar()) 23540b57cec5SDimitry Andric return false; 23550b57cec5SDimitry Andric 2356480093f4SDimitry Andric // Artifact casts should never use vcc. 2357480093f4SDimitry Andric const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI); 2358480093f4SDimitry Andric 2359e8d8bef9SDimitry Andric // FIXME: This should probably be illegal and split earlier. 2360e8d8bef9SDimitry Andric if (I.getOpcode() == AMDGPU::G_ANYEXT) { 2361e8d8bef9SDimitry Andric if (DstSize <= 32) 2362e8d8bef9SDimitry Andric return selectCOPY(I); 2363e8d8bef9SDimitry Andric 2364e8d8bef9SDimitry Andric const TargetRegisterClass *SrcRC = 236581ad6265SDimitry Andric TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank); 2366e8d8bef9SDimitry Andric const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI); 2367e8d8bef9SDimitry Andric const TargetRegisterClass *DstRC = 236881ad6265SDimitry Andric TRI.getRegClassForSizeOnBank(DstSize, *DstBank); 2369e8d8bef9SDimitry Andric 2370e8d8bef9SDimitry Andric Register UndefReg = MRI->createVirtualRegister(SrcRC); 2371e8d8bef9SDimitry Andric BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg); 2372e8d8bef9SDimitry Andric BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg) 2373e8d8bef9SDimitry Andric .addReg(SrcReg) 2374e8d8bef9SDimitry Andric .addImm(AMDGPU::sub0) 2375e8d8bef9SDimitry Andric .addReg(UndefReg) 2376e8d8bef9SDimitry Andric .addImm(AMDGPU::sub1); 2377e8d8bef9SDimitry Andric I.eraseFromParent(); 2378e8d8bef9SDimitry Andric 2379e8d8bef9SDimitry Andric return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) && 2380e8d8bef9SDimitry Andric RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI); 2381e8d8bef9SDimitry Andric } 2382e8d8bef9SDimitry Andric 23830b57cec5SDimitry Andric if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) { 23840b57cec5SDimitry Andric // 64-bit should have been split up in RegBankSelect 23850b57cec5SDimitry Andric 23860b57cec5SDimitry Andric // Try to use an and with a mask if it will save code size. 23870b57cec5SDimitry Andric unsigned Mask; 23880b57cec5SDimitry Andric if (!Signed && shouldUseAndMask(SrcSize, Mask)) { 23890b57cec5SDimitry Andric MachineInstr *ExtI = 23900b57cec5SDimitry Andric BuildMI(MBB, I, DL, TII.get(AMDGPU::V_AND_B32_e32), DstReg) 23910b57cec5SDimitry Andric .addImm(Mask) 23920b57cec5SDimitry Andric .addReg(SrcReg); 23938bcb0991SDimitry Andric I.eraseFromParent(); 23940b57cec5SDimitry Andric return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); 23950b57cec5SDimitry Andric } 23960b57cec5SDimitry Andric 2397e8d8bef9SDimitry Andric const unsigned BFE = Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64; 23980b57cec5SDimitry Andric MachineInstr *ExtI = 23990b57cec5SDimitry Andric BuildMI(MBB, I, DL, TII.get(BFE), DstReg) 24000b57cec5SDimitry Andric .addReg(SrcReg) 24010b57cec5SDimitry Andric .addImm(0) // Offset 24020b57cec5SDimitry Andric .addImm(SrcSize); // Width 24038bcb0991SDimitry Andric I.eraseFromParent(); 24040b57cec5SDimitry Andric return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); 24050b57cec5SDimitry Andric } 24060b57cec5SDimitry Andric 24070b57cec5SDimitry Andric if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) { 24085ffd83dbSDimitry Andric const TargetRegisterClass &SrcRC = InReg && DstSize > 32 ? 24095ffd83dbSDimitry Andric AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass; 24105ffd83dbSDimitry Andric if (!RBI.constrainGenericRegister(SrcReg, SrcRC, *MRI)) 24110b57cec5SDimitry Andric return false; 24120b57cec5SDimitry Andric 24130b57cec5SDimitry Andric if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) { 24140b57cec5SDimitry Andric const unsigned SextOpc = SrcSize == 8 ? 24150b57cec5SDimitry Andric AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16; 24160b57cec5SDimitry Andric BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg) 24170b57cec5SDimitry Andric .addReg(SrcReg); 24188bcb0991SDimitry Andric I.eraseFromParent(); 24198bcb0991SDimitry Andric return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI); 24200b57cec5SDimitry Andric } 24210b57cec5SDimitry Andric 2422bdd1243dSDimitry Andric // Using a single 32-bit SALU to calculate the high half is smaller than 2423bdd1243dSDimitry Andric // S_BFE with a literal constant operand. 2424bdd1243dSDimitry Andric if (DstSize > 32 && SrcSize == 32) { 2425bdd1243dSDimitry Andric Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); 2426bdd1243dSDimitry Andric unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister; 2427bdd1243dSDimitry Andric if (Signed) { 2428bdd1243dSDimitry Andric BuildMI(MBB, I, DL, TII.get(AMDGPU::S_ASHR_I32), HiReg) 2429bdd1243dSDimitry Andric .addReg(SrcReg, 0, SubReg) 24305f757f3fSDimitry Andric .addImm(31) 24315f757f3fSDimitry Andric .setOperandDead(3); // Dead scc 2432bdd1243dSDimitry Andric } else { 2433bdd1243dSDimitry Andric BuildMI(MBB, I, DL, TII.get(AMDGPU::S_MOV_B32), HiReg) 2434bdd1243dSDimitry Andric .addImm(0); 2435bdd1243dSDimitry Andric } 2436bdd1243dSDimitry Andric BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg) 2437bdd1243dSDimitry Andric .addReg(SrcReg, 0, SubReg) 2438bdd1243dSDimitry Andric .addImm(AMDGPU::sub0) 2439bdd1243dSDimitry Andric .addReg(HiReg) 2440bdd1243dSDimitry Andric .addImm(AMDGPU::sub1); 2441bdd1243dSDimitry Andric I.eraseFromParent(); 2442bdd1243dSDimitry Andric return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, 2443bdd1243dSDimitry Andric *MRI); 2444bdd1243dSDimitry Andric } 2445bdd1243dSDimitry Andric 24460b57cec5SDimitry Andric const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64; 24470b57cec5SDimitry Andric const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32; 24480b57cec5SDimitry Andric 24490b57cec5SDimitry Andric // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16]= width. 24505ffd83dbSDimitry Andric if (DstSize > 32 && (SrcSize <= 32 || InReg)) { 24510b57cec5SDimitry Andric // We need a 64-bit register source, but the high bits don't matter. 24528bcb0991SDimitry Andric Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass); 24538bcb0991SDimitry Andric Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); 2454bdd1243dSDimitry Andric unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister; 24555ffd83dbSDimitry Andric 24560b57cec5SDimitry Andric BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg); 24570b57cec5SDimitry Andric BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), ExtReg) 24585ffd83dbSDimitry Andric .addReg(SrcReg, 0, SubReg) 24590b57cec5SDimitry Andric .addImm(AMDGPU::sub0) 24600b57cec5SDimitry Andric .addReg(UndefReg) 24610b57cec5SDimitry Andric .addImm(AMDGPU::sub1); 24620b57cec5SDimitry Andric 24630b57cec5SDimitry Andric BuildMI(MBB, I, DL, TII.get(BFE64), DstReg) 24640b57cec5SDimitry Andric .addReg(ExtReg) 24650b57cec5SDimitry Andric .addImm(SrcSize << 16); 24660b57cec5SDimitry Andric 24678bcb0991SDimitry Andric I.eraseFromParent(); 24688bcb0991SDimitry Andric return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, *MRI); 24690b57cec5SDimitry Andric } 24700b57cec5SDimitry Andric 24710b57cec5SDimitry Andric unsigned Mask; 24720b57cec5SDimitry Andric if (!Signed && shouldUseAndMask(SrcSize, Mask)) { 24730b57cec5SDimitry Andric BuildMI(MBB, I, DL, TII.get(AMDGPU::S_AND_B32), DstReg) 24740b57cec5SDimitry Andric .addReg(SrcReg) 24755f757f3fSDimitry Andric .addImm(Mask) 24765f757f3fSDimitry Andric .setOperandDead(3); // Dead scc 24770b57cec5SDimitry Andric } else { 24780b57cec5SDimitry Andric BuildMI(MBB, I, DL, TII.get(BFE32), DstReg) 24790b57cec5SDimitry Andric .addReg(SrcReg) 24800b57cec5SDimitry Andric .addImm(SrcSize << 16); 24810b57cec5SDimitry Andric } 24820b57cec5SDimitry Andric 24838bcb0991SDimitry Andric I.eraseFromParent(); 24848bcb0991SDimitry Andric return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI); 24850b57cec5SDimitry Andric } 24860b57cec5SDimitry Andric 24870b57cec5SDimitry Andric return false; 24880b57cec5SDimitry Andric } 24890b57cec5SDimitry Andric 24905f757f3fSDimitry Andric static bool isExtractHiElt(MachineRegisterInfo &MRI, Register In, 24915f757f3fSDimitry Andric Register &Out) { 24925f757f3fSDimitry Andric Register LShlSrc; 24935f757f3fSDimitry Andric if (mi_match(In, MRI, 24945f757f3fSDimitry Andric m_GTrunc(m_GLShr(m_Reg(LShlSrc), m_SpecificICst(16))))) { 24955f757f3fSDimitry Andric Out = LShlSrc; 24965f757f3fSDimitry Andric return true; 24975f757f3fSDimitry Andric } 24985f757f3fSDimitry Andric return false; 24995f757f3fSDimitry Andric } 25005f757f3fSDimitry Andric 25015f757f3fSDimitry Andric bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const { 25025f757f3fSDimitry Andric if (!Subtarget->hasSALUFloatInsts()) 25035f757f3fSDimitry Andric return false; 25045f757f3fSDimitry Andric 25055f757f3fSDimitry Andric Register Dst = I.getOperand(0).getReg(); 25065f757f3fSDimitry Andric const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI); 25075f757f3fSDimitry Andric if (DstRB->getID() != AMDGPU::SGPRRegBankID) 25085f757f3fSDimitry Andric return false; 25095f757f3fSDimitry Andric 25105f757f3fSDimitry Andric Register Src = I.getOperand(1).getReg(); 25115f757f3fSDimitry Andric 25125f757f3fSDimitry Andric if (MRI->getType(Dst) == LLT::scalar(32) && 25135f757f3fSDimitry Andric MRI->getType(Src) == LLT::scalar(16)) { 25145f757f3fSDimitry Andric if (isExtractHiElt(*MRI, Src, Src)) { 25155f757f3fSDimitry Andric MachineBasicBlock *BB = I.getParent(); 25165f757f3fSDimitry Andric BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst) 25175f757f3fSDimitry Andric .addUse(Src); 25185f757f3fSDimitry Andric I.eraseFromParent(); 25195f757f3fSDimitry Andric return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI); 25205f757f3fSDimitry Andric } 25215f757f3fSDimitry Andric } 25225f757f3fSDimitry Andric 25235f757f3fSDimitry Andric return false; 25245f757f3fSDimitry Andric } 25255f757f3fSDimitry Andric 25260b57cec5SDimitry Andric bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const { 25270b57cec5SDimitry Andric MachineBasicBlock *BB = I.getParent(); 25280b57cec5SDimitry Andric MachineOperand &ImmOp = I.getOperand(1); 2529e8d8bef9SDimitry Andric Register DstReg = I.getOperand(0).getReg(); 2530e8d8bef9SDimitry Andric unsigned Size = MRI->getType(DstReg).getSizeInBits(); 25315f757f3fSDimitry Andric bool IsFP = false; 25320b57cec5SDimitry Andric 25330b57cec5SDimitry Andric // The AMDGPU backend only supports Imm operands and not CImm or FPImm. 25340b57cec5SDimitry Andric if (ImmOp.isFPImm()) { 25350b57cec5SDimitry Andric const APInt &Imm = ImmOp.getFPImm()->getValueAPF().bitcastToAPInt(); 25360b57cec5SDimitry Andric ImmOp.ChangeToImmediate(Imm.getZExtValue()); 25375f757f3fSDimitry Andric IsFP = true; 25380b57cec5SDimitry Andric } else if (ImmOp.isCImm()) { 2539e8d8bef9SDimitry Andric ImmOp.ChangeToImmediate(ImmOp.getCImm()->getSExtValue()); 25400b57cec5SDimitry Andric } else { 2541e8d8bef9SDimitry Andric llvm_unreachable("Not supported by g_constants"); 25420b57cec5SDimitry Andric } 25430b57cec5SDimitry Andric 2544e8d8bef9SDimitry Andric const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI); 2545e8d8bef9SDimitry Andric const bool IsSgpr = DstRB->getID() == AMDGPU::SGPRRegBankID; 25460b57cec5SDimitry Andric 2547e8d8bef9SDimitry Andric unsigned Opcode; 2548e8d8bef9SDimitry Andric if (DstRB->getID() == AMDGPU::VCCRegBankID) { 2549e8d8bef9SDimitry Andric Opcode = STI.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; 25505f757f3fSDimitry Andric } else if (Size == 64 && 25515f757f3fSDimitry Andric AMDGPU::isValid32BitLiteral(I.getOperand(1).getImm(), IsFP)) { 25525f757f3fSDimitry Andric Opcode = IsSgpr ? AMDGPU::S_MOV_B64_IMM_PSEUDO : AMDGPU::V_MOV_B64_PSEUDO; 25535f757f3fSDimitry Andric I.setDesc(TII.get(Opcode)); 25545f757f3fSDimitry Andric I.addImplicitDefUseOperands(*MF); 25555f757f3fSDimitry Andric return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2556e8d8bef9SDimitry Andric } else { 2557e8d8bef9SDimitry Andric Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; 2558e8d8bef9SDimitry Andric 2559e8d8bef9SDimitry Andric // We should never produce s1 values on banks other than VCC. If the user of 2560e8d8bef9SDimitry Andric // this already constrained the register, we may incorrectly think it's VCC 2561e8d8bef9SDimitry Andric // if it wasn't originally. 2562e8d8bef9SDimitry Andric if (Size == 1) 2563e8d8bef9SDimitry Andric return false; 2564e8d8bef9SDimitry Andric } 2565e8d8bef9SDimitry Andric 2566e8d8bef9SDimitry Andric if (Size != 64) { 25670b57cec5SDimitry Andric I.setDesc(TII.get(Opcode)); 25680b57cec5SDimitry Andric I.addImplicitDefUseOperands(*MF); 25690b57cec5SDimitry Andric return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 25700b57cec5SDimitry Andric } 25710b57cec5SDimitry Andric 25728bcb0991SDimitry Andric const DebugLoc &DL = I.getDebugLoc(); 25738bcb0991SDimitry Andric 25748bcb0991SDimitry Andric APInt Imm(Size, I.getOperand(1).getImm()); 25758bcb0991SDimitry Andric 25768bcb0991SDimitry Andric MachineInstr *ResInst; 25778bcb0991SDimitry Andric if (IsSgpr && TII.isInlineConstant(Imm)) { 25788bcb0991SDimitry Andric ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B64), DstReg) 25798bcb0991SDimitry Andric .addImm(I.getOperand(1).getImm()); 25808bcb0991SDimitry Andric } else { 25818bcb0991SDimitry Andric const TargetRegisterClass *RC = IsSgpr ? 25828bcb0991SDimitry Andric &AMDGPU::SReg_32RegClass : &AMDGPU::VGPR_32RegClass; 25838bcb0991SDimitry Andric Register LoReg = MRI->createVirtualRegister(RC); 25848bcb0991SDimitry Andric Register HiReg = MRI->createVirtualRegister(RC); 25850b57cec5SDimitry Andric 25860b57cec5SDimitry Andric BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg) 25870b57cec5SDimitry Andric .addImm(Imm.trunc(32).getZExtValue()); 25880b57cec5SDimitry Andric 25890b57cec5SDimitry Andric BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg) 25900b57cec5SDimitry Andric .addImm(Imm.ashr(32).getZExtValue()); 25910b57cec5SDimitry Andric 25928bcb0991SDimitry Andric ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg) 25930b57cec5SDimitry Andric .addReg(LoReg) 25940b57cec5SDimitry Andric .addImm(AMDGPU::sub0) 25950b57cec5SDimitry Andric .addReg(HiReg) 25960b57cec5SDimitry Andric .addImm(AMDGPU::sub1); 25978bcb0991SDimitry Andric } 25980b57cec5SDimitry Andric 25990b57cec5SDimitry Andric // We can't call constrainSelectedInstRegOperands here, because it doesn't 26000b57cec5SDimitry Andric // work for target independent opcodes 26010b57cec5SDimitry Andric I.eraseFromParent(); 26020b57cec5SDimitry Andric const TargetRegisterClass *DstRC = 26038bcb0991SDimitry Andric TRI.getConstrainedRegClassForOperand(ResInst->getOperand(0), *MRI); 26040b57cec5SDimitry Andric if (!DstRC) 26050b57cec5SDimitry Andric return true; 26068bcb0991SDimitry Andric return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI); 26070b57cec5SDimitry Andric } 26080b57cec5SDimitry Andric 26095ffd83dbSDimitry Andric bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const { 26105ffd83dbSDimitry Andric // Only manually handle the f64 SGPR case. 26115ffd83dbSDimitry Andric // 26125ffd83dbSDimitry Andric // FIXME: This is a workaround for 2.5 different tablegen problems. Because 26135ffd83dbSDimitry Andric // the bit ops theoretically have a second result due to the implicit def of 26145ffd83dbSDimitry Andric // SCC, the GlobalISelEmitter is overly conservative and rejects it. Fixing 26155ffd83dbSDimitry Andric // that is easy by disabling the check. The result works, but uses a 26165ffd83dbSDimitry Andric // nonsensical sreg32orlds_and_sreg_1 regclass. 26175ffd83dbSDimitry Andric // 26185ffd83dbSDimitry Andric // The DAG emitter is more problematic, and incorrectly adds both S_XOR_B32 to 26195ffd83dbSDimitry Andric // the variadic REG_SEQUENCE operands. 26205ffd83dbSDimitry Andric 26215ffd83dbSDimitry Andric Register Dst = MI.getOperand(0).getReg(); 26225ffd83dbSDimitry Andric const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI); 26235ffd83dbSDimitry Andric if (DstRB->getID() != AMDGPU::SGPRRegBankID || 26245ffd83dbSDimitry Andric MRI->getType(Dst) != LLT::scalar(64)) 26255ffd83dbSDimitry Andric return false; 26265ffd83dbSDimitry Andric 26275ffd83dbSDimitry Andric Register Src = MI.getOperand(1).getReg(); 26285ffd83dbSDimitry Andric MachineInstr *Fabs = getOpcodeDef(TargetOpcode::G_FABS, Src, *MRI); 26295ffd83dbSDimitry Andric if (Fabs) 26305ffd83dbSDimitry Andric Src = Fabs->getOperand(1).getReg(); 26315ffd83dbSDimitry Andric 26325ffd83dbSDimitry Andric if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) || 26335ffd83dbSDimitry Andric !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI)) 26345ffd83dbSDimitry Andric return false; 26355ffd83dbSDimitry Andric 26365ffd83dbSDimitry Andric MachineBasicBlock *BB = MI.getParent(); 26375ffd83dbSDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 26385ffd83dbSDimitry Andric Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); 26395ffd83dbSDimitry Andric Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); 26405ffd83dbSDimitry Andric Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); 26415ffd83dbSDimitry Andric Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); 26425ffd83dbSDimitry Andric 26435ffd83dbSDimitry Andric BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg) 26445ffd83dbSDimitry Andric .addReg(Src, 0, AMDGPU::sub0); 26455ffd83dbSDimitry Andric BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg) 26465ffd83dbSDimitry Andric .addReg(Src, 0, AMDGPU::sub1); 26475ffd83dbSDimitry Andric BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg) 26485ffd83dbSDimitry Andric .addImm(0x80000000); 26495ffd83dbSDimitry Andric 26505ffd83dbSDimitry Andric // Set or toggle sign bit. 26515ffd83dbSDimitry Andric unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32; 26525ffd83dbSDimitry Andric BuildMI(*BB, &MI, DL, TII.get(Opc), OpReg) 26535ffd83dbSDimitry Andric .addReg(HiReg) 26545f757f3fSDimitry Andric .addReg(ConstReg) 26555f757f3fSDimitry Andric .setOperandDead(3); // Dead scc 26565ffd83dbSDimitry Andric BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst) 26575ffd83dbSDimitry Andric .addReg(LoReg) 26585ffd83dbSDimitry Andric .addImm(AMDGPU::sub0) 26595ffd83dbSDimitry Andric .addReg(OpReg) 26605ffd83dbSDimitry Andric .addImm(AMDGPU::sub1); 26615ffd83dbSDimitry Andric MI.eraseFromParent(); 26625ffd83dbSDimitry Andric return true; 26635ffd83dbSDimitry Andric } 26645ffd83dbSDimitry Andric 26655ffd83dbSDimitry Andric // FIXME: This is a workaround for the same tablegen problems as G_FNEG 26665ffd83dbSDimitry Andric bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const { 26675ffd83dbSDimitry Andric Register Dst = MI.getOperand(0).getReg(); 26685ffd83dbSDimitry Andric const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI); 26695ffd83dbSDimitry Andric if (DstRB->getID() != AMDGPU::SGPRRegBankID || 26705ffd83dbSDimitry Andric MRI->getType(Dst) != LLT::scalar(64)) 26715ffd83dbSDimitry Andric return false; 26725ffd83dbSDimitry Andric 26735ffd83dbSDimitry Andric Register Src = MI.getOperand(1).getReg(); 26745ffd83dbSDimitry Andric MachineBasicBlock *BB = MI.getParent(); 26755ffd83dbSDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 26765ffd83dbSDimitry Andric Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); 26775ffd83dbSDimitry Andric Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); 26785ffd83dbSDimitry Andric Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); 26795ffd83dbSDimitry Andric Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); 26805ffd83dbSDimitry Andric 26815ffd83dbSDimitry Andric if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) || 26825ffd83dbSDimitry Andric !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI)) 26835ffd83dbSDimitry Andric return false; 26845ffd83dbSDimitry Andric 26855ffd83dbSDimitry Andric BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg) 26865ffd83dbSDimitry Andric .addReg(Src, 0, AMDGPU::sub0); 26875ffd83dbSDimitry Andric BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg) 26885ffd83dbSDimitry Andric .addReg(Src, 0, AMDGPU::sub1); 26895ffd83dbSDimitry Andric BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg) 26905ffd83dbSDimitry Andric .addImm(0x7fffffff); 26915ffd83dbSDimitry Andric 26925ffd83dbSDimitry Andric // Clear sign bit. 26935ffd83dbSDimitry Andric // TODO: Should this used S_BITSET0_*? 26945ffd83dbSDimitry Andric BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_AND_B32), OpReg) 26955ffd83dbSDimitry Andric .addReg(HiReg) 26965f757f3fSDimitry Andric .addReg(ConstReg) 26975f757f3fSDimitry Andric .setOperandDead(3); // Dead scc 26985ffd83dbSDimitry Andric BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst) 26995ffd83dbSDimitry Andric .addReg(LoReg) 27005ffd83dbSDimitry Andric .addImm(AMDGPU::sub0) 27015ffd83dbSDimitry Andric .addReg(OpReg) 27025ffd83dbSDimitry Andric .addImm(AMDGPU::sub1); 27035ffd83dbSDimitry Andric 27045ffd83dbSDimitry Andric MI.eraseFromParent(); 27055ffd83dbSDimitry Andric return true; 27065ffd83dbSDimitry Andric } 27075ffd83dbSDimitry Andric 27080b57cec5SDimitry Andric static bool isConstant(const MachineInstr &MI) { 27090b57cec5SDimitry Andric return MI.getOpcode() == TargetOpcode::G_CONSTANT; 27100b57cec5SDimitry Andric } 27110b57cec5SDimitry Andric 27120b57cec5SDimitry Andric void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load, 27130b57cec5SDimitry Andric const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const { 27140b57cec5SDimitry Andric 27151db9f3b2SDimitry Andric unsigned OpNo = Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1; 27161db9f3b2SDimitry Andric const MachineInstr *PtrMI = 27171db9f3b2SDimitry Andric MRI.getUniqueVRegDef(Load.getOperand(OpNo).getReg()); 27180b57cec5SDimitry Andric 27190b57cec5SDimitry Andric assert(PtrMI); 27200b57cec5SDimitry Andric 2721480093f4SDimitry Andric if (PtrMI->getOpcode() != TargetOpcode::G_PTR_ADD) 27220b57cec5SDimitry Andric return; 27230b57cec5SDimitry Andric 2724fcaf7f86SDimitry Andric GEPInfo GEPInfo; 27250b57cec5SDimitry Andric 27268bcb0991SDimitry Andric for (unsigned i = 1; i != 3; ++i) { 27270b57cec5SDimitry Andric const MachineOperand &GEPOp = PtrMI->getOperand(i); 27280b57cec5SDimitry Andric const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg()); 27290b57cec5SDimitry Andric assert(OpDef); 27308bcb0991SDimitry Andric if (i == 2 && isConstant(*OpDef)) { 27318bcb0991SDimitry Andric // TODO: Could handle constant base + variable offset, but a combine 27328bcb0991SDimitry Andric // probably should have commuted it. 27330b57cec5SDimitry Andric assert(GEPInfo.Imm == 0); 27340b57cec5SDimitry Andric GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue(); 27350b57cec5SDimitry Andric continue; 27360b57cec5SDimitry Andric } 27370b57cec5SDimitry Andric const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI); 27380b57cec5SDimitry Andric if (OpBank->getID() == AMDGPU::SGPRRegBankID) 27390b57cec5SDimitry Andric GEPInfo.SgprParts.push_back(GEPOp.getReg()); 27400b57cec5SDimitry Andric else 27410b57cec5SDimitry Andric GEPInfo.VgprParts.push_back(GEPOp.getReg()); 27420b57cec5SDimitry Andric } 27430b57cec5SDimitry Andric 27440b57cec5SDimitry Andric AddrInfo.push_back(GEPInfo); 27450b57cec5SDimitry Andric getAddrModeInfo(*PtrMI, MRI, AddrInfo); 27460b57cec5SDimitry Andric } 27470b57cec5SDimitry Andric 2748e8d8bef9SDimitry Andric bool AMDGPUInstructionSelector::isSGPR(Register Reg) const { 2749e8d8bef9SDimitry Andric return RBI.getRegBank(Reg, *MRI, TRI)->getID() == AMDGPU::SGPRRegBankID; 2750e8d8bef9SDimitry Andric } 2751e8d8bef9SDimitry Andric 27520b57cec5SDimitry Andric bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const { 27530b57cec5SDimitry Andric if (!MI.hasOneMemOperand()) 27540b57cec5SDimitry Andric return false; 27550b57cec5SDimitry Andric 27560b57cec5SDimitry Andric const MachineMemOperand *MMO = *MI.memoperands_begin(); 27570b57cec5SDimitry Andric const Value *Ptr = MMO->getValue(); 27580b57cec5SDimitry Andric 27590b57cec5SDimitry Andric // UndefValue means this is a load of a kernel input. These are uniform. 27600b57cec5SDimitry Andric // Sometimes LDS instructions have constant pointers. 27610b57cec5SDimitry Andric // If Ptr is null, then that means this mem operand contains a 27620b57cec5SDimitry Andric // PseudoSourceValue like GOT. 27630b57cec5SDimitry Andric if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) || 27640b57cec5SDimitry Andric isa<Constant>(Ptr) || isa<GlobalValue>(Ptr)) 27650b57cec5SDimitry Andric return true; 27660b57cec5SDimitry Andric 27670b57cec5SDimitry Andric if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) 27680b57cec5SDimitry Andric return true; 27690b57cec5SDimitry Andric 27701db9f3b2SDimitry Andric if (MI.getOpcode() == AMDGPU::G_PREFETCH) 27711db9f3b2SDimitry Andric return RBI.getRegBank(MI.getOperand(0).getReg(), *MRI, TRI)->getID() == 27721db9f3b2SDimitry Andric AMDGPU::SGPRRegBankID; 27731db9f3b2SDimitry Andric 27740b57cec5SDimitry Andric const Instruction *I = dyn_cast<Instruction>(Ptr); 27750b57cec5SDimitry Andric return I && I->getMetadata("amdgpu.uniform"); 27760b57cec5SDimitry Andric } 27770b57cec5SDimitry Andric 27780b57cec5SDimitry Andric bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const { 27790b57cec5SDimitry Andric for (const GEPInfo &GEPInfo : AddrInfo) { 27800b57cec5SDimitry Andric if (!GEPInfo.VgprParts.empty()) 27810b57cec5SDimitry Andric return true; 27820b57cec5SDimitry Andric } 27830b57cec5SDimitry Andric return false; 27840b57cec5SDimitry Andric } 27850b57cec5SDimitry Andric 27868bcb0991SDimitry Andric void AMDGPUInstructionSelector::initM0(MachineInstr &I) const { 27878bcb0991SDimitry Andric const LLT PtrTy = MRI->getType(I.getOperand(1).getReg()); 27888bcb0991SDimitry Andric unsigned AS = PtrTy.getAddressSpace(); 27898bcb0991SDimitry Andric if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) && 27908bcb0991SDimitry Andric STI.ldsRequiresM0Init()) { 2791e8d8bef9SDimitry Andric MachineBasicBlock *BB = I.getParent(); 2792e8d8bef9SDimitry Andric 2793349cc55cSDimitry Andric // If DS instructions require M0 initialization, insert it before selecting. 27948bcb0991SDimitry Andric BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0) 27958bcb0991SDimitry Andric .addImm(-1); 27968bcb0991SDimitry Andric } 27978bcb0991SDimitry Andric } 27988bcb0991SDimitry Andric 2799e8d8bef9SDimitry Andric bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW( 2800e8d8bef9SDimitry Andric MachineInstr &I) const { 28018bcb0991SDimitry Andric initM0(I); 28028bcb0991SDimitry Andric return selectImpl(I, *CoverageInfo); 28030b57cec5SDimitry Andric } 28040b57cec5SDimitry Andric 2805349cc55cSDimitry Andric static bool isVCmpResult(Register Reg, MachineRegisterInfo &MRI) { 2806349cc55cSDimitry Andric if (Reg.isPhysical()) 2807349cc55cSDimitry Andric return false; 2808349cc55cSDimitry Andric 2809349cc55cSDimitry Andric MachineInstr &MI = *MRI.getUniqueVRegDef(Reg); 2810349cc55cSDimitry Andric const unsigned Opcode = MI.getOpcode(); 2811349cc55cSDimitry Andric 2812349cc55cSDimitry Andric if (Opcode == AMDGPU::COPY) 2813349cc55cSDimitry Andric return isVCmpResult(MI.getOperand(1).getReg(), MRI); 2814349cc55cSDimitry Andric 2815349cc55cSDimitry Andric if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR || 2816349cc55cSDimitry Andric Opcode == AMDGPU::G_XOR) 2817349cc55cSDimitry Andric return isVCmpResult(MI.getOperand(1).getReg(), MRI) && 2818349cc55cSDimitry Andric isVCmpResult(MI.getOperand(2).getReg(), MRI); 2819349cc55cSDimitry Andric 28205f757f3fSDimitry Andric if (auto *GI = dyn_cast<GIntrinsic>(&MI)) 28215f757f3fSDimitry Andric return GI->is(Intrinsic::amdgcn_class); 2822349cc55cSDimitry Andric 2823349cc55cSDimitry Andric return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP; 2824349cc55cSDimitry Andric } 2825349cc55cSDimitry Andric 28260b57cec5SDimitry Andric bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const { 28270b57cec5SDimitry Andric MachineBasicBlock *BB = I.getParent(); 28280b57cec5SDimitry Andric MachineOperand &CondOp = I.getOperand(0); 28290b57cec5SDimitry Andric Register CondReg = CondOp.getReg(); 28300b57cec5SDimitry Andric const DebugLoc &DL = I.getDebugLoc(); 28310b57cec5SDimitry Andric 28320b57cec5SDimitry Andric unsigned BrOpcode; 28330b57cec5SDimitry Andric Register CondPhysReg; 28340b57cec5SDimitry Andric const TargetRegisterClass *ConstrainRC; 28350b57cec5SDimitry Andric 28360b57cec5SDimitry Andric // In SelectionDAG, we inspect the IR block for uniformity metadata to decide 28370b57cec5SDimitry Andric // whether the branch is uniform when selecting the instruction. In 28380b57cec5SDimitry Andric // GlobalISel, we should push that decision into RegBankSelect. Assume for now 28390b57cec5SDimitry Andric // RegBankSelect knows what it's doing if the branch condition is scc, even 28400b57cec5SDimitry Andric // though it currently does not. 2841480093f4SDimitry Andric if (!isVCC(CondReg, *MRI)) { 2842480093f4SDimitry Andric if (MRI->getType(CondReg) != LLT::scalar(32)) 2843480093f4SDimitry Andric return false; 2844480093f4SDimitry Andric 28450b57cec5SDimitry Andric CondPhysReg = AMDGPU::SCC; 28460b57cec5SDimitry Andric BrOpcode = AMDGPU::S_CBRANCH_SCC1; 2847e8d8bef9SDimitry Andric ConstrainRC = &AMDGPU::SReg_32RegClass; 2848480093f4SDimitry Andric } else { 28490b57cec5SDimitry Andric // FIXME: Should scc->vcc copies and with exec? 2850349cc55cSDimitry Andric 2851349cc55cSDimitry Andric // Unless the value of CondReg is a result of a V_CMP* instruction then we 2852349cc55cSDimitry Andric // need to insert an and with exec. 2853349cc55cSDimitry Andric if (!isVCmpResult(CondReg, *MRI)) { 2854349cc55cSDimitry Andric const bool Is64 = STI.isWave64(); 2855349cc55cSDimitry Andric const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32; 2856349cc55cSDimitry Andric const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO; 2857349cc55cSDimitry Andric 2858349cc55cSDimitry Andric Register TmpReg = MRI->createVirtualRegister(TRI.getBoolRC()); 2859349cc55cSDimitry Andric BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg) 2860349cc55cSDimitry Andric .addReg(CondReg) 28615f757f3fSDimitry Andric .addReg(Exec) 28625f757f3fSDimitry Andric .setOperandDead(3); // Dead scc 2863349cc55cSDimitry Andric CondReg = TmpReg; 2864349cc55cSDimitry Andric } 2865349cc55cSDimitry Andric 28660b57cec5SDimitry Andric CondPhysReg = TRI.getVCC(); 28670b57cec5SDimitry Andric BrOpcode = AMDGPU::S_CBRANCH_VCCNZ; 28680b57cec5SDimitry Andric ConstrainRC = TRI.getBoolRC(); 2869480093f4SDimitry Andric } 28700b57cec5SDimitry Andric 28718bcb0991SDimitry Andric if (!MRI->getRegClassOrNull(CondReg)) 28728bcb0991SDimitry Andric MRI->setRegClass(CondReg, ConstrainRC); 28730b57cec5SDimitry Andric 28740b57cec5SDimitry Andric BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg) 28750b57cec5SDimitry Andric .addReg(CondReg); 28760b57cec5SDimitry Andric BuildMI(*BB, &I, DL, TII.get(BrOpcode)) 28770b57cec5SDimitry Andric .addMBB(I.getOperand(1).getMBB()); 28780b57cec5SDimitry Andric 28790b57cec5SDimitry Andric I.eraseFromParent(); 28800b57cec5SDimitry Andric return true; 28810b57cec5SDimitry Andric } 28820b57cec5SDimitry Andric 2883e8d8bef9SDimitry Andric bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE( 28845ffd83dbSDimitry Andric MachineInstr &I) const { 28850b57cec5SDimitry Andric Register DstReg = I.getOperand(0).getReg(); 28868bcb0991SDimitry Andric const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI); 28870b57cec5SDimitry Andric const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID; 28880b57cec5SDimitry Andric I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32)); 28890b57cec5SDimitry Andric if (IsVGPR) 28900b57cec5SDimitry Andric I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); 28910b57cec5SDimitry Andric 28920b57cec5SDimitry Andric return RBI.constrainGenericRegister( 28938bcb0991SDimitry Andric DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI); 28940b57cec5SDimitry Andric } 28950b57cec5SDimitry Andric 28965ffd83dbSDimitry Andric bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const { 28978bcb0991SDimitry Andric Register DstReg = I.getOperand(0).getReg(); 28988bcb0991SDimitry Andric Register SrcReg = I.getOperand(1).getReg(); 28995ffd83dbSDimitry Andric Register MaskReg = I.getOperand(2).getReg(); 29005ffd83dbSDimitry Andric LLT Ty = MRI->getType(DstReg); 29015ffd83dbSDimitry Andric LLT MaskTy = MRI->getType(MaskReg); 290204eeddc0SDimitry Andric MachineBasicBlock *BB = I.getParent(); 290304eeddc0SDimitry Andric const DebugLoc &DL = I.getDebugLoc(); 29048bcb0991SDimitry Andric 29058bcb0991SDimitry Andric const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI); 29068bcb0991SDimitry Andric const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI); 29075ffd83dbSDimitry Andric const RegisterBank *MaskRB = RBI.getRegBank(MaskReg, *MRI, TRI); 29088bcb0991SDimitry Andric const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID; 29095ffd83dbSDimitry Andric if (DstRB != SrcRB) // Should only happen for hand written MIR. 29105ffd83dbSDimitry Andric return false; 29115ffd83dbSDimitry Andric 291204eeddc0SDimitry Andric // Try to avoid emitting a bit operation when we only need to touch half of 291304eeddc0SDimitry Andric // the 64-bit pointer. 291406c3fb27SDimitry Andric APInt MaskOnes = KB->getKnownOnes(MaskReg).zext(64); 291504eeddc0SDimitry Andric const APInt MaskHi32 = APInt::getHighBitsSet(64, 32); 291604eeddc0SDimitry Andric const APInt MaskLo32 = APInt::getLowBitsSet(64, 32); 291704eeddc0SDimitry Andric 291804eeddc0SDimitry Andric const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32; 291904eeddc0SDimitry Andric const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32; 292004eeddc0SDimitry Andric 292104eeddc0SDimitry Andric if (!IsVGPR && Ty.getSizeInBits() == 64 && 292204eeddc0SDimitry Andric !CanCopyLow32 && !CanCopyHi32) { 292304eeddc0SDimitry Andric auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg) 292404eeddc0SDimitry Andric .addReg(SrcReg) 29255f757f3fSDimitry Andric .addReg(MaskReg) 29265f757f3fSDimitry Andric .setOperandDead(3); // Dead scc 292704eeddc0SDimitry Andric I.eraseFromParent(); 292804eeddc0SDimitry Andric return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); 292904eeddc0SDimitry Andric } 293004eeddc0SDimitry Andric 29318bcb0991SDimitry Andric unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32; 29328bcb0991SDimitry Andric const TargetRegisterClass &RegRC 29338bcb0991SDimitry Andric = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass; 29348bcb0991SDimitry Andric 293581ad6265SDimitry Andric const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(Ty, *DstRB); 293681ad6265SDimitry Andric const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(Ty, *SrcRB); 29375ffd83dbSDimitry Andric const TargetRegisterClass *MaskRC = 293881ad6265SDimitry Andric TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB); 29395ffd83dbSDimitry Andric 29408bcb0991SDimitry Andric if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) || 29415ffd83dbSDimitry Andric !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) || 29425ffd83dbSDimitry Andric !RBI.constrainGenericRegister(MaskReg, *MaskRC, *MRI)) 29438bcb0991SDimitry Andric return false; 29448bcb0991SDimitry Andric 29458bcb0991SDimitry Andric if (Ty.getSizeInBits() == 32) { 29465ffd83dbSDimitry Andric assert(MaskTy.getSizeInBits() == 32 && 29475ffd83dbSDimitry Andric "ptrmask should have been narrowed during legalize"); 29485ffd83dbSDimitry Andric 29495f757f3fSDimitry Andric auto NewOp = BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg) 29508bcb0991SDimitry Andric .addReg(SrcReg) 29515ffd83dbSDimitry Andric .addReg(MaskReg); 29525f757f3fSDimitry Andric 29535f757f3fSDimitry Andric if (!IsVGPR) 29545f757f3fSDimitry Andric NewOp.setOperandDead(3); // Dead scc 29558bcb0991SDimitry Andric I.eraseFromParent(); 29568bcb0991SDimitry Andric return true; 29578bcb0991SDimitry Andric } 29588bcb0991SDimitry Andric 29598bcb0991SDimitry Andric Register HiReg = MRI->createVirtualRegister(&RegRC); 29608bcb0991SDimitry Andric Register LoReg = MRI->createVirtualRegister(&RegRC); 29618bcb0991SDimitry Andric 29625ffd83dbSDimitry Andric // Extract the subregisters from the source pointer. 29638bcb0991SDimitry Andric BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg) 29648bcb0991SDimitry Andric .addReg(SrcReg, 0, AMDGPU::sub0); 29658bcb0991SDimitry Andric BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg) 29668bcb0991SDimitry Andric .addReg(SrcReg, 0, AMDGPU::sub1); 29678bcb0991SDimitry Andric 29685ffd83dbSDimitry Andric Register MaskedLo, MaskedHi; 29695ffd83dbSDimitry Andric 297004eeddc0SDimitry Andric if (CanCopyLow32) { 29715ffd83dbSDimitry Andric // If all the bits in the low half are 1, we only need a copy for it. 29725ffd83dbSDimitry Andric MaskedLo = LoReg; 29735ffd83dbSDimitry Andric } else { 29745ffd83dbSDimitry Andric // Extract the mask subregister and apply the and. 29755ffd83dbSDimitry Andric Register MaskLo = MRI->createVirtualRegister(&RegRC); 29765ffd83dbSDimitry Andric MaskedLo = MRI->createVirtualRegister(&RegRC); 29775ffd83dbSDimitry Andric 29785ffd83dbSDimitry Andric BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskLo) 29795ffd83dbSDimitry Andric .addReg(MaskReg, 0, AMDGPU::sub0); 29805ffd83dbSDimitry Andric BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedLo) 29818bcb0991SDimitry Andric .addReg(LoReg) 29825ffd83dbSDimitry Andric .addReg(MaskLo); 29835ffd83dbSDimitry Andric } 29845ffd83dbSDimitry Andric 298504eeddc0SDimitry Andric if (CanCopyHi32) { 29865ffd83dbSDimitry Andric // If all the bits in the high half are 1, we only need a copy for it. 29875ffd83dbSDimitry Andric MaskedHi = HiReg; 29885ffd83dbSDimitry Andric } else { 29895ffd83dbSDimitry Andric Register MaskHi = MRI->createVirtualRegister(&RegRC); 29905ffd83dbSDimitry Andric MaskedHi = MRI->createVirtualRegister(&RegRC); 29915ffd83dbSDimitry Andric 29925ffd83dbSDimitry Andric BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskHi) 29935ffd83dbSDimitry Andric .addReg(MaskReg, 0, AMDGPU::sub1); 29945ffd83dbSDimitry Andric BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedHi) 29958bcb0991SDimitry Andric .addReg(HiReg) 29965ffd83dbSDimitry Andric .addReg(MaskHi); 29975ffd83dbSDimitry Andric } 29985ffd83dbSDimitry Andric 29995ffd83dbSDimitry Andric BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg) 30005ffd83dbSDimitry Andric .addReg(MaskedLo) 30015ffd83dbSDimitry Andric .addImm(AMDGPU::sub0) 30025ffd83dbSDimitry Andric .addReg(MaskedHi) 30038bcb0991SDimitry Andric .addImm(AMDGPU::sub1); 30048bcb0991SDimitry Andric I.eraseFromParent(); 30058bcb0991SDimitry Andric return true; 30068bcb0991SDimitry Andric } 30078bcb0991SDimitry Andric 30085ffd83dbSDimitry Andric /// Return the register to use for the index value, and the subregister to use 30095ffd83dbSDimitry Andric /// for the indirectly accessed register. 30105ffd83dbSDimitry Andric static std::pair<Register, unsigned> 3011bdd1243dSDimitry Andric computeIndirectRegIndex(MachineRegisterInfo &MRI, const SIRegisterInfo &TRI, 3012bdd1243dSDimitry Andric const TargetRegisterClass *SuperRC, Register IdxReg, 3013bdd1243dSDimitry Andric unsigned EltSize, GISelKnownBits &KnownBits) { 30145ffd83dbSDimitry Andric Register IdxBaseReg; 30155ffd83dbSDimitry Andric int Offset; 30165ffd83dbSDimitry Andric 3017bdd1243dSDimitry Andric std::tie(IdxBaseReg, Offset) = 3018bdd1243dSDimitry Andric AMDGPU::getBaseWithConstantOffset(MRI, IdxReg, &KnownBits); 30195ffd83dbSDimitry Andric if (IdxBaseReg == AMDGPU::NoRegister) { 30205ffd83dbSDimitry Andric // This will happen if the index is a known constant. This should ordinarily 30215ffd83dbSDimitry Andric // be legalized out, but handle it as a register just in case. 30225ffd83dbSDimitry Andric assert(Offset == 0); 30235ffd83dbSDimitry Andric IdxBaseReg = IdxReg; 30245ffd83dbSDimitry Andric } 30255ffd83dbSDimitry Andric 30265ffd83dbSDimitry Andric ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SuperRC, EltSize); 30275ffd83dbSDimitry Andric 30285ffd83dbSDimitry Andric // Skip out of bounds offsets, or else we would end up using an undefined 30295ffd83dbSDimitry Andric // register. 30305ffd83dbSDimitry Andric if (static_cast<unsigned>(Offset) >= SubRegs.size()) 3031bdd1243dSDimitry Andric return std::pair(IdxReg, SubRegs[0]); 3032bdd1243dSDimitry Andric return std::pair(IdxBaseReg, SubRegs[Offset]); 30335ffd83dbSDimitry Andric } 30345ffd83dbSDimitry Andric 3035480093f4SDimitry Andric bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT( 3036480093f4SDimitry Andric MachineInstr &MI) const { 3037480093f4SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 3038480093f4SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 3039480093f4SDimitry Andric Register IdxReg = MI.getOperand(2).getReg(); 3040480093f4SDimitry Andric 3041480093f4SDimitry Andric LLT DstTy = MRI->getType(DstReg); 3042480093f4SDimitry Andric LLT SrcTy = MRI->getType(SrcReg); 3043480093f4SDimitry Andric 3044480093f4SDimitry Andric const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI); 3045480093f4SDimitry Andric const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI); 3046480093f4SDimitry Andric const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI); 3047480093f4SDimitry Andric 3048480093f4SDimitry Andric // The index must be scalar. If it wasn't RegBankSelect should have moved this 3049480093f4SDimitry Andric // into a waterfall loop. 3050480093f4SDimitry Andric if (IdxRB->getID() != AMDGPU::SGPRRegBankID) 3051480093f4SDimitry Andric return false; 3052480093f4SDimitry Andric 305381ad6265SDimitry Andric const TargetRegisterClass *SrcRC = 305481ad6265SDimitry Andric TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB); 305581ad6265SDimitry Andric const TargetRegisterClass *DstRC = 305681ad6265SDimitry Andric TRI.getRegClassForTypeOnBank(DstTy, *DstRB); 30575ffd83dbSDimitry Andric if (!SrcRC || !DstRC) 30585ffd83dbSDimitry Andric return false; 3059480093f4SDimitry Andric if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) || 3060480093f4SDimitry Andric !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) || 3061480093f4SDimitry Andric !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI)) 3062480093f4SDimitry Andric return false; 3063480093f4SDimitry Andric 3064480093f4SDimitry Andric MachineBasicBlock *BB = MI.getParent(); 3065480093f4SDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 3066480093f4SDimitry Andric const bool Is64 = DstTy.getSizeInBits() == 64; 3067480093f4SDimitry Andric 30685ffd83dbSDimitry Andric unsigned SubReg; 3069bdd1243dSDimitry Andric std::tie(IdxReg, SubReg) = computeIndirectRegIndex( 307006c3fb27SDimitry Andric *MRI, TRI, SrcRC, IdxReg, DstTy.getSizeInBits() / 8, *KB); 3071480093f4SDimitry Andric 3072480093f4SDimitry Andric if (SrcRB->getID() == AMDGPU::SGPRRegBankID) { 3073480093f4SDimitry Andric if (DstTy.getSizeInBits() != 32 && !Is64) 3074480093f4SDimitry Andric return false; 3075480093f4SDimitry Andric 3076480093f4SDimitry Andric BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0) 3077480093f4SDimitry Andric .addReg(IdxReg); 3078480093f4SDimitry Andric 3079480093f4SDimitry Andric unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32; 3080480093f4SDimitry Andric BuildMI(*BB, &MI, DL, TII.get(Opc), DstReg) 3081480093f4SDimitry Andric .addReg(SrcReg, 0, SubReg) 3082480093f4SDimitry Andric .addReg(SrcReg, RegState::Implicit); 3083480093f4SDimitry Andric MI.eraseFromParent(); 3084480093f4SDimitry Andric return true; 3085480093f4SDimitry Andric } 3086480093f4SDimitry Andric 3087480093f4SDimitry Andric if (SrcRB->getID() != AMDGPU::VGPRRegBankID || DstTy.getSizeInBits() != 32) 3088480093f4SDimitry Andric return false; 3089480093f4SDimitry Andric 3090480093f4SDimitry Andric if (!STI.useVGPRIndexMode()) { 3091480093f4SDimitry Andric BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0) 3092480093f4SDimitry Andric .addReg(IdxReg); 3093480093f4SDimitry Andric BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg) 3094e8d8bef9SDimitry Andric .addReg(SrcReg, 0, SubReg) 3095480093f4SDimitry Andric .addReg(SrcReg, RegState::Implicit); 3096480093f4SDimitry Andric MI.eraseFromParent(); 3097480093f4SDimitry Andric return true; 3098480093f4SDimitry Andric } 3099480093f4SDimitry Andric 3100e8d8bef9SDimitry Andric const MCInstrDesc &GPRIDXDesc = 3101e8d8bef9SDimitry Andric TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*SrcRC), true); 3102e8d8bef9SDimitry Andric BuildMI(*BB, MI, DL, GPRIDXDesc, DstReg) 3103e8d8bef9SDimitry Andric .addReg(SrcReg) 3104480093f4SDimitry Andric .addReg(IdxReg) 3105e8d8bef9SDimitry Andric .addImm(SubReg); 3106480093f4SDimitry Andric 3107480093f4SDimitry Andric MI.eraseFromParent(); 3108480093f4SDimitry Andric return true; 3109480093f4SDimitry Andric } 3110480093f4SDimitry Andric 31115ffd83dbSDimitry Andric // TODO: Fold insert_vector_elt (extract_vector_elt) into movrelsd 31125ffd83dbSDimitry Andric bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT( 31135ffd83dbSDimitry Andric MachineInstr &MI) const { 31145ffd83dbSDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 31155ffd83dbSDimitry Andric Register VecReg = MI.getOperand(1).getReg(); 31165ffd83dbSDimitry Andric Register ValReg = MI.getOperand(2).getReg(); 31175ffd83dbSDimitry Andric Register IdxReg = MI.getOperand(3).getReg(); 31185ffd83dbSDimitry Andric 31195ffd83dbSDimitry Andric LLT VecTy = MRI->getType(DstReg); 31205ffd83dbSDimitry Andric LLT ValTy = MRI->getType(ValReg); 31215ffd83dbSDimitry Andric unsigned VecSize = VecTy.getSizeInBits(); 31225ffd83dbSDimitry Andric unsigned ValSize = ValTy.getSizeInBits(); 31235ffd83dbSDimitry Andric 31245ffd83dbSDimitry Andric const RegisterBank *VecRB = RBI.getRegBank(VecReg, *MRI, TRI); 31255ffd83dbSDimitry Andric const RegisterBank *ValRB = RBI.getRegBank(ValReg, *MRI, TRI); 31265ffd83dbSDimitry Andric const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI); 31275ffd83dbSDimitry Andric 31285ffd83dbSDimitry Andric assert(VecTy.getElementType() == ValTy); 31295ffd83dbSDimitry Andric 31305ffd83dbSDimitry Andric // The index must be scalar. If it wasn't RegBankSelect should have moved this 31315ffd83dbSDimitry Andric // into a waterfall loop. 31325ffd83dbSDimitry Andric if (IdxRB->getID() != AMDGPU::SGPRRegBankID) 31335ffd83dbSDimitry Andric return false; 31345ffd83dbSDimitry Andric 313581ad6265SDimitry Andric const TargetRegisterClass *VecRC = 313681ad6265SDimitry Andric TRI.getRegClassForTypeOnBank(VecTy, *VecRB); 313781ad6265SDimitry Andric const TargetRegisterClass *ValRC = 313881ad6265SDimitry Andric TRI.getRegClassForTypeOnBank(ValTy, *ValRB); 31395ffd83dbSDimitry Andric 31405ffd83dbSDimitry Andric if (!RBI.constrainGenericRegister(VecReg, *VecRC, *MRI) || 31415ffd83dbSDimitry Andric !RBI.constrainGenericRegister(DstReg, *VecRC, *MRI) || 31425ffd83dbSDimitry Andric !RBI.constrainGenericRegister(ValReg, *ValRC, *MRI) || 31435ffd83dbSDimitry Andric !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI)) 31445ffd83dbSDimitry Andric return false; 31455ffd83dbSDimitry Andric 31465ffd83dbSDimitry Andric if (VecRB->getID() == AMDGPU::VGPRRegBankID && ValSize != 32) 31475ffd83dbSDimitry Andric return false; 31485ffd83dbSDimitry Andric 31495ffd83dbSDimitry Andric unsigned SubReg; 315006c3fb27SDimitry Andric std::tie(IdxReg, SubReg) = 315106c3fb27SDimitry Andric computeIndirectRegIndex(*MRI, TRI, VecRC, IdxReg, ValSize / 8, *KB); 31525ffd83dbSDimitry Andric 31535ffd83dbSDimitry Andric const bool IndexMode = VecRB->getID() == AMDGPU::VGPRRegBankID && 31545ffd83dbSDimitry Andric STI.useVGPRIndexMode(); 31555ffd83dbSDimitry Andric 31565ffd83dbSDimitry Andric MachineBasicBlock *BB = MI.getParent(); 31575ffd83dbSDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 31585ffd83dbSDimitry Andric 3159e8d8bef9SDimitry Andric if (!IndexMode) { 31605ffd83dbSDimitry Andric BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0) 31615ffd83dbSDimitry Andric .addReg(IdxReg); 31625ffd83dbSDimitry Andric 3163e8d8bef9SDimitry Andric const MCInstrDesc &RegWriteOp = TII.getIndirectRegWriteMovRelPseudo( 3164e8d8bef9SDimitry Andric VecSize, ValSize, VecRB->getID() == AMDGPU::SGPRRegBankID); 31655ffd83dbSDimitry Andric BuildMI(*BB, MI, DL, RegWriteOp, DstReg) 31665ffd83dbSDimitry Andric .addReg(VecReg) 31675ffd83dbSDimitry Andric .addReg(ValReg) 31685ffd83dbSDimitry Andric .addImm(SubReg); 3169e8d8bef9SDimitry Andric MI.eraseFromParent(); 3170e8d8bef9SDimitry Andric return true; 3171e8d8bef9SDimitry Andric } 31725ffd83dbSDimitry Andric 3173e8d8bef9SDimitry Andric const MCInstrDesc &GPRIDXDesc = 3174e8d8bef9SDimitry Andric TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC), false); 3175e8d8bef9SDimitry Andric BuildMI(*BB, MI, DL, GPRIDXDesc, DstReg) 3176e8d8bef9SDimitry Andric .addReg(VecReg) 3177e8d8bef9SDimitry Andric .addReg(ValReg) 3178e8d8bef9SDimitry Andric .addReg(IdxReg) 3179e8d8bef9SDimitry Andric .addImm(SubReg); 31805ffd83dbSDimitry Andric 31815ffd83dbSDimitry Andric MI.eraseFromParent(); 31825ffd83dbSDimitry Andric return true; 31835ffd83dbSDimitry Andric } 31845ffd83dbSDimitry Andric 318581ad6265SDimitry Andric bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const { 31865f757f3fSDimitry Andric assert(!AMDGPU::isGFX12Plus(STI)); 318781ad6265SDimitry Andric unsigned Opc; 318881ad6265SDimitry Andric unsigned Size = MI.getOperand(3).getImm(); 318981ad6265SDimitry Andric 319081ad6265SDimitry Andric // The struct intrinsic variants add one additional operand over raw. 319181ad6265SDimitry Andric const bool HasVIndex = MI.getNumOperands() == 9; 319281ad6265SDimitry Andric Register VIndex; 319381ad6265SDimitry Andric int OpOffset = 0; 319481ad6265SDimitry Andric if (HasVIndex) { 319581ad6265SDimitry Andric VIndex = MI.getOperand(4).getReg(); 319681ad6265SDimitry Andric OpOffset = 1; 319781ad6265SDimitry Andric } 319881ad6265SDimitry Andric 319981ad6265SDimitry Andric Register VOffset = MI.getOperand(4 + OpOffset).getReg(); 3200bdd1243dSDimitry Andric std::optional<ValueAndVReg> MaybeVOffset = 320181ad6265SDimitry Andric getIConstantVRegValWithLookThrough(VOffset, *MRI); 320281ad6265SDimitry Andric const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue(); 320381ad6265SDimitry Andric 320481ad6265SDimitry Andric switch (Size) { 320581ad6265SDimitry Andric default: 320681ad6265SDimitry Andric return false; 320781ad6265SDimitry Andric case 1: 320881ad6265SDimitry Andric Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN 320981ad6265SDimitry Andric : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN 321081ad6265SDimitry Andric : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN 321181ad6265SDimitry Andric : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET; 321281ad6265SDimitry Andric break; 321381ad6265SDimitry Andric case 2: 321481ad6265SDimitry Andric Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN 321581ad6265SDimitry Andric : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN 321681ad6265SDimitry Andric : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN 321781ad6265SDimitry Andric : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET; 321881ad6265SDimitry Andric break; 321981ad6265SDimitry Andric case 4: 322081ad6265SDimitry Andric Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN 322181ad6265SDimitry Andric : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN 322281ad6265SDimitry Andric : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN 322381ad6265SDimitry Andric : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET; 322481ad6265SDimitry Andric break; 322581ad6265SDimitry Andric } 322681ad6265SDimitry Andric 322781ad6265SDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 322881ad6265SDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 322981ad6265SDimitry Andric BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0) 323081ad6265SDimitry Andric .add(MI.getOperand(2)); 323181ad6265SDimitry Andric 323281ad6265SDimitry Andric auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc)); 323381ad6265SDimitry Andric 323481ad6265SDimitry Andric if (HasVIndex && HasVOffset) { 323581ad6265SDimitry Andric Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class()); 323681ad6265SDimitry Andric BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg) 323781ad6265SDimitry Andric .addReg(VIndex) 323881ad6265SDimitry Andric .addImm(AMDGPU::sub0) 323981ad6265SDimitry Andric .addReg(VOffset) 324081ad6265SDimitry Andric .addImm(AMDGPU::sub1); 324181ad6265SDimitry Andric 324281ad6265SDimitry Andric MIB.addReg(IdxReg); 324381ad6265SDimitry Andric } else if (HasVIndex) { 324481ad6265SDimitry Andric MIB.addReg(VIndex); 324581ad6265SDimitry Andric } else if (HasVOffset) { 324681ad6265SDimitry Andric MIB.addReg(VOffset); 324781ad6265SDimitry Andric } 324881ad6265SDimitry Andric 324981ad6265SDimitry Andric MIB.add(MI.getOperand(1)); // rsrc 325081ad6265SDimitry Andric MIB.add(MI.getOperand(5 + OpOffset)); // soffset 325181ad6265SDimitry Andric MIB.add(MI.getOperand(6 + OpOffset)); // imm offset 325281ad6265SDimitry Andric unsigned Aux = MI.getOperand(7 + OpOffset).getImm(); 325381ad6265SDimitry Andric MIB.addImm(Aux & AMDGPU::CPol::ALL); // cpol 32545f757f3fSDimitry Andric MIB.addImm(Aux & AMDGPU::CPol::SWZ_pregfx12 ? 1 : 0); // swz 325581ad6265SDimitry Andric 325681ad6265SDimitry Andric MachineMemOperand *LoadMMO = *MI.memoperands_begin(); 325781ad6265SDimitry Andric MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo(); 325881ad6265SDimitry Andric LoadPtrI.Offset = MI.getOperand(6 + OpOffset).getImm(); 325981ad6265SDimitry Andric MachinePointerInfo StorePtrI = LoadPtrI; 326081ad6265SDimitry Andric StorePtrI.V = nullptr; 326181ad6265SDimitry Andric StorePtrI.AddrSpace = AMDGPUAS::LOCAL_ADDRESS; 326281ad6265SDimitry Andric 326381ad6265SDimitry Andric auto F = LoadMMO->getFlags() & 326481ad6265SDimitry Andric ~(MachineMemOperand::MOStore | MachineMemOperand::MOLoad); 326581ad6265SDimitry Andric LoadMMO = MF->getMachineMemOperand(LoadPtrI, F | MachineMemOperand::MOLoad, 326681ad6265SDimitry Andric Size, LoadMMO->getBaseAlign()); 326781ad6265SDimitry Andric 326881ad6265SDimitry Andric MachineMemOperand *StoreMMO = 326981ad6265SDimitry Andric MF->getMachineMemOperand(StorePtrI, F | MachineMemOperand::MOStore, 327081ad6265SDimitry Andric sizeof(int32_t), LoadMMO->getBaseAlign()); 327181ad6265SDimitry Andric 327281ad6265SDimitry Andric MIB.setMemRefs({LoadMMO, StoreMMO}); 327381ad6265SDimitry Andric 327481ad6265SDimitry Andric MI.eraseFromParent(); 327581ad6265SDimitry Andric return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); 327681ad6265SDimitry Andric } 327781ad6265SDimitry Andric 327881ad6265SDimitry Andric /// Match a zero extend from a 32-bit value to 64-bits. 327981ad6265SDimitry Andric static Register matchZeroExtendFromS32(MachineRegisterInfo &MRI, Register Reg) { 328081ad6265SDimitry Andric Register ZExtSrc; 328181ad6265SDimitry Andric if (mi_match(Reg, MRI, m_GZExt(m_Reg(ZExtSrc)))) 328281ad6265SDimitry Andric return MRI.getType(ZExtSrc) == LLT::scalar(32) ? ZExtSrc : Register(); 328381ad6265SDimitry Andric 328481ad6265SDimitry Andric // Match legalized form %zext = G_MERGE_VALUES (s32 %x), (s32 0) 328581ad6265SDimitry Andric const MachineInstr *Def = getDefIgnoringCopies(Reg, MRI); 328681ad6265SDimitry Andric if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES) 3287753f127fSDimitry Andric return Register(); 328881ad6265SDimitry Andric 3289fcaf7f86SDimitry Andric assert(Def->getNumOperands() == 3 && 3290fcaf7f86SDimitry Andric MRI.getType(Def->getOperand(0).getReg()) == LLT::scalar(64)); 329181ad6265SDimitry Andric if (mi_match(Def->getOperand(2).getReg(), MRI, m_ZeroInt())) { 329281ad6265SDimitry Andric return Def->getOperand(1).getReg(); 329381ad6265SDimitry Andric } 329481ad6265SDimitry Andric 329581ad6265SDimitry Andric return Register(); 329681ad6265SDimitry Andric } 329781ad6265SDimitry Andric 329881ad6265SDimitry Andric bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const{ 329981ad6265SDimitry Andric unsigned Opc; 330081ad6265SDimitry Andric unsigned Size = MI.getOperand(3).getImm(); 330181ad6265SDimitry Andric 330281ad6265SDimitry Andric switch (Size) { 330381ad6265SDimitry Andric default: 330481ad6265SDimitry Andric return false; 330581ad6265SDimitry Andric case 1: 330681ad6265SDimitry Andric Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE; 330781ad6265SDimitry Andric break; 330881ad6265SDimitry Andric case 2: 330981ad6265SDimitry Andric Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT; 331081ad6265SDimitry Andric break; 331181ad6265SDimitry Andric case 4: 331281ad6265SDimitry Andric Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD; 331381ad6265SDimitry Andric break; 331481ad6265SDimitry Andric } 331581ad6265SDimitry Andric 331681ad6265SDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 331781ad6265SDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 331881ad6265SDimitry Andric BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0) 331981ad6265SDimitry Andric .add(MI.getOperand(2)); 332081ad6265SDimitry Andric 332181ad6265SDimitry Andric Register Addr = MI.getOperand(1).getReg(); 332281ad6265SDimitry Andric Register VOffset; 332381ad6265SDimitry Andric // Try to split SAddr and VOffset. Global and LDS pointers share the same 332481ad6265SDimitry Andric // immediate offset, so we cannot use a regular SelectGlobalSAddr(). 332581ad6265SDimitry Andric if (!isSGPR(Addr)) { 332681ad6265SDimitry Andric auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI); 332781ad6265SDimitry Andric if (isSGPR(AddrDef->Reg)) { 332881ad6265SDimitry Andric Addr = AddrDef->Reg; 332981ad6265SDimitry Andric } else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) { 333081ad6265SDimitry Andric Register SAddr = 333181ad6265SDimitry Andric getSrcRegIgnoringCopies(AddrDef->MI->getOperand(1).getReg(), *MRI); 3332bdd1243dSDimitry Andric if (isSGPR(SAddr)) { 333381ad6265SDimitry Andric Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg(); 333481ad6265SDimitry Andric if (Register Off = matchZeroExtendFromS32(*MRI, PtrBaseOffset)) { 333581ad6265SDimitry Andric Addr = SAddr; 333681ad6265SDimitry Andric VOffset = Off; 333781ad6265SDimitry Andric } 333881ad6265SDimitry Andric } 333981ad6265SDimitry Andric } 334081ad6265SDimitry Andric } 334181ad6265SDimitry Andric 334281ad6265SDimitry Andric if (isSGPR(Addr)) { 334381ad6265SDimitry Andric Opc = AMDGPU::getGlobalSaddrOp(Opc); 334481ad6265SDimitry Andric if (!VOffset) { 334581ad6265SDimitry Andric VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); 334681ad6265SDimitry Andric BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_MOV_B32_e32), VOffset) 334781ad6265SDimitry Andric .addImm(0); 334881ad6265SDimitry Andric } 334981ad6265SDimitry Andric } 335081ad6265SDimitry Andric 335181ad6265SDimitry Andric auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc)) 335281ad6265SDimitry Andric .addReg(Addr); 335381ad6265SDimitry Andric 335481ad6265SDimitry Andric if (isSGPR(Addr)) 335581ad6265SDimitry Andric MIB.addReg(VOffset); 335681ad6265SDimitry Andric 335781ad6265SDimitry Andric MIB.add(MI.getOperand(4)) // offset 335881ad6265SDimitry Andric .add(MI.getOperand(5)); // cpol 335981ad6265SDimitry Andric 336081ad6265SDimitry Andric MachineMemOperand *LoadMMO = *MI.memoperands_begin(); 336181ad6265SDimitry Andric MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo(); 336281ad6265SDimitry Andric LoadPtrI.Offset = MI.getOperand(4).getImm(); 336381ad6265SDimitry Andric MachinePointerInfo StorePtrI = LoadPtrI; 336481ad6265SDimitry Andric LoadPtrI.AddrSpace = AMDGPUAS::GLOBAL_ADDRESS; 336581ad6265SDimitry Andric StorePtrI.AddrSpace = AMDGPUAS::LOCAL_ADDRESS; 336681ad6265SDimitry Andric auto F = LoadMMO->getFlags() & 336781ad6265SDimitry Andric ~(MachineMemOperand::MOStore | MachineMemOperand::MOLoad); 336881ad6265SDimitry Andric LoadMMO = MF->getMachineMemOperand(LoadPtrI, F | MachineMemOperand::MOLoad, 336981ad6265SDimitry Andric Size, LoadMMO->getBaseAlign()); 337081ad6265SDimitry Andric MachineMemOperand *StoreMMO = 337181ad6265SDimitry Andric MF->getMachineMemOperand(StorePtrI, F | MachineMemOperand::MOStore, 337281ad6265SDimitry Andric sizeof(int32_t), Align(4)); 337381ad6265SDimitry Andric 337481ad6265SDimitry Andric MIB.setMemRefs({LoadMMO, StoreMMO}); 337581ad6265SDimitry Andric 337681ad6265SDimitry Andric MI.eraseFromParent(); 337781ad6265SDimitry Andric return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); 337881ad6265SDimitry Andric } 337981ad6265SDimitry Andric 3380e8d8bef9SDimitry Andric bool AMDGPUInstructionSelector::selectBVHIntrinsic(MachineInstr &MI) const{ 3381e8d8bef9SDimitry Andric MI.setDesc(TII.get(MI.getOperand(1).getImm())); 338281ad6265SDimitry Andric MI.removeOperand(1); 338381ad6265SDimitry Andric MI.addImplicitDefUseOperands(*MI.getParent()->getParent()); 338481ad6265SDimitry Andric return true; 338581ad6265SDimitry Andric } 338681ad6265SDimitry Andric 338781ad6265SDimitry Andric bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const { 338881ad6265SDimitry Andric unsigned Opc; 33895f757f3fSDimitry Andric switch (cast<GIntrinsic>(MI).getIntrinsicID()) { 339081ad6265SDimitry Andric case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16: 339181ad6265SDimitry Andric Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64; 339281ad6265SDimitry Andric break; 339381ad6265SDimitry Andric case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16: 339481ad6265SDimitry Andric Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64; 339581ad6265SDimitry Andric break; 339681ad6265SDimitry Andric case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16: 339781ad6265SDimitry Andric Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64; 339881ad6265SDimitry Andric break; 339981ad6265SDimitry Andric case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16: 340081ad6265SDimitry Andric Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64; 340181ad6265SDimitry Andric break; 340281ad6265SDimitry Andric case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8: 340381ad6265SDimitry Andric Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64; 340481ad6265SDimitry Andric break; 340581ad6265SDimitry Andric case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8: 340681ad6265SDimitry Andric Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64; 340781ad6265SDimitry Andric break; 3408fcaf7f86SDimitry Andric case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8: 3409fcaf7f86SDimitry Andric Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64; 3410fcaf7f86SDimitry Andric break; 3411fcaf7f86SDimitry Andric case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8: 3412fcaf7f86SDimitry Andric Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64; 3413fcaf7f86SDimitry Andric break; 3414fcaf7f86SDimitry Andric case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8: 3415fcaf7f86SDimitry Andric Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64; 3416fcaf7f86SDimitry Andric break; 3417fcaf7f86SDimitry Andric case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8: 3418fcaf7f86SDimitry Andric Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64; 3419fcaf7f86SDimitry Andric break; 3420fcaf7f86SDimitry Andric case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8: 3421fcaf7f86SDimitry Andric Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64; 3422fcaf7f86SDimitry Andric break; 3423fcaf7f86SDimitry Andric case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8: 3424fcaf7f86SDimitry Andric Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64; 3425fcaf7f86SDimitry Andric break; 3426fcaf7f86SDimitry Andric case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8: 3427fcaf7f86SDimitry Andric Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64; 3428fcaf7f86SDimitry Andric break; 3429fcaf7f86SDimitry Andric case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8: 3430fcaf7f86SDimitry Andric Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64; 3431fcaf7f86SDimitry Andric break; 343281ad6265SDimitry Andric default: 343381ad6265SDimitry Andric llvm_unreachable("unhandled smfmac intrinsic"); 343481ad6265SDimitry Andric } 343581ad6265SDimitry Andric 343681ad6265SDimitry Andric auto VDst_In = MI.getOperand(4); 343781ad6265SDimitry Andric 343881ad6265SDimitry Andric MI.setDesc(TII.get(Opc)); 343981ad6265SDimitry Andric MI.removeOperand(4); // VDst_In 344081ad6265SDimitry Andric MI.removeOperand(1); // Intrinsic ID 344181ad6265SDimitry Andric MI.addOperand(VDst_In); // Readd VDst_In to the end 3442e8d8bef9SDimitry Andric MI.addImplicitDefUseOperands(*MI.getParent()->getParent()); 3443e8d8bef9SDimitry Andric return true; 3444e8d8bef9SDimitry Andric } 3445e8d8bef9SDimitry Andric 344604eeddc0SDimitry Andric bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const { 344704eeddc0SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 344804eeddc0SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 344904eeddc0SDimitry Andric const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI); 345004eeddc0SDimitry Andric const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID; 345104eeddc0SDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 345204eeddc0SDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 345304eeddc0SDimitry Andric 345404eeddc0SDimitry Andric if (IsVALU) { 345504eeddc0SDimitry Andric BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg) 345604eeddc0SDimitry Andric .addImm(Subtarget->getWavefrontSizeLog2()) 345704eeddc0SDimitry Andric .addReg(SrcReg); 345804eeddc0SDimitry Andric } else { 345904eeddc0SDimitry Andric BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), DstReg) 346004eeddc0SDimitry Andric .addReg(SrcReg) 34615f757f3fSDimitry Andric .addImm(Subtarget->getWavefrontSizeLog2()) 34625f757f3fSDimitry Andric .setOperandDead(3); // Dead scc 346304eeddc0SDimitry Andric } 346404eeddc0SDimitry Andric 346504eeddc0SDimitry Andric const TargetRegisterClass &RC = 346604eeddc0SDimitry Andric IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass; 346704eeddc0SDimitry Andric if (!RBI.constrainGenericRegister(DstReg, RC, *MRI)) 346804eeddc0SDimitry Andric return false; 346904eeddc0SDimitry Andric 347004eeddc0SDimitry Andric MI.eraseFromParent(); 347104eeddc0SDimitry Andric return true; 347204eeddc0SDimitry Andric } 347304eeddc0SDimitry Andric 34745f757f3fSDimitry Andric bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const { 34755f757f3fSDimitry Andric Register SrcReg = MI.getOperand(0).getReg(); 34765f757f3fSDimitry Andric if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, *MRI)) 34775f757f3fSDimitry Andric return false; 34785f757f3fSDimitry Andric 34795f757f3fSDimitry Andric MachineInstr *DefMI = MRI->getVRegDef(SrcReg); 34805f757f3fSDimitry Andric Register SP = 34815f757f3fSDimitry Andric Subtarget->getTargetLowering()->getStackPointerRegisterToSaveRestore(); 34825f757f3fSDimitry Andric Register WaveAddr = getWaveAddress(DefMI); 34835f757f3fSDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 34845f757f3fSDimitry Andric const DebugLoc &DL = MI.getDebugLoc(); 34855f757f3fSDimitry Andric 34865f757f3fSDimitry Andric if (!WaveAddr) { 34875f757f3fSDimitry Andric WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); 34885f757f3fSDimitry Andric BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), WaveAddr) 34895f757f3fSDimitry Andric .addReg(SrcReg) 34905f757f3fSDimitry Andric .addImm(Subtarget->getWavefrontSizeLog2()) 34915f757f3fSDimitry Andric .setOperandDead(3); // Dead scc 34925f757f3fSDimitry Andric } 34935f757f3fSDimitry Andric 34945f757f3fSDimitry Andric BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), SP) 34955f757f3fSDimitry Andric .addReg(WaveAddr); 34965f757f3fSDimitry Andric 34975f757f3fSDimitry Andric MI.eraseFromParent(); 34985f757f3fSDimitry Andric return true; 34995f757f3fSDimitry Andric } 35005f757f3fSDimitry Andric 35018bcb0991SDimitry Andric bool AMDGPUInstructionSelector::select(MachineInstr &I) { 35020b57cec5SDimitry Andric 35038bcb0991SDimitry Andric if (!I.isPreISelOpcode()) { 35040b57cec5SDimitry Andric if (I.isCopy()) 35050b57cec5SDimitry Andric return selectCOPY(I); 35060b57cec5SDimitry Andric return true; 35070b57cec5SDimitry Andric } 35080b57cec5SDimitry Andric 35090b57cec5SDimitry Andric switch (I.getOpcode()) { 35100b57cec5SDimitry Andric case TargetOpcode::G_AND: 35110b57cec5SDimitry Andric case TargetOpcode::G_OR: 35120b57cec5SDimitry Andric case TargetOpcode::G_XOR: 35135ffd83dbSDimitry Andric if (selectImpl(I, *CoverageInfo)) 35140b57cec5SDimitry Andric return true; 35155ffd83dbSDimitry Andric return selectG_AND_OR_XOR(I); 35160b57cec5SDimitry Andric case TargetOpcode::G_ADD: 35170b57cec5SDimitry Andric case TargetOpcode::G_SUB: 3518*0fca6ea1SDimitry Andric case TargetOpcode::G_PTR_ADD: 35198bcb0991SDimitry Andric if (selectImpl(I, *CoverageInfo)) 35200b57cec5SDimitry Andric return true; 35218bcb0991SDimitry Andric return selectG_ADD_SUB(I); 35228bcb0991SDimitry Andric case TargetOpcode::G_UADDO: 35238bcb0991SDimitry Andric case TargetOpcode::G_USUBO: 3524480093f4SDimitry Andric case TargetOpcode::G_UADDE: 3525480093f4SDimitry Andric case TargetOpcode::G_USUBE: 3526480093f4SDimitry Andric return selectG_UADDO_USUBO_UADDE_USUBE(I); 352781ad6265SDimitry Andric case AMDGPU::G_AMDGPU_MAD_U64_U32: 352881ad6265SDimitry Andric case AMDGPU::G_AMDGPU_MAD_I64_I32: 352981ad6265SDimitry Andric return selectG_AMDGPU_MAD_64_32(I); 35300b57cec5SDimitry Andric case TargetOpcode::G_INTTOPTR: 35310b57cec5SDimitry Andric case TargetOpcode::G_BITCAST: 35328bcb0991SDimitry Andric case TargetOpcode::G_PTRTOINT: 3533*0fca6ea1SDimitry Andric case TargetOpcode::G_FREEZE: 35340b57cec5SDimitry Andric return selectCOPY(I); 35350b57cec5SDimitry Andric case TargetOpcode::G_CONSTANT: 35360b57cec5SDimitry Andric case TargetOpcode::G_FCONSTANT: 35370b57cec5SDimitry Andric return selectG_CONSTANT(I); 35385ffd83dbSDimitry Andric case TargetOpcode::G_FNEG: 35395ffd83dbSDimitry Andric if (selectImpl(I, *CoverageInfo)) 35405ffd83dbSDimitry Andric return true; 35415ffd83dbSDimitry Andric return selectG_FNEG(I); 35425ffd83dbSDimitry Andric case TargetOpcode::G_FABS: 35435ffd83dbSDimitry Andric if (selectImpl(I, *CoverageInfo)) 35445ffd83dbSDimitry Andric return true; 35455ffd83dbSDimitry Andric return selectG_FABS(I); 35460b57cec5SDimitry Andric case TargetOpcode::G_EXTRACT: 35470b57cec5SDimitry Andric return selectG_EXTRACT(I); 35480b57cec5SDimitry Andric case TargetOpcode::G_MERGE_VALUES: 35490b57cec5SDimitry Andric case TargetOpcode::G_CONCAT_VECTORS: 35500b57cec5SDimitry Andric return selectG_MERGE_VALUES(I); 35510b57cec5SDimitry Andric case TargetOpcode::G_UNMERGE_VALUES: 35520b57cec5SDimitry Andric return selectG_UNMERGE_VALUES(I); 3553bdd1243dSDimitry Andric case TargetOpcode::G_BUILD_VECTOR: 35545ffd83dbSDimitry Andric case TargetOpcode::G_BUILD_VECTOR_TRUNC: 3555bdd1243dSDimitry Andric return selectG_BUILD_VECTOR(I); 35560b57cec5SDimitry Andric case TargetOpcode::G_IMPLICIT_DEF: 35570b57cec5SDimitry Andric return selectG_IMPLICIT_DEF(I); 35580b57cec5SDimitry Andric case TargetOpcode::G_INSERT: 35590b57cec5SDimitry Andric return selectG_INSERT(I); 35600b57cec5SDimitry Andric case TargetOpcode::G_INTRINSIC: 35615f757f3fSDimitry Andric case TargetOpcode::G_INTRINSIC_CONVERGENT: 35628bcb0991SDimitry Andric return selectG_INTRINSIC(I); 35630b57cec5SDimitry Andric case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: 35645f757f3fSDimitry Andric case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: 35658bcb0991SDimitry Andric return selectG_INTRINSIC_W_SIDE_EFFECTS(I); 35660b57cec5SDimitry Andric case TargetOpcode::G_ICMP: 35675f757f3fSDimitry Andric case TargetOpcode::G_FCMP: 35685f757f3fSDimitry Andric if (selectG_ICMP_or_FCMP(I)) 35690b57cec5SDimitry Andric return true; 35708bcb0991SDimitry Andric return selectImpl(I, *CoverageInfo); 35710b57cec5SDimitry Andric case TargetOpcode::G_LOAD: 3572e8d8bef9SDimitry Andric case TargetOpcode::G_STORE: 35738bcb0991SDimitry Andric case TargetOpcode::G_ATOMIC_CMPXCHG: 35748bcb0991SDimitry Andric case TargetOpcode::G_ATOMICRMW_XCHG: 35758bcb0991SDimitry Andric case TargetOpcode::G_ATOMICRMW_ADD: 35768bcb0991SDimitry Andric case TargetOpcode::G_ATOMICRMW_SUB: 35778bcb0991SDimitry Andric case TargetOpcode::G_ATOMICRMW_AND: 35788bcb0991SDimitry Andric case TargetOpcode::G_ATOMICRMW_OR: 35798bcb0991SDimitry Andric case TargetOpcode::G_ATOMICRMW_XOR: 35808bcb0991SDimitry Andric case TargetOpcode::G_ATOMICRMW_MIN: 35818bcb0991SDimitry Andric case TargetOpcode::G_ATOMICRMW_MAX: 35828bcb0991SDimitry Andric case TargetOpcode::G_ATOMICRMW_UMIN: 35838bcb0991SDimitry Andric case TargetOpcode::G_ATOMICRMW_UMAX: 358406c3fb27SDimitry Andric case TargetOpcode::G_ATOMICRMW_UINC_WRAP: 358506c3fb27SDimitry Andric case TargetOpcode::G_ATOMICRMW_UDEC_WRAP: 35868bcb0991SDimitry Andric case TargetOpcode::G_ATOMICRMW_FADD: 3587*0fca6ea1SDimitry Andric case TargetOpcode::G_ATOMICRMW_FMIN: 3588*0fca6ea1SDimitry Andric case TargetOpcode::G_ATOMICRMW_FMAX: 3589e8d8bef9SDimitry Andric return selectG_LOAD_STORE_ATOMICRMW(I); 35900b57cec5SDimitry Andric case TargetOpcode::G_SELECT: 35910b57cec5SDimitry Andric return selectG_SELECT(I); 35920b57cec5SDimitry Andric case TargetOpcode::G_TRUNC: 35930b57cec5SDimitry Andric return selectG_TRUNC(I); 35940b57cec5SDimitry Andric case TargetOpcode::G_SEXT: 35950b57cec5SDimitry Andric case TargetOpcode::G_ZEXT: 35960b57cec5SDimitry Andric case TargetOpcode::G_ANYEXT: 35975ffd83dbSDimitry Andric case TargetOpcode::G_SEXT_INREG: 359806c3fb27SDimitry Andric // This is a workaround. For extension from type i1, `selectImpl()` uses 359906c3fb27SDimitry Andric // patterns from TD file and generates an illegal VGPR to SGPR COPY as type 360006c3fb27SDimitry Andric // i1 can only be hold in a SGPR class. 360106c3fb27SDimitry Andric if (MRI->getType(I.getOperand(1).getReg()) != LLT::scalar(1) && 360206c3fb27SDimitry Andric selectImpl(I, *CoverageInfo)) 3603480093f4SDimitry Andric return true; 36048bcb0991SDimitry Andric return selectG_SZA_EXT(I); 36055f757f3fSDimitry Andric case TargetOpcode::G_FPEXT: 36065f757f3fSDimitry Andric if (selectG_FPEXT(I)) 36075f757f3fSDimitry Andric return true; 36085f757f3fSDimitry Andric return selectImpl(I, *CoverageInfo); 36090b57cec5SDimitry Andric case TargetOpcode::G_BRCOND: 36100b57cec5SDimitry Andric return selectG_BRCOND(I); 36115ffd83dbSDimitry Andric case TargetOpcode::G_GLOBAL_VALUE: 3612e8d8bef9SDimitry Andric return selectG_GLOBAL_VALUE(I); 36135ffd83dbSDimitry Andric case TargetOpcode::G_PTRMASK: 36145ffd83dbSDimitry Andric return selectG_PTRMASK(I); 3615480093f4SDimitry Andric case TargetOpcode::G_EXTRACT_VECTOR_ELT: 3616480093f4SDimitry Andric return selectG_EXTRACT_VECTOR_ELT(I); 36175ffd83dbSDimitry Andric case TargetOpcode::G_INSERT_VECTOR_ELT: 36185ffd83dbSDimitry Andric return selectG_INSERT_VECTOR_ELT(I); 36195ffd83dbSDimitry Andric case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD: 362004eeddc0SDimitry Andric case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16: 3621*0fca6ea1SDimitry Andric case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET: 362204eeddc0SDimitry Andric case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE: 362304eeddc0SDimitry Andric case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: { 36245f757f3fSDimitry Andric const AMDGPU::ImageDimIntrinsicInfo *Intr = 36255f757f3fSDimitry Andric AMDGPU::getImageDimIntrinsicInfo(AMDGPU::getIntrinsicID(I)); 36265ffd83dbSDimitry Andric assert(Intr && "not an image intrinsic with image pseudo"); 36275ffd83dbSDimitry Andric return selectImageIntrinsic(I, Intr); 36285ffd83dbSDimitry Andric } 3629e8d8bef9SDimitry Andric case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY: 3630e8d8bef9SDimitry Andric return selectBVHIntrinsic(I); 3631fe6060f1SDimitry Andric case AMDGPU::G_SBFX: 3632fe6060f1SDimitry Andric case AMDGPU::G_UBFX: 3633fe6060f1SDimitry Andric return selectG_SBFX_UBFX(I); 3634349cc55cSDimitry Andric case AMDGPU::G_SI_CALL: 3635349cc55cSDimitry Andric I.setDesc(TII.get(AMDGPU::SI_CALL)); 3636349cc55cSDimitry Andric return true; 363704eeddc0SDimitry Andric case AMDGPU::G_AMDGPU_WAVE_ADDRESS: 363804eeddc0SDimitry Andric return selectWaveAddress(I); 36395f757f3fSDimitry Andric case AMDGPU::G_STACKRESTORE: 36405f757f3fSDimitry Andric return selectStackRestore(I); 3641*0fca6ea1SDimitry Andric case AMDGPU::G_PHI: 3642*0fca6ea1SDimitry Andric return selectPHI(I); 36438bcb0991SDimitry Andric default: 36448bcb0991SDimitry Andric return selectImpl(I, *CoverageInfo); 36450b57cec5SDimitry Andric } 36460b57cec5SDimitry Andric return false; 36470b57cec5SDimitry Andric } 36480b57cec5SDimitry Andric 36490b57cec5SDimitry Andric InstructionSelector::ComplexRendererFns 36500b57cec5SDimitry Andric AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const { 36510b57cec5SDimitry Andric return {{ 36520b57cec5SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.add(Root); } 36530b57cec5SDimitry Andric }}; 36540b57cec5SDimitry Andric 36550b57cec5SDimitry Andric } 36560b57cec5SDimitry Andric 365706c3fb27SDimitry Andric std::pair<Register, unsigned> 365806c3fb27SDimitry Andric AMDGPUInstructionSelector::selectVOP3ModsImpl(MachineOperand &Root, 365906c3fb27SDimitry Andric bool IsCanonicalizing, 366006c3fb27SDimitry Andric bool AllowAbs, bool OpSel) const { 36615ffd83dbSDimitry Andric Register Src = Root.getReg(); 36620b57cec5SDimitry Andric unsigned Mods = 0; 36635ffd83dbSDimitry Andric MachineInstr *MI = getDefIgnoringCopies(Src, *MRI); 36640b57cec5SDimitry Andric 3665bdd1243dSDimitry Andric if (MI->getOpcode() == AMDGPU::G_FNEG) { 36660b57cec5SDimitry Andric Src = MI->getOperand(1).getReg(); 36670b57cec5SDimitry Andric Mods |= SISrcMods::NEG; 36685ffd83dbSDimitry Andric MI = getDefIgnoringCopies(Src, *MRI); 366906c3fb27SDimitry Andric } else if (MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) { 367006c3fb27SDimitry Andric // Fold fsub [+-]0 into fneg. This may not have folded depending on the 367106c3fb27SDimitry Andric // denormal mode, but we're implicitly canonicalizing in a source operand. 367206c3fb27SDimitry Andric const ConstantFP *LHS = 367306c3fb27SDimitry Andric getConstantFPVRegVal(MI->getOperand(1).getReg(), *MRI); 367406c3fb27SDimitry Andric if (LHS && LHS->isZero()) { 367506c3fb27SDimitry Andric Mods |= SISrcMods::NEG; 367606c3fb27SDimitry Andric Src = MI->getOperand(2).getReg(); 367706c3fb27SDimitry Andric } 36780b57cec5SDimitry Andric } 36790b57cec5SDimitry Andric 3680bdd1243dSDimitry Andric if (AllowAbs && MI->getOpcode() == AMDGPU::G_FABS) { 36810b57cec5SDimitry Andric Src = MI->getOperand(1).getReg(); 36820b57cec5SDimitry Andric Mods |= SISrcMods::ABS; 36830b57cec5SDimitry Andric } 36840b57cec5SDimitry Andric 368581ad6265SDimitry Andric if (OpSel) 368681ad6265SDimitry Andric Mods |= SISrcMods::OP_SEL_0; 368781ad6265SDimitry Andric 3688bdd1243dSDimitry Andric return std::pair(Src, Mods); 3689bdd1243dSDimitry Andric } 3690bdd1243dSDimitry Andric 3691bdd1243dSDimitry Andric Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded( 3692bdd1243dSDimitry Andric Register Src, unsigned Mods, MachineOperand Root, MachineInstr *InsertPt, 3693bdd1243dSDimitry Andric bool ForceVGPR) const { 369481ad6265SDimitry Andric if ((Mods != 0 || ForceVGPR) && 36955ffd83dbSDimitry Andric RBI.getRegBank(Src, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) { 36965ffd83dbSDimitry Andric 36975ffd83dbSDimitry Andric // If we looked through copies to find source modifiers on an SGPR operand, 36985ffd83dbSDimitry Andric // we now have an SGPR register source. To avoid potentially violating the 36995ffd83dbSDimitry Andric // constant bus restriction, we need to insert a copy to a VGPR. 3700bdd1243dSDimitry Andric Register VGPRSrc = MRI->cloneVirtualRegister(Root.getReg()); 3701bdd1243dSDimitry Andric BuildMI(*InsertPt->getParent(), InsertPt, InsertPt->getDebugLoc(), 37025ffd83dbSDimitry Andric TII.get(AMDGPU::COPY), VGPRSrc) 37035ffd83dbSDimitry Andric .addReg(Src); 37045ffd83dbSDimitry Andric Src = VGPRSrc; 37055ffd83dbSDimitry Andric } 37065ffd83dbSDimitry Andric 3707bdd1243dSDimitry Andric return Src; 37080b57cec5SDimitry Andric } 37090b57cec5SDimitry Andric 37100b57cec5SDimitry Andric /// 37110b57cec5SDimitry Andric /// This will select either an SGPR or VGPR operand and will save us from 37120b57cec5SDimitry Andric /// having to write an extra tablegen pattern. 37130b57cec5SDimitry Andric InstructionSelector::ComplexRendererFns 37140b57cec5SDimitry Andric AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const { 37150b57cec5SDimitry Andric return {{ 37160b57cec5SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.add(Root); } 37170b57cec5SDimitry Andric }}; 37180b57cec5SDimitry Andric } 37190b57cec5SDimitry Andric 37200b57cec5SDimitry Andric InstructionSelector::ComplexRendererFns 37210b57cec5SDimitry Andric AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const { 37220b57cec5SDimitry Andric Register Src; 37230b57cec5SDimitry Andric unsigned Mods; 37245ffd83dbSDimitry Andric std::tie(Src, Mods) = selectVOP3ModsImpl(Root); 37250b57cec5SDimitry Andric 37260b57cec5SDimitry Andric return {{ 3727bdd1243dSDimitry Andric [=](MachineInstrBuilder &MIB) { 3728bdd1243dSDimitry Andric MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB)); 3729bdd1243dSDimitry Andric }, 37300b57cec5SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods 37310b57cec5SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp 37320b57cec5SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod 37330b57cec5SDimitry Andric }}; 37340b57cec5SDimitry Andric } 37358bcb0991SDimitry Andric 37368bcb0991SDimitry Andric InstructionSelector::ComplexRendererFns 3737e8d8bef9SDimitry Andric AMDGPUInstructionSelector::selectVOP3BMods0(MachineOperand &Root) const { 3738e8d8bef9SDimitry Andric Register Src; 3739e8d8bef9SDimitry Andric unsigned Mods; 374006c3fb27SDimitry Andric std::tie(Src, Mods) = selectVOP3ModsImpl(Root, 374106c3fb27SDimitry Andric /*IsCanonicalizing=*/true, 374206c3fb27SDimitry Andric /*AllowAbs=*/false); 3743e8d8bef9SDimitry Andric 3744e8d8bef9SDimitry Andric return {{ 3745bdd1243dSDimitry Andric [=](MachineInstrBuilder &MIB) { 3746bdd1243dSDimitry Andric MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB)); 3747bdd1243dSDimitry Andric }, 3748e8d8bef9SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods 3749e8d8bef9SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp 3750e8d8bef9SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod 3751e8d8bef9SDimitry Andric }}; 3752e8d8bef9SDimitry Andric } 3753e8d8bef9SDimitry Andric 3754e8d8bef9SDimitry Andric InstructionSelector::ComplexRendererFns 37550b57cec5SDimitry Andric AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const { 37560b57cec5SDimitry Andric return {{ 37570b57cec5SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.add(Root); }, 37580b57cec5SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, // clamp 37590b57cec5SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // omod 37600b57cec5SDimitry Andric }}; 37610b57cec5SDimitry Andric } 37620b57cec5SDimitry Andric 37630b57cec5SDimitry Andric InstructionSelector::ComplexRendererFns 37640b57cec5SDimitry Andric AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const { 37650b57cec5SDimitry Andric Register Src; 37660b57cec5SDimitry Andric unsigned Mods; 37675ffd83dbSDimitry Andric std::tie(Src, Mods) = selectVOP3ModsImpl(Root); 37685ffd83dbSDimitry Andric 37695ffd83dbSDimitry Andric return {{ 3770bdd1243dSDimitry Andric [=](MachineInstrBuilder &MIB) { 3771bdd1243dSDimitry Andric MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB)); 3772bdd1243dSDimitry Andric }, 37735ffd83dbSDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods 37745ffd83dbSDimitry Andric }}; 37755ffd83dbSDimitry Andric } 37765ffd83dbSDimitry Andric 37775ffd83dbSDimitry Andric InstructionSelector::ComplexRendererFns 377806c3fb27SDimitry Andric AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing( 377906c3fb27SDimitry Andric MachineOperand &Root) const { 378006c3fb27SDimitry Andric Register Src; 378106c3fb27SDimitry Andric unsigned Mods; 378206c3fb27SDimitry Andric std::tie(Src, Mods) = selectVOP3ModsImpl(Root, /*IsCanonicalizing=*/false); 378306c3fb27SDimitry Andric 378406c3fb27SDimitry Andric return {{ 378506c3fb27SDimitry Andric [=](MachineInstrBuilder &MIB) { 378606c3fb27SDimitry Andric MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB)); 378706c3fb27SDimitry Andric }, 378806c3fb27SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods 378906c3fb27SDimitry Andric }}; 379006c3fb27SDimitry Andric } 379106c3fb27SDimitry Andric 379206c3fb27SDimitry Andric InstructionSelector::ComplexRendererFns 3793e8d8bef9SDimitry Andric AMDGPUInstructionSelector::selectVOP3BMods(MachineOperand &Root) const { 3794e8d8bef9SDimitry Andric Register Src; 3795e8d8bef9SDimitry Andric unsigned Mods; 379606c3fb27SDimitry Andric std::tie(Src, Mods) = selectVOP3ModsImpl(Root, /*IsCanonicalizing=*/true, 379706c3fb27SDimitry Andric /*AllowAbs=*/false); 3798e8d8bef9SDimitry Andric 3799e8d8bef9SDimitry Andric return {{ 3800bdd1243dSDimitry Andric [=](MachineInstrBuilder &MIB) { 3801bdd1243dSDimitry Andric MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB)); 3802bdd1243dSDimitry Andric }, 3803e8d8bef9SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods 3804e8d8bef9SDimitry Andric }}; 3805e8d8bef9SDimitry Andric } 3806e8d8bef9SDimitry Andric 3807e8d8bef9SDimitry Andric InstructionSelector::ComplexRendererFns 38085ffd83dbSDimitry Andric AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const { 38095ffd83dbSDimitry Andric Register Reg = Root.getReg(); 38105ffd83dbSDimitry Andric const MachineInstr *Def = getDefIgnoringCopies(Reg, *MRI); 3811bdd1243dSDimitry Andric if (Def->getOpcode() == AMDGPU::G_FNEG || Def->getOpcode() == AMDGPU::G_FABS) 38125ffd83dbSDimitry Andric return {}; 38135ffd83dbSDimitry Andric return {{ 38145ffd83dbSDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); }, 38155ffd83dbSDimitry Andric }}; 38165ffd83dbSDimitry Andric } 38175ffd83dbSDimitry Andric 38185ffd83dbSDimitry Andric std::pair<Register, unsigned> 38195ffd83dbSDimitry Andric AMDGPUInstructionSelector::selectVOP3PModsImpl( 382081ad6265SDimitry Andric Register Src, const MachineRegisterInfo &MRI, bool IsDOT) const { 38215ffd83dbSDimitry Andric unsigned Mods = 0; 38225ffd83dbSDimitry Andric MachineInstr *MI = MRI.getVRegDef(Src); 38235ffd83dbSDimitry Andric 38245ffd83dbSDimitry Andric if (MI && MI->getOpcode() == AMDGPU::G_FNEG && 38255ffd83dbSDimitry Andric // It's possible to see an f32 fneg here, but unlikely. 38265ffd83dbSDimitry Andric // TODO: Treat f32 fneg as only high bit. 3827fe6060f1SDimitry Andric MRI.getType(Src) == LLT::fixed_vector(2, 16)) { 38285ffd83dbSDimitry Andric Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI); 38295ffd83dbSDimitry Andric Src = MI->getOperand(1).getReg(); 38305ffd83dbSDimitry Andric MI = MRI.getVRegDef(Src); 38315ffd83dbSDimitry Andric } 38325ffd83dbSDimitry Andric 383306c3fb27SDimitry Andric // TODO: Handle G_FSUB 0 as fneg 383406c3fb27SDimitry Andric 38355ffd83dbSDimitry Andric // TODO: Match op_sel through g_build_vector_trunc and g_shuffle_vector. 383681ad6265SDimitry Andric (void)IsDOT; // DOTs do not use OPSEL on gfx940+, check ST.hasDOTOpSelHazard() 38375ffd83dbSDimitry Andric 38385ffd83dbSDimitry Andric // Packed instructions do not have abs modifiers. 38395ffd83dbSDimitry Andric Mods |= SISrcMods::OP_SEL_1; 38405ffd83dbSDimitry Andric 3841bdd1243dSDimitry Andric return std::pair(Src, Mods); 38425ffd83dbSDimitry Andric } 38435ffd83dbSDimitry Andric 38445ffd83dbSDimitry Andric InstructionSelector::ComplexRendererFns 38455ffd83dbSDimitry Andric AMDGPUInstructionSelector::selectVOP3PMods(MachineOperand &Root) const { 38465ffd83dbSDimitry Andric MachineRegisterInfo &MRI 38475ffd83dbSDimitry Andric = Root.getParent()->getParent()->getParent()->getRegInfo(); 38485ffd83dbSDimitry Andric 38495ffd83dbSDimitry Andric Register Src; 38505ffd83dbSDimitry Andric unsigned Mods; 38515ffd83dbSDimitry Andric std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI); 38520b57cec5SDimitry Andric 38530b57cec5SDimitry Andric return {{ 38540b57cec5SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, 38550b57cec5SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods 38560b57cec5SDimitry Andric }}; 38570b57cec5SDimitry Andric } 38580b57cec5SDimitry Andric 38590b57cec5SDimitry Andric InstructionSelector::ComplexRendererFns 386081ad6265SDimitry Andric AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const { 386181ad6265SDimitry Andric MachineRegisterInfo &MRI 386281ad6265SDimitry Andric = Root.getParent()->getParent()->getParent()->getRegInfo(); 386381ad6265SDimitry Andric 386481ad6265SDimitry Andric Register Src; 386581ad6265SDimitry Andric unsigned Mods; 386681ad6265SDimitry Andric std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI, true); 386781ad6265SDimitry Andric 386881ad6265SDimitry Andric return {{ 386981ad6265SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, 387081ad6265SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods 387181ad6265SDimitry Andric }}; 387281ad6265SDimitry Andric } 387381ad6265SDimitry Andric 387481ad6265SDimitry Andric InstructionSelector::ComplexRendererFns 38757a6dacacSDimitry Andric AMDGPUInstructionSelector::selectVOP3PModsNeg(MachineOperand &Root) const { 387681ad6265SDimitry Andric // Literal i1 value set in intrinsic, represents SrcMods for the next operand. 387781ad6265SDimitry Andric // Value is in Imm operand as i1 sign extended to int64_t. 387881ad6265SDimitry Andric // 1(-1) promotes packed values to signed, 0 treats them as unsigned. 387981ad6265SDimitry Andric assert((Root.isImm() && (Root.getImm() == -1 || Root.getImm() == 0)) && 388081ad6265SDimitry Andric "expected i1 value"); 388181ad6265SDimitry Andric unsigned Mods = SISrcMods::OP_SEL_1; 388281ad6265SDimitry Andric if (Root.getImm() == -1) 388381ad6265SDimitry Andric Mods ^= SISrcMods::NEG; 388481ad6265SDimitry Andric return {{ 388581ad6265SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods 388681ad6265SDimitry Andric }}; 388781ad6265SDimitry Andric } 388881ad6265SDimitry Andric 388981ad6265SDimitry Andric InstructionSelector::ComplexRendererFns 389081ad6265SDimitry Andric AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods( 389181ad6265SDimitry Andric MachineOperand &Root) const { 389281ad6265SDimitry Andric assert((Root.isImm() && (Root.getImm() == -1 || Root.getImm() == 0)) && 389381ad6265SDimitry Andric "expected i1 value"); 389481ad6265SDimitry Andric unsigned Mods = SISrcMods::OP_SEL_1; 389581ad6265SDimitry Andric if (Root.getImm() != 0) 389681ad6265SDimitry Andric Mods |= SISrcMods::OP_SEL_0; 389781ad6265SDimitry Andric 389881ad6265SDimitry Andric return {{ 389981ad6265SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods 390081ad6265SDimitry Andric }}; 390181ad6265SDimitry Andric } 390281ad6265SDimitry Andric 3903b3edf446SDimitry Andric static Register buildRegSequence(SmallVectorImpl<Register> &Elts, 3904b3edf446SDimitry Andric MachineInstr *InsertPt, 3905b3edf446SDimitry Andric MachineRegisterInfo &MRI) { 3906b3edf446SDimitry Andric const TargetRegisterClass *DstRegClass; 3907b3edf446SDimitry Andric switch (Elts.size()) { 3908b3edf446SDimitry Andric case 8: 3909b3edf446SDimitry Andric DstRegClass = &AMDGPU::VReg_256RegClass; 3910b3edf446SDimitry Andric break; 3911b3edf446SDimitry Andric case 4: 3912b3edf446SDimitry Andric DstRegClass = &AMDGPU::VReg_128RegClass; 3913b3edf446SDimitry Andric break; 3914b3edf446SDimitry Andric case 2: 3915b3edf446SDimitry Andric DstRegClass = &AMDGPU::VReg_64RegClass; 3916b3edf446SDimitry Andric break; 3917b3edf446SDimitry Andric default: 3918b3edf446SDimitry Andric llvm_unreachable("unhandled Reg sequence size"); 3919b3edf446SDimitry Andric } 3920b3edf446SDimitry Andric 3921b3edf446SDimitry Andric MachineIRBuilder B(*InsertPt); 3922b3edf446SDimitry Andric auto MIB = B.buildInstr(AMDGPU::REG_SEQUENCE) 3923b3edf446SDimitry Andric .addDef(MRI.createVirtualRegister(DstRegClass)); 3924b3edf446SDimitry Andric for (unsigned i = 0; i < Elts.size(); ++i) { 3925b3edf446SDimitry Andric MIB.addReg(Elts[i]); 3926b3edf446SDimitry Andric MIB.addImm(SIRegisterInfo::getSubRegFromChannel(i)); 3927b3edf446SDimitry Andric } 3928b3edf446SDimitry Andric return MIB->getOperand(0).getReg(); 3929b3edf446SDimitry Andric } 3930b3edf446SDimitry Andric 3931b3edf446SDimitry Andric static void selectWMMAModsNegAbs(unsigned ModOpcode, unsigned &Mods, 3932b3edf446SDimitry Andric SmallVectorImpl<Register> &Elts, Register &Src, 3933b3edf446SDimitry Andric MachineInstr *InsertPt, 3934b3edf446SDimitry Andric MachineRegisterInfo &MRI) { 3935b3edf446SDimitry Andric if (ModOpcode == TargetOpcode::G_FNEG) { 3936b3edf446SDimitry Andric Mods |= SISrcMods::NEG; 3937b3edf446SDimitry Andric // Check if all elements also have abs modifier 3938b3edf446SDimitry Andric SmallVector<Register, 8> NegAbsElts; 3939b3edf446SDimitry Andric for (auto El : Elts) { 3940b3edf446SDimitry Andric Register FabsSrc; 3941b3edf446SDimitry Andric if (!mi_match(El, MRI, m_GFabs(m_Reg(FabsSrc)))) 3942b3edf446SDimitry Andric break; 3943b3edf446SDimitry Andric NegAbsElts.push_back(FabsSrc); 3944b3edf446SDimitry Andric } 3945b3edf446SDimitry Andric if (Elts.size() != NegAbsElts.size()) { 3946b3edf446SDimitry Andric // Neg 3947b3edf446SDimitry Andric Src = buildRegSequence(Elts, InsertPt, MRI); 3948b3edf446SDimitry Andric } else { 3949b3edf446SDimitry Andric // Neg and Abs 3950b3edf446SDimitry Andric Mods |= SISrcMods::NEG_HI; 3951b3edf446SDimitry Andric Src = buildRegSequence(NegAbsElts, InsertPt, MRI); 3952b3edf446SDimitry Andric } 3953b3edf446SDimitry Andric } else { 3954b3edf446SDimitry Andric assert(ModOpcode == TargetOpcode::G_FABS); 3955b3edf446SDimitry Andric // Abs 3956b3edf446SDimitry Andric Mods |= SISrcMods::NEG_HI; 3957b3edf446SDimitry Andric Src = buildRegSequence(Elts, InsertPt, MRI); 3958b3edf446SDimitry Andric } 3959b3edf446SDimitry Andric } 3960b3edf446SDimitry Andric 3961b3edf446SDimitry Andric InstructionSelector::ComplexRendererFns 3962b3edf446SDimitry Andric AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(MachineOperand &Root) const { 3963b3edf446SDimitry Andric Register Src = Root.getReg(); 3964b3edf446SDimitry Andric unsigned Mods = SISrcMods::OP_SEL_1; 3965b3edf446SDimitry Andric SmallVector<Register, 8> EltsF32; 3966b3edf446SDimitry Andric 3967b3edf446SDimitry Andric if (GBuildVector *BV = dyn_cast<GBuildVector>(MRI->getVRegDef(Src))) { 3968*0fca6ea1SDimitry Andric assert(BV->getNumSources() > 0); 3969b3edf446SDimitry Andric // Based on first element decide which mod we match, neg or abs 3970*0fca6ea1SDimitry Andric MachineInstr *ElF32 = MRI->getVRegDef(BV->getSourceReg(0)); 3971*0fca6ea1SDimitry Andric unsigned ModOpcode = (ElF32->getOpcode() == AMDGPU::G_FNEG) 3972*0fca6ea1SDimitry Andric ? AMDGPU::G_FNEG 3973b3edf446SDimitry Andric : AMDGPU::G_FABS; 3974*0fca6ea1SDimitry Andric for (unsigned i = 0; i < BV->getNumSources(); ++i) { 3975*0fca6ea1SDimitry Andric ElF32 = MRI->getVRegDef(BV->getSourceReg(i)); 3976b3edf446SDimitry Andric if (ElF32->getOpcode() != ModOpcode) 3977b3edf446SDimitry Andric break; 3978b3edf446SDimitry Andric EltsF32.push_back(ElF32->getOperand(1).getReg()); 3979b3edf446SDimitry Andric } 3980b3edf446SDimitry Andric 3981b3edf446SDimitry Andric // All elements had ModOpcode modifier 3982b3edf446SDimitry Andric if (BV->getNumSources() == EltsF32.size()) { 3983b3edf446SDimitry Andric selectWMMAModsNegAbs(ModOpcode, Mods, EltsF32, Src, Root.getParent(), 3984b3edf446SDimitry Andric *MRI); 3985b3edf446SDimitry Andric } 3986b3edf446SDimitry Andric } 3987b3edf446SDimitry Andric 3988b3edf446SDimitry Andric return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, 3989b3edf446SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}}; 3990b3edf446SDimitry Andric } 3991b3edf446SDimitry Andric 3992b3edf446SDimitry Andric InstructionSelector::ComplexRendererFns 3993b3edf446SDimitry Andric AMDGPUInstructionSelector::selectWMMAModsF16Neg(MachineOperand &Root) const { 3994b3edf446SDimitry Andric Register Src = Root.getReg(); 3995b3edf446SDimitry Andric unsigned Mods = SISrcMods::OP_SEL_1; 3996b3edf446SDimitry Andric SmallVector<Register, 8> EltsV2F16; 3997b3edf446SDimitry Andric 3998b3edf446SDimitry Andric if (GConcatVectors *CV = dyn_cast<GConcatVectors>(MRI->getVRegDef(Src))) { 3999b3edf446SDimitry Andric for (unsigned i = 0; i < CV->getNumSources(); ++i) { 4000b3edf446SDimitry Andric Register FNegSrc; 4001b3edf446SDimitry Andric if (!mi_match(CV->getSourceReg(i), *MRI, m_GFNeg(m_Reg(FNegSrc)))) 4002b3edf446SDimitry Andric break; 4003b3edf446SDimitry Andric EltsV2F16.push_back(FNegSrc); 4004b3edf446SDimitry Andric } 4005b3edf446SDimitry Andric 4006b3edf446SDimitry Andric // All elements had ModOpcode modifier 4007b3edf446SDimitry Andric if (CV->getNumSources() == EltsV2F16.size()) { 4008b3edf446SDimitry Andric Mods |= SISrcMods::NEG; 4009b3edf446SDimitry Andric Mods |= SISrcMods::NEG_HI; 4010b3edf446SDimitry Andric Src = buildRegSequence(EltsV2F16, Root.getParent(), *MRI); 4011b3edf446SDimitry Andric } 4012b3edf446SDimitry Andric } 4013b3edf446SDimitry Andric 4014b3edf446SDimitry Andric return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, 4015b3edf446SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}}; 4016b3edf446SDimitry Andric } 4017b3edf446SDimitry Andric 4018b3edf446SDimitry Andric InstructionSelector::ComplexRendererFns 4019b3edf446SDimitry Andric AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(MachineOperand &Root) const { 4020b3edf446SDimitry Andric Register Src = Root.getReg(); 4021b3edf446SDimitry Andric unsigned Mods = SISrcMods::OP_SEL_1; 4022b3edf446SDimitry Andric SmallVector<Register, 8> EltsV2F16; 4023b3edf446SDimitry Andric 4024b3edf446SDimitry Andric if (GConcatVectors *CV = dyn_cast<GConcatVectors>(MRI->getVRegDef(Src))) { 4025*0fca6ea1SDimitry Andric assert(CV->getNumSources() > 0); 4026*0fca6ea1SDimitry Andric MachineInstr *ElV2F16 = MRI->getVRegDef(CV->getSourceReg(0)); 4027b3edf446SDimitry Andric // Based on first element decide which mod we match, neg or abs 4028*0fca6ea1SDimitry Andric unsigned ModOpcode = (ElV2F16->getOpcode() == AMDGPU::G_FNEG) 4029*0fca6ea1SDimitry Andric ? AMDGPU::G_FNEG 4030b3edf446SDimitry Andric : AMDGPU::G_FABS; 4031*0fca6ea1SDimitry Andric 4032*0fca6ea1SDimitry Andric for (unsigned i = 0; i < CV->getNumSources(); ++i) { 4033*0fca6ea1SDimitry Andric ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i)); 4034b3edf446SDimitry Andric if (ElV2F16->getOpcode() != ModOpcode) 4035b3edf446SDimitry Andric break; 4036b3edf446SDimitry Andric EltsV2F16.push_back(ElV2F16->getOperand(1).getReg()); 4037b3edf446SDimitry Andric } 4038b3edf446SDimitry Andric 4039b3edf446SDimitry Andric // All elements had ModOpcode modifier 4040b3edf446SDimitry Andric if (CV->getNumSources() == EltsV2F16.size()) { 4041b3edf446SDimitry Andric MachineIRBuilder B(*Root.getParent()); 4042b3edf446SDimitry Andric selectWMMAModsNegAbs(ModOpcode, Mods, EltsV2F16, Src, Root.getParent(), 4043b3edf446SDimitry Andric *MRI); 4044b3edf446SDimitry Andric } 4045b3edf446SDimitry Andric } 4046b3edf446SDimitry Andric 4047b3edf446SDimitry Andric return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, 4048b3edf446SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}}; 4049b3edf446SDimitry Andric } 4050b3edf446SDimitry Andric 4051b3edf446SDimitry Andric InstructionSelector::ComplexRendererFns 4052b3edf446SDimitry Andric AMDGPUInstructionSelector::selectWMMAVISrc(MachineOperand &Root) const { 4053b3edf446SDimitry Andric std::optional<FPValueAndVReg> FPValReg; 4054b3edf446SDimitry Andric if (mi_match(Root.getReg(), *MRI, m_GFCstOrSplat(FPValReg))) { 4055*0fca6ea1SDimitry Andric if (TII.isInlineConstant(FPValReg->Value)) { 4056b3edf446SDimitry Andric return {{[=](MachineInstrBuilder &MIB) { 4057b3edf446SDimitry Andric MIB.addImm(FPValReg->Value.bitcastToAPInt().getSExtValue()); 4058b3edf446SDimitry Andric }}}; 4059b3edf446SDimitry Andric } 4060b3edf446SDimitry Andric // Non-inlineable splat floats should not fall-through for integer immediate 4061b3edf446SDimitry Andric // checks. 4062b3edf446SDimitry Andric return {}; 4063b3edf446SDimitry Andric } 4064b3edf446SDimitry Andric 4065b3edf446SDimitry Andric APInt ICst; 4066b3edf446SDimitry Andric if (mi_match(Root.getReg(), *MRI, m_ICstOrSplat(ICst))) { 4067b3edf446SDimitry Andric if (TII.isInlineConstant(ICst)) { 4068b3edf446SDimitry Andric return { 4069b3edf446SDimitry Andric {[=](MachineInstrBuilder &MIB) { MIB.addImm(ICst.getSExtValue()); }}}; 4070b3edf446SDimitry Andric } 4071b3edf446SDimitry Andric } 4072b3edf446SDimitry Andric 4073b3edf446SDimitry Andric return {}; 4074b3edf446SDimitry Andric } 4075b3edf446SDimitry Andric 4076b3edf446SDimitry Andric InstructionSelector::ComplexRendererFns 4077b3edf446SDimitry Andric AMDGPUInstructionSelector::selectSWMMACIndex8(MachineOperand &Root) const { 4078b3edf446SDimitry Andric Register Src = 4079b3edf446SDimitry Andric getDefIgnoringCopies(Root.getReg(), *MRI)->getOperand(0).getReg(); 4080b3edf446SDimitry Andric unsigned Key = 0; 4081b3edf446SDimitry Andric 4082b3edf446SDimitry Andric Register ShiftSrc; 4083b3edf446SDimitry Andric std::optional<ValueAndVReg> ShiftAmt; 4084b3edf446SDimitry Andric if (mi_match(Src, *MRI, m_GLShr(m_Reg(ShiftSrc), m_GCst(ShiftAmt))) && 4085b3edf446SDimitry Andric MRI->getType(ShiftSrc).getSizeInBits() == 32 && 4086b3edf446SDimitry Andric ShiftAmt->Value.getZExtValue() % 8 == 0) { 4087b3edf446SDimitry Andric Key = ShiftAmt->Value.getZExtValue() / 8; 4088b3edf446SDimitry Andric Src = ShiftSrc; 4089b3edf446SDimitry Andric } 4090b3edf446SDimitry Andric 4091b3edf446SDimitry Andric return {{ 4092b3edf446SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, 4093b3edf446SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); } // index_key 4094b3edf446SDimitry Andric }}; 4095b3edf446SDimitry Andric } 4096b3edf446SDimitry Andric 4097b3edf446SDimitry Andric InstructionSelector::ComplexRendererFns 4098b3edf446SDimitry Andric AMDGPUInstructionSelector::selectSWMMACIndex16(MachineOperand &Root) const { 4099b3edf446SDimitry Andric 4100b3edf446SDimitry Andric Register Src = 4101b3edf446SDimitry Andric getDefIgnoringCopies(Root.getReg(), *MRI)->getOperand(0).getReg(); 4102b3edf446SDimitry Andric unsigned Key = 0; 4103b3edf446SDimitry Andric 4104b3edf446SDimitry Andric Register ShiftSrc; 4105b3edf446SDimitry Andric std::optional<ValueAndVReg> ShiftAmt; 4106b3edf446SDimitry Andric if (mi_match(Src, *MRI, m_GLShr(m_Reg(ShiftSrc), m_GCst(ShiftAmt))) && 4107b3edf446SDimitry Andric MRI->getType(ShiftSrc).getSizeInBits() == 32 && 4108b3edf446SDimitry Andric ShiftAmt->Value.getZExtValue() == 16) { 4109b3edf446SDimitry Andric Src = ShiftSrc; 4110b3edf446SDimitry Andric Key = 1; 4111b3edf446SDimitry Andric } 4112b3edf446SDimitry Andric 4113b3edf446SDimitry Andric return {{ 4114b3edf446SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, 4115b3edf446SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); } // index_key 4116b3edf446SDimitry Andric }}; 4117b3edf446SDimitry Andric } 4118b3edf446SDimitry Andric 411981ad6265SDimitry Andric InstructionSelector::ComplexRendererFns 4120bdd1243dSDimitry Andric AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const { 4121480093f4SDimitry Andric Register Src; 4122480093f4SDimitry Andric unsigned Mods; 41235ffd83dbSDimitry Andric std::tie(Src, Mods) = selectVOP3ModsImpl(Root); 4124480093f4SDimitry Andric 4125bdd1243dSDimitry Andric // FIXME: Handle op_sel 4126480093f4SDimitry Andric return {{ 4127480093f4SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, 4128480093f4SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods 4129480093f4SDimitry Andric }}; 4130480093f4SDimitry Andric } 4131480093f4SDimitry Andric 4132480093f4SDimitry Andric InstructionSelector::ComplexRendererFns 413381ad6265SDimitry Andric AMDGPUInstructionSelector::selectVINTERPMods(MachineOperand &Root) const { 413481ad6265SDimitry Andric Register Src; 413581ad6265SDimitry Andric unsigned Mods; 413681ad6265SDimitry Andric std::tie(Src, Mods) = selectVOP3ModsImpl(Root, 413706c3fb27SDimitry Andric /*IsCanonicalizing=*/true, 413806c3fb27SDimitry Andric /*AllowAbs=*/false, 413906c3fb27SDimitry Andric /*OpSel=*/false); 414081ad6265SDimitry Andric 414181ad6265SDimitry Andric return {{ 4142bdd1243dSDimitry Andric [=](MachineInstrBuilder &MIB) { 4143bdd1243dSDimitry Andric MIB.addReg( 4144bdd1243dSDimitry Andric copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, /* ForceVGPR */ true)); 4145bdd1243dSDimitry Andric }, 414681ad6265SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods 414781ad6265SDimitry Andric }}; 414881ad6265SDimitry Andric } 414981ad6265SDimitry Andric 415081ad6265SDimitry Andric InstructionSelector::ComplexRendererFns 415181ad6265SDimitry Andric AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const { 415281ad6265SDimitry Andric Register Src; 415381ad6265SDimitry Andric unsigned Mods; 415481ad6265SDimitry Andric std::tie(Src, Mods) = selectVOP3ModsImpl(Root, 415506c3fb27SDimitry Andric /*IsCanonicalizing=*/true, 415606c3fb27SDimitry Andric /*AllowAbs=*/false, 415706c3fb27SDimitry Andric /*OpSel=*/true); 415881ad6265SDimitry Andric 415981ad6265SDimitry Andric return {{ 4160bdd1243dSDimitry Andric [=](MachineInstrBuilder &MIB) { 4161bdd1243dSDimitry Andric MIB.addReg( 4162bdd1243dSDimitry Andric copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, /* ForceVGPR */ true)); 4163bdd1243dSDimitry Andric }, 416481ad6265SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }, // src0_mods 416581ad6265SDimitry Andric }}; 416681ad6265SDimitry Andric } 416781ad6265SDimitry Andric 4168fcaf7f86SDimitry Andric bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root, 4169fcaf7f86SDimitry Andric Register &Base, 4170fcaf7f86SDimitry Andric Register *SOffset, 4171fcaf7f86SDimitry Andric int64_t *Offset) const { 4172fcaf7f86SDimitry Andric MachineInstr *MI = Root.getParent(); 4173fcaf7f86SDimitry Andric MachineBasicBlock *MBB = MI->getParent(); 4174fcaf7f86SDimitry Andric 4175fcaf7f86SDimitry Andric // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits, 4176fcaf7f86SDimitry Andric // then we can select all ptr + 32-bit offsets. 4177fcaf7f86SDimitry Andric SmallVector<GEPInfo, 4> AddrInfo; 4178fcaf7f86SDimitry Andric getAddrModeInfo(*MI, *MRI, AddrInfo); 4179fcaf7f86SDimitry Andric 4180fcaf7f86SDimitry Andric if (AddrInfo.empty()) 4181fcaf7f86SDimitry Andric return false; 4182fcaf7f86SDimitry Andric 4183fcaf7f86SDimitry Andric const GEPInfo &GEPI = AddrInfo[0]; 4184*0fca6ea1SDimitry Andric std::optional<int64_t> EncodedImm; 4185fcaf7f86SDimitry Andric 4186fcaf7f86SDimitry Andric if (SOffset && Offset) { 4187*0fca6ea1SDimitry Andric EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPI.Imm, /*IsBuffer=*/false, 4188*0fca6ea1SDimitry Andric /*HasSOffset=*/true); 4189fcaf7f86SDimitry Andric if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm && 4190fcaf7f86SDimitry Andric AddrInfo.size() > 1) { 4191fcaf7f86SDimitry Andric const GEPInfo &GEPI2 = AddrInfo[1]; 4192fcaf7f86SDimitry Andric if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) { 4193fcaf7f86SDimitry Andric if (Register OffsetReg = 4194fcaf7f86SDimitry Andric matchZeroExtendFromS32(*MRI, GEPI2.SgprParts[1])) { 4195fcaf7f86SDimitry Andric Base = GEPI2.SgprParts[0]; 4196fcaf7f86SDimitry Andric *SOffset = OffsetReg; 4197fcaf7f86SDimitry Andric *Offset = *EncodedImm; 4198*0fca6ea1SDimitry Andric if (*Offset >= 0 || !AMDGPU::hasSMRDSignedImmOffset(STI)) 4199*0fca6ea1SDimitry Andric return true; 4200*0fca6ea1SDimitry Andric 4201*0fca6ea1SDimitry Andric // For unbuffered smem loads, it is illegal for the Immediate Offset 4202*0fca6ea1SDimitry Andric // to be negative if the resulting (Offset + (M0 or SOffset or zero) 4203*0fca6ea1SDimitry Andric // is negative. Handle the case where the Immediate Offset + SOffset 4204*0fca6ea1SDimitry Andric // is negative. 4205*0fca6ea1SDimitry Andric auto SKnown = KB->getKnownBits(*SOffset); 4206*0fca6ea1SDimitry Andric if (*Offset + SKnown.getMinValue().getSExtValue() < 0) 4207*0fca6ea1SDimitry Andric return false; 4208*0fca6ea1SDimitry Andric 4209fcaf7f86SDimitry Andric return true; 4210fcaf7f86SDimitry Andric } 4211fcaf7f86SDimitry Andric } 4212fcaf7f86SDimitry Andric } 4213fcaf7f86SDimitry Andric return false; 4214fcaf7f86SDimitry Andric } 4215fcaf7f86SDimitry Andric 4216*0fca6ea1SDimitry Andric EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPI.Imm, /*IsBuffer=*/false, 4217*0fca6ea1SDimitry Andric /*HasSOffset=*/false); 4218fcaf7f86SDimitry Andric if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) { 4219fcaf7f86SDimitry Andric Base = GEPI.SgprParts[0]; 4220fcaf7f86SDimitry Andric *Offset = *EncodedImm; 4221fcaf7f86SDimitry Andric return true; 4222fcaf7f86SDimitry Andric } 4223fcaf7f86SDimitry Andric 4224fcaf7f86SDimitry Andric // SGPR offset is unsigned. 4225fcaf7f86SDimitry Andric if (SOffset && GEPI.SgprParts.size() == 1 && isUInt<32>(GEPI.Imm) && 4226fcaf7f86SDimitry Andric GEPI.Imm != 0) { 4227fcaf7f86SDimitry Andric // If we make it this far we have a load with an 32-bit immediate offset. 4228fcaf7f86SDimitry Andric // It is OK to select this using a sgpr offset, because we have already 4229fcaf7f86SDimitry Andric // failed trying to select this load into one of the _IMM variants since 4230fcaf7f86SDimitry Andric // the _IMM Patterns are considered before the _SGPR patterns. 4231fcaf7f86SDimitry Andric Base = GEPI.SgprParts[0]; 4232fcaf7f86SDimitry Andric *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); 4233fcaf7f86SDimitry Andric BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset) 4234fcaf7f86SDimitry Andric .addImm(GEPI.Imm); 4235fcaf7f86SDimitry Andric return true; 4236fcaf7f86SDimitry Andric } 4237fcaf7f86SDimitry Andric 4238fcaf7f86SDimitry Andric if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) { 4239fcaf7f86SDimitry Andric if (Register OffsetReg = matchZeroExtendFromS32(*MRI, GEPI.SgprParts[1])) { 4240fcaf7f86SDimitry Andric Base = GEPI.SgprParts[0]; 4241fcaf7f86SDimitry Andric *SOffset = OffsetReg; 4242fcaf7f86SDimitry Andric return true; 4243fcaf7f86SDimitry Andric } 4244fcaf7f86SDimitry Andric } 4245fcaf7f86SDimitry Andric 4246fcaf7f86SDimitry Andric return false; 4247fcaf7f86SDimitry Andric } 4248fcaf7f86SDimitry Andric 424981ad6265SDimitry Andric InstructionSelector::ComplexRendererFns 42508bcb0991SDimitry Andric AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const { 4251fcaf7f86SDimitry Andric Register Base; 4252fcaf7f86SDimitry Andric int64_t Offset; 4253fcaf7f86SDimitry Andric if (!selectSmrdOffset(Root, Base, /* SOffset= */ nullptr, &Offset)) 4254bdd1243dSDimitry Andric return std::nullopt; 42550b57cec5SDimitry Andric 4256fcaf7f86SDimitry Andric return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); }, 4257fcaf7f86SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}}; 42580b57cec5SDimitry Andric } 42590b57cec5SDimitry Andric 42600b57cec5SDimitry Andric InstructionSelector::ComplexRendererFns 42610b57cec5SDimitry Andric AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const { 42620b57cec5SDimitry Andric SmallVector<GEPInfo, 4> AddrInfo; 42638bcb0991SDimitry Andric getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo); 42640b57cec5SDimitry Andric 42650b57cec5SDimitry Andric if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1) 4266bdd1243dSDimitry Andric return std::nullopt; 42670b57cec5SDimitry Andric 42680b57cec5SDimitry Andric const GEPInfo &GEPInfo = AddrInfo[0]; 42695ffd83dbSDimitry Andric Register PtrReg = GEPInfo.SgprParts[0]; 4270bdd1243dSDimitry Andric std::optional<int64_t> EncodedImm = 42715ffd83dbSDimitry Andric AMDGPU::getSMRDEncodedLiteralOffset32(STI, GEPInfo.Imm); 42725ffd83dbSDimitry Andric if (!EncodedImm) 4273bdd1243dSDimitry Andric return std::nullopt; 42740b57cec5SDimitry Andric 42750b57cec5SDimitry Andric return {{ 42760b57cec5SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); }, 42775ffd83dbSDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); } 42780b57cec5SDimitry Andric }}; 42790b57cec5SDimitry Andric } 42800b57cec5SDimitry Andric 42810b57cec5SDimitry Andric InstructionSelector::ComplexRendererFns 42820b57cec5SDimitry Andric AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const { 4283fcaf7f86SDimitry Andric Register Base, SOffset; 4284fcaf7f86SDimitry Andric if (!selectSmrdOffset(Root, Base, &SOffset, /* Offset= */ nullptr)) 4285bdd1243dSDimitry Andric return std::nullopt; 42860b57cec5SDimitry Andric 4287fcaf7f86SDimitry Andric return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); }, 4288fcaf7f86SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }}}; 4289753f127fSDimitry Andric } 4290753f127fSDimitry Andric 4291fcaf7f86SDimitry Andric InstructionSelector::ComplexRendererFns 4292fcaf7f86SDimitry Andric AMDGPUInstructionSelector::selectSmrdSgprImm(MachineOperand &Root) const { 4293fcaf7f86SDimitry Andric Register Base, SOffset; 4294fcaf7f86SDimitry Andric int64_t Offset; 4295fcaf7f86SDimitry Andric if (!selectSmrdOffset(Root, Base, &SOffset, &Offset)) 4296bdd1243dSDimitry Andric return std::nullopt; 4297fcaf7f86SDimitry Andric 4298fcaf7f86SDimitry Andric return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); }, 4299fcaf7f86SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }, 4300fcaf7f86SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}}; 43010b57cec5SDimitry Andric } 43020b57cec5SDimitry Andric 4303e8d8bef9SDimitry Andric std::pair<Register, int> 4304fe6060f1SDimitry Andric AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root, 4305fe6060f1SDimitry Andric uint64_t FlatVariant) const { 43060b57cec5SDimitry Andric MachineInstr *MI = Root.getParent(); 43070b57cec5SDimitry Andric 4308bdd1243dSDimitry Andric auto Default = std::pair(Root.getReg(), 0); 43090b57cec5SDimitry Andric 43100b57cec5SDimitry Andric if (!STI.hasFlatInstOffsets()) 43110b57cec5SDimitry Andric return Default; 43120b57cec5SDimitry Andric 4313e8d8bef9SDimitry Andric Register PtrBase; 4314e8d8bef9SDimitry Andric int64_t ConstOffset; 4315e8d8bef9SDimitry Andric std::tie(PtrBase, ConstOffset) = 4316e8d8bef9SDimitry Andric getPtrBaseWithConstantOffset(Root.getReg(), *MRI); 43175f757f3fSDimitry Andric 43185f757f3fSDimitry Andric if (ConstOffset == 0 || (FlatVariant == SIInstrFlags::FlatScratch && 43195f757f3fSDimitry Andric !isFlatScratchBaseLegal(Root.getReg()))) 43200b57cec5SDimitry Andric return Default; 43210b57cec5SDimitry Andric 43220b57cec5SDimitry Andric unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace(); 4323fe6060f1SDimitry Andric if (!TII.isLegalFLATOffset(ConstOffset, AddrSpace, FlatVariant)) 43240b57cec5SDimitry Andric return Default; 43250b57cec5SDimitry Andric 4326bdd1243dSDimitry Andric return std::pair(PtrBase, ConstOffset); 43270b57cec5SDimitry Andric } 43280b57cec5SDimitry Andric 43290b57cec5SDimitry Andric InstructionSelector::ComplexRendererFns 43300b57cec5SDimitry Andric AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const { 4331fe6060f1SDimitry Andric auto PtrWithOffset = selectFlatOffsetImpl(Root, SIInstrFlags::FLAT); 4332e8d8bef9SDimitry Andric 4333e8d8bef9SDimitry Andric return {{ 4334e8d8bef9SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrWithOffset.first); }, 4335e8d8bef9SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(PtrWithOffset.second); }, 4336e8d8bef9SDimitry Andric }}; 43370b57cec5SDimitry Andric } 43380b57cec5SDimitry Andric 43390b57cec5SDimitry Andric InstructionSelector::ComplexRendererFns 4340fe6060f1SDimitry Andric AMDGPUInstructionSelector::selectGlobalOffset(MachineOperand &Root) const { 4341fe6060f1SDimitry Andric auto PtrWithOffset = selectFlatOffsetImpl(Root, SIInstrFlags::FlatGlobal); 4342fe6060f1SDimitry Andric 4343fe6060f1SDimitry Andric return {{ 4344fe6060f1SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrWithOffset.first); }, 4345fe6060f1SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(PtrWithOffset.second); }, 4346fe6060f1SDimitry Andric }}; 4347fe6060f1SDimitry Andric } 4348fe6060f1SDimitry Andric 4349fe6060f1SDimitry Andric InstructionSelector::ComplexRendererFns 4350fe6060f1SDimitry Andric AMDGPUInstructionSelector::selectScratchOffset(MachineOperand &Root) const { 4351fe6060f1SDimitry Andric auto PtrWithOffset = selectFlatOffsetImpl(Root, SIInstrFlags::FlatScratch); 4352e8d8bef9SDimitry Andric 4353e8d8bef9SDimitry Andric return {{ 4354e8d8bef9SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrWithOffset.first); }, 4355e8d8bef9SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(PtrWithOffset.second); }, 4356e8d8bef9SDimitry Andric }}; 4357e8d8bef9SDimitry Andric } 4358e8d8bef9SDimitry Andric 4359e8d8bef9SDimitry Andric // Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset) 4360e8d8bef9SDimitry Andric InstructionSelector::ComplexRendererFns 4361e8d8bef9SDimitry Andric AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const { 4362e8d8bef9SDimitry Andric Register Addr = Root.getReg(); 4363e8d8bef9SDimitry Andric Register PtrBase; 4364e8d8bef9SDimitry Andric int64_t ConstOffset; 4365e8d8bef9SDimitry Andric int64_t ImmOffset = 0; 4366e8d8bef9SDimitry Andric 4367e8d8bef9SDimitry Andric // Match the immediate offset first, which canonically is moved as low as 4368e8d8bef9SDimitry Andric // possible. 4369e8d8bef9SDimitry Andric std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI); 4370e8d8bef9SDimitry Andric 4371e8d8bef9SDimitry Andric if (ConstOffset != 0) { 4372fe6060f1SDimitry Andric if (TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::GLOBAL_ADDRESS, 4373fe6060f1SDimitry Andric SIInstrFlags::FlatGlobal)) { 4374e8d8bef9SDimitry Andric Addr = PtrBase; 4375e8d8bef9SDimitry Andric ImmOffset = ConstOffset; 4376fe6060f1SDimitry Andric } else { 4377e8d8bef9SDimitry Andric auto PtrBaseDef = getDefSrcRegIgnoringCopies(PtrBase, *MRI); 4378e8d8bef9SDimitry Andric if (isSGPR(PtrBaseDef->Reg)) { 4379fe6060f1SDimitry Andric if (ConstOffset > 0) { 4380e8d8bef9SDimitry Andric // Offset is too large. 4381e8d8bef9SDimitry Andric // 4382fe6060f1SDimitry Andric // saddr + large_offset -> saddr + 4383fe6060f1SDimitry Andric // (voffset = large_offset & ~MaxOffset) + 4384fe6060f1SDimitry Andric // (large_offset & MaxOffset); 4385e8d8bef9SDimitry Andric int64_t SplitImmOffset, RemainderOffset; 4386fe6060f1SDimitry Andric std::tie(SplitImmOffset, RemainderOffset) = TII.splitFlatOffset( 4387fe6060f1SDimitry Andric ConstOffset, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal); 4388e8d8bef9SDimitry Andric 4389e8d8bef9SDimitry Andric if (isUInt<32>(RemainderOffset)) { 4390e8d8bef9SDimitry Andric MachineInstr *MI = Root.getParent(); 4391e8d8bef9SDimitry Andric MachineBasicBlock *MBB = MI->getParent(); 4392fe6060f1SDimitry Andric Register HighBits = 4393fe6060f1SDimitry Andric MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); 4394e8d8bef9SDimitry Andric 4395e8d8bef9SDimitry Andric BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), 4396e8d8bef9SDimitry Andric HighBits) 4397e8d8bef9SDimitry Andric .addImm(RemainderOffset); 4398e8d8bef9SDimitry Andric 4399e8d8bef9SDimitry Andric return {{ 4400e8d8bef9SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrBase); }, // saddr 4401fe6060f1SDimitry Andric [=](MachineInstrBuilder &MIB) { 4402fe6060f1SDimitry Andric MIB.addReg(HighBits); 4403fe6060f1SDimitry Andric }, // voffset 4404e8d8bef9SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(SplitImmOffset); }, 4405e8d8bef9SDimitry Andric }}; 4406e8d8bef9SDimitry Andric } 4407e8d8bef9SDimitry Andric } 4408fe6060f1SDimitry Andric 4409fe6060f1SDimitry Andric // We are adding a 64 bit SGPR and a constant. If constant bus limit 4410fe6060f1SDimitry Andric // is 1 we would need to perform 1 or 2 extra moves for each half of 4411fe6060f1SDimitry Andric // the constant and it is better to do a scalar add and then issue a 4412fe6060f1SDimitry Andric // single VALU instruction to materialize zero. Otherwise it is less 4413fe6060f1SDimitry Andric // instructions to perform VALU adds with immediates or inline literals. 4414fe6060f1SDimitry Andric unsigned NumLiterals = 4415fe6060f1SDimitry Andric !TII.isInlineConstant(APInt(32, ConstOffset & 0xffffffff)) + 4416fe6060f1SDimitry Andric !TII.isInlineConstant(APInt(32, ConstOffset >> 32)); 4417fe6060f1SDimitry Andric if (STI.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals) 4418bdd1243dSDimitry Andric return std::nullopt; 4419fe6060f1SDimitry Andric } 4420e8d8bef9SDimitry Andric } 4421e8d8bef9SDimitry Andric } 4422e8d8bef9SDimitry Andric 4423e8d8bef9SDimitry Andric // Match the variable offset. 442481ad6265SDimitry Andric auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI); 4425fe6060f1SDimitry Andric if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) { 4426e8d8bef9SDimitry Andric // Look through the SGPR->VGPR copy. 4427e8d8bef9SDimitry Andric Register SAddr = 4428e8d8bef9SDimitry Andric getSrcRegIgnoringCopies(AddrDef->MI->getOperand(1).getReg(), *MRI); 4429e8d8bef9SDimitry Andric 4430bdd1243dSDimitry Andric if (isSGPR(SAddr)) { 4431e8d8bef9SDimitry Andric Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg(); 4432e8d8bef9SDimitry Andric 4433e8d8bef9SDimitry Andric // It's possible voffset is an SGPR here, but the copy to VGPR will be 4434e8d8bef9SDimitry Andric // inserted later. 4435fe6060f1SDimitry Andric if (Register VOffset = matchZeroExtendFromS32(*MRI, PtrBaseOffset)) { 4436e8d8bef9SDimitry Andric return {{[=](MachineInstrBuilder &MIB) { // saddr 4437e8d8bef9SDimitry Andric MIB.addReg(SAddr); 4438e8d8bef9SDimitry Andric }, 4439e8d8bef9SDimitry Andric [=](MachineInstrBuilder &MIB) { // voffset 4440e8d8bef9SDimitry Andric MIB.addReg(VOffset); 4441e8d8bef9SDimitry Andric }, 4442e8d8bef9SDimitry Andric [=](MachineInstrBuilder &MIB) { // offset 4443e8d8bef9SDimitry Andric MIB.addImm(ImmOffset); 4444e8d8bef9SDimitry Andric }}}; 4445e8d8bef9SDimitry Andric } 4446fe6060f1SDimitry Andric } 4447fe6060f1SDimitry Andric } 4448fe6060f1SDimitry Andric 4449fe6060f1SDimitry Andric // FIXME: We should probably have folded COPY (G_IMPLICIT_DEF) earlier, and 4450fe6060f1SDimitry Andric // drop this. 4451fe6060f1SDimitry Andric if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF || 4452fe6060f1SDimitry Andric AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg)) 4453bdd1243dSDimitry Andric return std::nullopt; 4454fe6060f1SDimitry Andric 4455fe6060f1SDimitry Andric // It's cheaper to materialize a single 32-bit zero for vaddr than the two 4456fe6060f1SDimitry Andric // moves required to copy a 64-bit SGPR to VGPR. 4457fe6060f1SDimitry Andric MachineInstr *MI = Root.getParent(); 4458fe6060f1SDimitry Andric MachineBasicBlock *MBB = MI->getParent(); 4459fe6060f1SDimitry Andric Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); 4460fe6060f1SDimitry Andric 4461fe6060f1SDimitry Andric BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset) 4462fe6060f1SDimitry Andric .addImm(0); 4463fe6060f1SDimitry Andric 4464fe6060f1SDimitry Andric return {{ 4465fe6060f1SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(AddrDef->Reg); }, // saddr 4466fe6060f1SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(VOffset); }, // voffset 4467fe6060f1SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset 4468fe6060f1SDimitry Andric }}; 4469fe6060f1SDimitry Andric } 4470e8d8bef9SDimitry Andric 4471e8d8bef9SDimitry Andric InstructionSelector::ComplexRendererFns 4472e8d8bef9SDimitry Andric AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const { 4473e8d8bef9SDimitry Andric Register Addr = Root.getReg(); 4474e8d8bef9SDimitry Andric Register PtrBase; 4475e8d8bef9SDimitry Andric int64_t ConstOffset; 4476e8d8bef9SDimitry Andric int64_t ImmOffset = 0; 4477e8d8bef9SDimitry Andric 4478e8d8bef9SDimitry Andric // Match the immediate offset first, which canonically is moved as low as 4479e8d8bef9SDimitry Andric // possible. 4480e8d8bef9SDimitry Andric std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI); 4481e8d8bef9SDimitry Andric 44825f757f3fSDimitry Andric if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) && 4483fe6060f1SDimitry Andric TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::PRIVATE_ADDRESS, 4484fe6060f1SDimitry Andric SIInstrFlags::FlatScratch)) { 4485e8d8bef9SDimitry Andric Addr = PtrBase; 4486e8d8bef9SDimitry Andric ImmOffset = ConstOffset; 4487e8d8bef9SDimitry Andric } 4488e8d8bef9SDimitry Andric 4489e8d8bef9SDimitry Andric auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI); 4490e8d8bef9SDimitry Andric if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) { 4491e8d8bef9SDimitry Andric int FI = AddrDef->MI->getOperand(1).getIndex(); 4492e8d8bef9SDimitry Andric return {{ 4493e8d8bef9SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addFrameIndex(FI); }, // saddr 4494e8d8bef9SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset 4495e8d8bef9SDimitry Andric }}; 4496e8d8bef9SDimitry Andric } 4497e8d8bef9SDimitry Andric 4498e8d8bef9SDimitry Andric Register SAddr = AddrDef->Reg; 4499e8d8bef9SDimitry Andric 4500e8d8bef9SDimitry Andric if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) { 4501e8d8bef9SDimitry Andric Register LHS = AddrDef->MI->getOperand(1).getReg(); 4502e8d8bef9SDimitry Andric Register RHS = AddrDef->MI->getOperand(2).getReg(); 4503e8d8bef9SDimitry Andric auto LHSDef = getDefSrcRegIgnoringCopies(LHS, *MRI); 4504e8d8bef9SDimitry Andric auto RHSDef = getDefSrcRegIgnoringCopies(RHS, *MRI); 4505e8d8bef9SDimitry Andric 450681ad6265SDimitry Andric if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX && 4507e8d8bef9SDimitry Andric isSGPR(RHSDef->Reg)) { 4508e8d8bef9SDimitry Andric int FI = LHSDef->MI->getOperand(1).getIndex(); 4509e8d8bef9SDimitry Andric MachineInstr &I = *Root.getParent(); 4510e8d8bef9SDimitry Andric MachineBasicBlock *BB = I.getParent(); 4511e8d8bef9SDimitry Andric const DebugLoc &DL = I.getDebugLoc(); 4512e8d8bef9SDimitry Andric SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); 4513e8d8bef9SDimitry Andric 4514fe6060f1SDimitry Andric BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_I32), SAddr) 4515e8d8bef9SDimitry Andric .addFrameIndex(FI) 45165f757f3fSDimitry Andric .addReg(RHSDef->Reg) 45175f757f3fSDimitry Andric .setOperandDead(3); // Dead scc 4518e8d8bef9SDimitry Andric } 4519e8d8bef9SDimitry Andric } 4520e8d8bef9SDimitry Andric 4521e8d8bef9SDimitry Andric if (!isSGPR(SAddr)) 4522bdd1243dSDimitry Andric return std::nullopt; 4523e8d8bef9SDimitry Andric 4524e8d8bef9SDimitry Andric return {{ 4525e8d8bef9SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(SAddr); }, // saddr 4526e8d8bef9SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset 4527e8d8bef9SDimitry Andric }}; 45280b57cec5SDimitry Andric } 45290b57cec5SDimitry Andric 453081ad6265SDimitry Andric // Check whether the flat scratch SVS swizzle bug affects this access. 453181ad6265SDimitry Andric bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug( 453281ad6265SDimitry Andric Register VAddr, Register SAddr, uint64_t ImmOffset) const { 453381ad6265SDimitry Andric if (!Subtarget->hasFlatScratchSVSSwizzleBug()) 453481ad6265SDimitry Andric return false; 453581ad6265SDimitry Andric 453681ad6265SDimitry Andric // The bug affects the swizzling of SVS accesses if there is any carry out 453781ad6265SDimitry Andric // from the two low order bits (i.e. from bit 1 into bit 2) when adding 453881ad6265SDimitry Andric // voffset to (soffset + inst_offset). 453906c3fb27SDimitry Andric auto VKnown = KB->getKnownBits(VAddr); 454081ad6265SDimitry Andric auto SKnown = KnownBits::computeForAddSub( 4541*0fca6ea1SDimitry Andric /*Add=*/true, /*NSW=*/false, /*NUW=*/false, KB->getKnownBits(SAddr), 454281ad6265SDimitry Andric KnownBits::makeConstant(APInt(32, ImmOffset))); 454381ad6265SDimitry Andric uint64_t VMax = VKnown.getMaxValue().getZExtValue(); 454481ad6265SDimitry Andric uint64_t SMax = SKnown.getMaxValue().getZExtValue(); 454581ad6265SDimitry Andric return (VMax & 3) + (SMax & 3) >= 4; 454681ad6265SDimitry Andric } 454781ad6265SDimitry Andric 454881ad6265SDimitry Andric InstructionSelector::ComplexRendererFns 454981ad6265SDimitry Andric AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const { 455081ad6265SDimitry Andric Register Addr = Root.getReg(); 455181ad6265SDimitry Andric Register PtrBase; 455281ad6265SDimitry Andric int64_t ConstOffset; 455381ad6265SDimitry Andric int64_t ImmOffset = 0; 455481ad6265SDimitry Andric 455581ad6265SDimitry Andric // Match the immediate offset first, which canonically is moved as low as 455681ad6265SDimitry Andric // possible. 455781ad6265SDimitry Andric std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI); 455881ad6265SDimitry Andric 45595f757f3fSDimitry Andric Register OrigAddr = Addr; 456081ad6265SDimitry Andric if (ConstOffset != 0 && 456181ad6265SDimitry Andric TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::PRIVATE_ADDRESS, true)) { 456281ad6265SDimitry Andric Addr = PtrBase; 456381ad6265SDimitry Andric ImmOffset = ConstOffset; 456481ad6265SDimitry Andric } 456581ad6265SDimitry Andric 456681ad6265SDimitry Andric auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI); 456781ad6265SDimitry Andric if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD) 4568bdd1243dSDimitry Andric return std::nullopt; 456981ad6265SDimitry Andric 457081ad6265SDimitry Andric Register RHS = AddrDef->MI->getOperand(2).getReg(); 457181ad6265SDimitry Andric if (RBI.getRegBank(RHS, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) 4572bdd1243dSDimitry Andric return std::nullopt; 457381ad6265SDimitry Andric 457481ad6265SDimitry Andric Register LHS = AddrDef->MI->getOperand(1).getReg(); 457581ad6265SDimitry Andric auto LHSDef = getDefSrcRegIgnoringCopies(LHS, *MRI); 457681ad6265SDimitry Andric 45775f757f3fSDimitry Andric if (OrigAddr != Addr) { 45785f757f3fSDimitry Andric if (!isFlatScratchBaseLegalSVImm(OrigAddr)) 457906c3fb27SDimitry Andric return std::nullopt; 45805f757f3fSDimitry Andric } else { 45815f757f3fSDimitry Andric if (!isFlatScratchBaseLegalSV(OrigAddr)) 45825f757f3fSDimitry Andric return std::nullopt; 45835f757f3fSDimitry Andric } 458406c3fb27SDimitry Andric 458581ad6265SDimitry Andric if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset)) 4586bdd1243dSDimitry Andric return std::nullopt; 458781ad6265SDimitry Andric 458881ad6265SDimitry Andric if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) { 458981ad6265SDimitry Andric int FI = LHSDef->MI->getOperand(1).getIndex(); 459081ad6265SDimitry Andric return {{ 459181ad6265SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); }, // vaddr 459281ad6265SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addFrameIndex(FI); }, // saddr 459381ad6265SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset 459481ad6265SDimitry Andric }}; 459581ad6265SDimitry Andric } 459681ad6265SDimitry Andric 459781ad6265SDimitry Andric if (!isSGPR(LHS)) 4598bdd1243dSDimitry Andric return std::nullopt; 459981ad6265SDimitry Andric 460081ad6265SDimitry Andric return {{ 460181ad6265SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); }, // vaddr 460281ad6265SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(LHS); }, // saddr 460381ad6265SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset 460481ad6265SDimitry Andric }}; 460581ad6265SDimitry Andric } 460681ad6265SDimitry Andric 46070b57cec5SDimitry Andric InstructionSelector::ComplexRendererFns 46080b57cec5SDimitry Andric AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const { 46090b57cec5SDimitry Andric MachineInstr *MI = Root.getParent(); 46100b57cec5SDimitry Andric MachineBasicBlock *MBB = MI->getParent(); 46110b57cec5SDimitry Andric MachineFunction *MF = MBB->getParent(); 46120b57cec5SDimitry Andric const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>(); 46130b57cec5SDimitry Andric 46140b57cec5SDimitry Andric int64_t Offset = 0; 46155ffd83dbSDimitry Andric if (mi_match(Root.getReg(), *MRI, m_ICst(Offset)) && 46165ffd83dbSDimitry Andric Offset != TM.getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS)) { 46178bcb0991SDimitry Andric Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass); 46180b57cec5SDimitry Andric 46190b57cec5SDimitry Andric // TODO: Should this be inside the render function? The iterator seems to 46200b57cec5SDimitry Andric // move. 46215f757f3fSDimitry Andric const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset(*Subtarget); 46220b57cec5SDimitry Andric BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), 46230b57cec5SDimitry Andric HighBits) 462406c3fb27SDimitry Andric .addImm(Offset & ~MaxOffset); 46250b57cec5SDimitry Andric 46260b57cec5SDimitry Andric return {{[=](MachineInstrBuilder &MIB) { // rsrc 46270b57cec5SDimitry Andric MIB.addReg(Info->getScratchRSrcReg()); 46280b57cec5SDimitry Andric }, 46290b57cec5SDimitry Andric [=](MachineInstrBuilder &MIB) { // vaddr 46300b57cec5SDimitry Andric MIB.addReg(HighBits); 46310b57cec5SDimitry Andric }, 46320b57cec5SDimitry Andric [=](MachineInstrBuilder &MIB) { // soffset 4633e8d8bef9SDimitry Andric // Use constant zero for soffset and rely on eliminateFrameIndex 4634e8d8bef9SDimitry Andric // to choose the appropriate frame register if need be. 46355ffd83dbSDimitry Andric MIB.addImm(0); 46360b57cec5SDimitry Andric }, 46370b57cec5SDimitry Andric [=](MachineInstrBuilder &MIB) { // offset 463806c3fb27SDimitry Andric MIB.addImm(Offset & MaxOffset); 46390b57cec5SDimitry Andric }}}; 46400b57cec5SDimitry Andric } 46410b57cec5SDimitry Andric 46425ffd83dbSDimitry Andric assert(Offset == 0 || Offset == -1); 46430b57cec5SDimitry Andric 46440b57cec5SDimitry Andric // Try to fold a frame index directly into the MUBUF vaddr field, and any 46450b57cec5SDimitry Andric // offsets. 4646bdd1243dSDimitry Andric std::optional<int> FI; 46470b57cec5SDimitry Andric Register VAddr = Root.getReg(); 46488bcb0991SDimitry Andric if (const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg())) { 4649fe6060f1SDimitry Andric Register PtrBase; 4650fe6060f1SDimitry Andric int64_t ConstOffset; 4651fe6060f1SDimitry Andric std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(VAddr, *MRI); 4652fe6060f1SDimitry Andric if (ConstOffset != 0) { 46535f757f3fSDimitry Andric if (TII.isLegalMUBUFImmOffset(ConstOffset) && 46540b57cec5SDimitry Andric (!STI.privateMemoryResourceIsRangeChecked() || 465506c3fb27SDimitry Andric KB->signBitIsZero(PtrBase))) { 4656fe6060f1SDimitry Andric const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase); 4657fe6060f1SDimitry Andric if (PtrBaseDef->getOpcode() == AMDGPU::G_FRAME_INDEX) 4658fe6060f1SDimitry Andric FI = PtrBaseDef->getOperand(1).getIndex(); 46590b57cec5SDimitry Andric else 4660fe6060f1SDimitry Andric VAddr = PtrBase; 4661fe6060f1SDimitry Andric Offset = ConstOffset; 46620b57cec5SDimitry Andric } 46630b57cec5SDimitry Andric } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) { 46640b57cec5SDimitry Andric FI = RootDef->getOperand(1).getIndex(); 46650b57cec5SDimitry Andric } 46660b57cec5SDimitry Andric } 46670b57cec5SDimitry Andric 46680b57cec5SDimitry Andric return {{[=](MachineInstrBuilder &MIB) { // rsrc 46690b57cec5SDimitry Andric MIB.addReg(Info->getScratchRSrcReg()); 46700b57cec5SDimitry Andric }, 46710b57cec5SDimitry Andric [=](MachineInstrBuilder &MIB) { // vaddr 467281ad6265SDimitry Andric if (FI) 4673bdd1243dSDimitry Andric MIB.addFrameIndex(*FI); 46740b57cec5SDimitry Andric else 46750b57cec5SDimitry Andric MIB.addReg(VAddr); 46760b57cec5SDimitry Andric }, 46770b57cec5SDimitry Andric [=](MachineInstrBuilder &MIB) { // soffset 4678e8d8bef9SDimitry Andric // Use constant zero for soffset and rely on eliminateFrameIndex 4679e8d8bef9SDimitry Andric // to choose the appropriate frame register if need be. 46805ffd83dbSDimitry Andric MIB.addImm(0); 46810b57cec5SDimitry Andric }, 46820b57cec5SDimitry Andric [=](MachineInstrBuilder &MIB) { // offset 46830b57cec5SDimitry Andric MIB.addImm(Offset); 46840b57cec5SDimitry Andric }}}; 46850b57cec5SDimitry Andric } 46860b57cec5SDimitry Andric 46875ffd83dbSDimitry Andric bool AMDGPUInstructionSelector::isDSOffsetLegal(Register Base, 4688e8d8bef9SDimitry Andric int64_t Offset) const { 4689e8d8bef9SDimitry Andric if (!isUInt<16>(Offset)) 4690e8d8bef9SDimitry Andric return false; 4691e8d8bef9SDimitry Andric 4692e8d8bef9SDimitry Andric if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled()) 4693e8d8bef9SDimitry Andric return true; 4694e8d8bef9SDimitry Andric 4695e8d8bef9SDimitry Andric // On Southern Islands instruction with a negative base value and an offset 4696e8d8bef9SDimitry Andric // don't seem to work. 469706c3fb27SDimitry Andric return KB->signBitIsZero(Base); 4698e8d8bef9SDimitry Andric } 4699e8d8bef9SDimitry Andric 4700e8d8bef9SDimitry Andric bool AMDGPUInstructionSelector::isDSOffset2Legal(Register Base, int64_t Offset0, 4701e8d8bef9SDimitry Andric int64_t Offset1, 4702e8d8bef9SDimitry Andric unsigned Size) const { 4703e8d8bef9SDimitry Andric if (Offset0 % Size != 0 || Offset1 % Size != 0) 4704e8d8bef9SDimitry Andric return false; 4705e8d8bef9SDimitry Andric if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size)) 47068bcb0991SDimitry Andric return false; 47078bcb0991SDimitry Andric 47088bcb0991SDimitry Andric if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled()) 47098bcb0991SDimitry Andric return true; 47108bcb0991SDimitry Andric 47118bcb0991SDimitry Andric // On Southern Islands instruction with a negative base value and an offset 47128bcb0991SDimitry Andric // don't seem to work. 471306c3fb27SDimitry Andric return KB->signBitIsZero(Base); 471406c3fb27SDimitry Andric } 471506c3fb27SDimitry Andric 47165f757f3fSDimitry Andric // Return whether the operation has NoUnsignedWrap property. 47175f757f3fSDimitry Andric static bool isNoUnsignedWrap(MachineInstr *Addr) { 47185f757f3fSDimitry Andric return Addr->getOpcode() == TargetOpcode::G_OR || 47195f757f3fSDimitry Andric (Addr->getOpcode() == TargetOpcode::G_PTR_ADD && 47205f757f3fSDimitry Andric Addr->getFlag(MachineInstr::NoUWrap)); 47215f757f3fSDimitry Andric } 47225f757f3fSDimitry Andric 47235f757f3fSDimitry Andric // Check that the base address of flat scratch load/store in the form of `base + 47245f757f3fSDimitry Andric // offset` is legal to be put in SGPR/VGPR (i.e. unsigned per hardware 47255f757f3fSDimitry Andric // requirement). We always treat the first operand as the base address here. 47265f757f3fSDimitry Andric bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(Register Addr) const { 47275f757f3fSDimitry Andric MachineInstr *AddrMI = getDefIgnoringCopies(Addr, *MRI); 47285f757f3fSDimitry Andric 47295f757f3fSDimitry Andric if (isNoUnsignedWrap(AddrMI)) 473006c3fb27SDimitry Andric return true; 473106c3fb27SDimitry Andric 47325f757f3fSDimitry Andric // Starting with GFX12, VADDR and SADDR fields in VSCRATCH can use negative 47335f757f3fSDimitry Andric // values. 47347a6dacacSDimitry Andric if (STI.hasSignedScratchOffsets()) 47355f757f3fSDimitry Andric return true; 47365f757f3fSDimitry Andric 47375f757f3fSDimitry Andric Register LHS = AddrMI->getOperand(1).getReg(); 47385f757f3fSDimitry Andric Register RHS = AddrMI->getOperand(2).getReg(); 47395f757f3fSDimitry Andric 47405f757f3fSDimitry Andric if (AddrMI->getOpcode() == TargetOpcode::G_PTR_ADD) { 47415f757f3fSDimitry Andric std::optional<ValueAndVReg> RhsValReg = 47425f757f3fSDimitry Andric getIConstantVRegValWithLookThrough(RHS, *MRI); 47435f757f3fSDimitry Andric // If the immediate offset is negative and within certain range, the base 47445f757f3fSDimitry Andric // address cannot also be negative. If the base is also negative, the sum 47455f757f3fSDimitry Andric // would be either negative or much larger than the valid range of scratch 47465f757f3fSDimitry Andric // memory a thread can access. 47475f757f3fSDimitry Andric if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 && 47485f757f3fSDimitry Andric RhsValReg->Value.getSExtValue() > -0x40000000) 47495f757f3fSDimitry Andric return true; 47505f757f3fSDimitry Andric } 47515f757f3fSDimitry Andric 47525f757f3fSDimitry Andric return KB->signBitIsZero(LHS); 47535f757f3fSDimitry Andric } 47545f757f3fSDimitry Andric 47555f757f3fSDimitry Andric // Check address value in SGPR/VGPR are legal for flat scratch in the form 47565f757f3fSDimitry Andric // of: SGPR + VGPR. 47575f757f3fSDimitry Andric bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(Register Addr) const { 47585f757f3fSDimitry Andric MachineInstr *AddrMI = getDefIgnoringCopies(Addr, *MRI); 47595f757f3fSDimitry Andric 47605f757f3fSDimitry Andric if (isNoUnsignedWrap(AddrMI)) 47615f757f3fSDimitry Andric return true; 47625f757f3fSDimitry Andric 47637a6dacacSDimitry Andric // Starting with GFX12, VADDR and SADDR fields in VSCRATCH can use negative 47647a6dacacSDimitry Andric // values. 47657a6dacacSDimitry Andric if (STI.hasSignedScratchOffsets()) 47667a6dacacSDimitry Andric return true; 47677a6dacacSDimitry Andric 47685f757f3fSDimitry Andric Register LHS = AddrMI->getOperand(1).getReg(); 47695f757f3fSDimitry Andric Register RHS = AddrMI->getOperand(2).getReg(); 47705f757f3fSDimitry Andric return KB->signBitIsZero(RHS) && KB->signBitIsZero(LHS); 47715f757f3fSDimitry Andric } 47725f757f3fSDimitry Andric 47735f757f3fSDimitry Andric // Check address value in SGPR/VGPR are legal for flat scratch in the form 47745f757f3fSDimitry Andric // of: SGPR + VGPR + Imm. 47755f757f3fSDimitry Andric bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm( 47765f757f3fSDimitry Andric Register Addr) const { 47777a6dacacSDimitry Andric // Starting with GFX12, VADDR and SADDR fields in VSCRATCH can use negative 47787a6dacacSDimitry Andric // values. 47797a6dacacSDimitry Andric if (STI.hasSignedScratchOffsets()) 47807a6dacacSDimitry Andric return true; 47817a6dacacSDimitry Andric 47825f757f3fSDimitry Andric MachineInstr *AddrMI = getDefIgnoringCopies(Addr, *MRI); 47835f757f3fSDimitry Andric Register Base = AddrMI->getOperand(1).getReg(); 47845f757f3fSDimitry Andric std::optional<DefinitionAndSourceRegister> BaseDef = 47855f757f3fSDimitry Andric getDefSrcRegIgnoringCopies(Base, *MRI); 47865f757f3fSDimitry Andric std::optional<ValueAndVReg> RHSOffset = 47875f757f3fSDimitry Andric getIConstantVRegValWithLookThrough(AddrMI->getOperand(2).getReg(), *MRI); 47885f757f3fSDimitry Andric assert(RHSOffset); 47895f757f3fSDimitry Andric 47905f757f3fSDimitry Andric // If the immediate offset is negative and within certain range, the base 47915f757f3fSDimitry Andric // address cannot also be negative. If the base is also negative, the sum 47925f757f3fSDimitry Andric // would be either negative or much larger than the valid range of scratch 47935f757f3fSDimitry Andric // memory a thread can access. 47945f757f3fSDimitry Andric if (isNoUnsignedWrap(BaseDef->MI) && 47955f757f3fSDimitry Andric (isNoUnsignedWrap(AddrMI) || 47965f757f3fSDimitry Andric (RHSOffset->Value.getSExtValue() < 0 && 47975f757f3fSDimitry Andric RHSOffset->Value.getSExtValue() > -0x40000000))) 47985f757f3fSDimitry Andric return true; 47995f757f3fSDimitry Andric 48005f757f3fSDimitry Andric Register LHS = BaseDef->MI->getOperand(1).getReg(); 48015f757f3fSDimitry Andric Register RHS = BaseDef->MI->getOperand(2).getReg(); 48025f757f3fSDimitry Andric return KB->signBitIsZero(RHS) && KB->signBitIsZero(LHS); 48038bcb0991SDimitry Andric } 48048bcb0991SDimitry Andric 48054824e7fdSDimitry Andric bool AMDGPUInstructionSelector::isUnneededShiftMask(const MachineInstr &MI, 48064824e7fdSDimitry Andric unsigned ShAmtBits) const { 48074824e7fdSDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_AND); 48084824e7fdSDimitry Andric 4809bdd1243dSDimitry Andric std::optional<APInt> RHS = 4810bdd1243dSDimitry Andric getIConstantVRegVal(MI.getOperand(2).getReg(), *MRI); 48114824e7fdSDimitry Andric if (!RHS) 48124824e7fdSDimitry Andric return false; 48134824e7fdSDimitry Andric 481406c3fb27SDimitry Andric if (RHS->countr_one() >= ShAmtBits) 48154824e7fdSDimitry Andric return true; 48164824e7fdSDimitry Andric 481706c3fb27SDimitry Andric const APInt &LHSKnownZeros = KB->getKnownZeroes(MI.getOperand(1).getReg()); 481806c3fb27SDimitry Andric return (LHSKnownZeros | *RHS).countr_one() >= ShAmtBits; 48194824e7fdSDimitry Andric } 48204824e7fdSDimitry Andric 48210b57cec5SDimitry Andric InstructionSelector::ComplexRendererFns 48220b57cec5SDimitry Andric AMDGPUInstructionSelector::selectMUBUFScratchOffset( 48230b57cec5SDimitry Andric MachineOperand &Root) const { 482404eeddc0SDimitry Andric Register Reg = Root.getReg(); 482504eeddc0SDimitry Andric const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>(); 482604eeddc0SDimitry Andric 48275f757f3fSDimitry Andric std::optional<DefinitionAndSourceRegister> Def = 48285f757f3fSDimitry Andric getDefSrcRegIgnoringCopies(Reg, *MRI); 48295f757f3fSDimitry Andric assert(Def && "this shouldn't be an optional result"); 48305f757f3fSDimitry Andric Reg = Def->Reg; 48315f757f3fSDimitry Andric 48325f757f3fSDimitry Andric if (Register WaveBase = getWaveAddress(Def->MI)) { 483304eeddc0SDimitry Andric return {{ 483404eeddc0SDimitry Andric [=](MachineInstrBuilder &MIB) { // rsrc 483504eeddc0SDimitry Andric MIB.addReg(Info->getScratchRSrcReg()); 483604eeddc0SDimitry Andric }, 483704eeddc0SDimitry Andric [=](MachineInstrBuilder &MIB) { // soffset 483804eeddc0SDimitry Andric MIB.addReg(WaveBase); 483904eeddc0SDimitry Andric }, 484004eeddc0SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // offset 484104eeddc0SDimitry Andric }}; 484204eeddc0SDimitry Andric } 48430b57cec5SDimitry Andric 48440b57cec5SDimitry Andric int64_t Offset = 0; 484504eeddc0SDimitry Andric 484604eeddc0SDimitry Andric // FIXME: Copy check is a hack 484704eeddc0SDimitry Andric Register BasePtr; 48485f757f3fSDimitry Andric if (mi_match(Reg, *MRI, 48495f757f3fSDimitry Andric m_GPtrAdd(m_Reg(BasePtr), 48505f757f3fSDimitry Andric m_any_of(m_ICst(Offset), m_Copy(m_ICst(Offset)))))) { 48515f757f3fSDimitry Andric if (!TII.isLegalMUBUFImmOffset(Offset)) 485204eeddc0SDimitry Andric return {}; 48535f757f3fSDimitry Andric MachineInstr *BasePtrDef = getDefIgnoringCopies(BasePtr, *MRI); 485404eeddc0SDimitry Andric Register WaveBase = getWaveAddress(BasePtrDef); 485504eeddc0SDimitry Andric if (!WaveBase) 485604eeddc0SDimitry Andric return {}; 485704eeddc0SDimitry Andric 485804eeddc0SDimitry Andric return {{ 485904eeddc0SDimitry Andric [=](MachineInstrBuilder &MIB) { // rsrc 486004eeddc0SDimitry Andric MIB.addReg(Info->getScratchRSrcReg()); 486104eeddc0SDimitry Andric }, 486204eeddc0SDimitry Andric [=](MachineInstrBuilder &MIB) { // soffset 486304eeddc0SDimitry Andric MIB.addReg(WaveBase); 486404eeddc0SDimitry Andric }, 486504eeddc0SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); } // offset 486604eeddc0SDimitry Andric }}; 486704eeddc0SDimitry Andric } 486804eeddc0SDimitry Andric 48698bcb0991SDimitry Andric if (!mi_match(Root.getReg(), *MRI, m_ICst(Offset)) || 48705f757f3fSDimitry Andric !TII.isLegalMUBUFImmOffset(Offset)) 48710b57cec5SDimitry Andric return {}; 48720b57cec5SDimitry Andric 48730b57cec5SDimitry Andric return {{ 48745ffd83dbSDimitry Andric [=](MachineInstrBuilder &MIB) { // rsrc 48750b57cec5SDimitry Andric MIB.addReg(Info->getScratchRSrcReg()); 48765ffd83dbSDimitry Andric }, 48775ffd83dbSDimitry Andric [=](MachineInstrBuilder &MIB) { // soffset 48785ffd83dbSDimitry Andric MIB.addImm(0); 48795ffd83dbSDimitry Andric }, 48800b57cec5SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); } // offset 48810b57cec5SDimitry Andric }}; 48820b57cec5SDimitry Andric } 48838bcb0991SDimitry Andric 48845ffd83dbSDimitry Andric std::pair<Register, unsigned> 48855ffd83dbSDimitry Andric AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(MachineOperand &Root) const { 48868bcb0991SDimitry Andric const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg()); 48875ffd83dbSDimitry Andric if (!RootDef) 4888bdd1243dSDimitry Andric return std::pair(Root.getReg(), 0); 48898bcb0991SDimitry Andric 48908bcb0991SDimitry Andric int64_t ConstAddr = 0; 48915ffd83dbSDimitry Andric 48925ffd83dbSDimitry Andric Register PtrBase; 48935ffd83dbSDimitry Andric int64_t Offset; 48945ffd83dbSDimitry Andric std::tie(PtrBase, Offset) = 48955ffd83dbSDimitry Andric getPtrBaseWithConstantOffset(Root.getReg(), *MRI); 48965ffd83dbSDimitry Andric 48975ffd83dbSDimitry Andric if (Offset) { 4898e8d8bef9SDimitry Andric if (isDSOffsetLegal(PtrBase, Offset)) { 48998bcb0991SDimitry Andric // (add n0, c0) 4900bdd1243dSDimitry Andric return std::pair(PtrBase, Offset); 49018bcb0991SDimitry Andric } 49028bcb0991SDimitry Andric } else if (RootDef->getOpcode() == AMDGPU::G_SUB) { 49035ffd83dbSDimitry Andric // TODO 49048bcb0991SDimitry Andric 49058bcb0991SDimitry Andric 49068bcb0991SDimitry Andric } else if (mi_match(Root.getReg(), *MRI, m_ICst(ConstAddr))) { 49075ffd83dbSDimitry Andric // TODO 49088bcb0991SDimitry Andric 49098bcb0991SDimitry Andric } 49108bcb0991SDimitry Andric 4911bdd1243dSDimitry Andric return std::pair(Root.getReg(), 0); 49125ffd83dbSDimitry Andric } 49135ffd83dbSDimitry Andric 49145ffd83dbSDimitry Andric InstructionSelector::ComplexRendererFns 49155ffd83dbSDimitry Andric AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const { 49165ffd83dbSDimitry Andric Register Reg; 49175ffd83dbSDimitry Andric unsigned Offset; 49185ffd83dbSDimitry Andric std::tie(Reg, Offset) = selectDS1Addr1OffsetImpl(Root); 49198bcb0991SDimitry Andric return {{ 49205ffd83dbSDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); }, 49215ffd83dbSDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); } 49228bcb0991SDimitry Andric }}; 49238bcb0991SDimitry Andric } 49248bcb0991SDimitry Andric 49255ffd83dbSDimitry Andric InstructionSelector::ComplexRendererFns 49265ffd83dbSDimitry Andric AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(MachineOperand &Root) const { 4927e8d8bef9SDimitry Andric return selectDSReadWrite2(Root, 4); 4928e8d8bef9SDimitry Andric } 4929e8d8bef9SDimitry Andric 4930e8d8bef9SDimitry Andric InstructionSelector::ComplexRendererFns 4931e8d8bef9SDimitry Andric AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(MachineOperand &Root) const { 4932e8d8bef9SDimitry Andric return selectDSReadWrite2(Root, 8); 4933e8d8bef9SDimitry Andric } 4934e8d8bef9SDimitry Andric 4935e8d8bef9SDimitry Andric InstructionSelector::ComplexRendererFns 4936e8d8bef9SDimitry Andric AMDGPUInstructionSelector::selectDSReadWrite2(MachineOperand &Root, 4937e8d8bef9SDimitry Andric unsigned Size) const { 49385ffd83dbSDimitry Andric Register Reg; 49395ffd83dbSDimitry Andric unsigned Offset; 4940e8d8bef9SDimitry Andric std::tie(Reg, Offset) = selectDSReadWrite2Impl(Root, Size); 49415ffd83dbSDimitry Andric return {{ 49425ffd83dbSDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); }, 49435ffd83dbSDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }, 49445ffd83dbSDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset+1); } 49455ffd83dbSDimitry Andric }}; 49465ffd83dbSDimitry Andric } 49475ffd83dbSDimitry Andric 49485ffd83dbSDimitry Andric std::pair<Register, unsigned> 4949e8d8bef9SDimitry Andric AMDGPUInstructionSelector::selectDSReadWrite2Impl(MachineOperand &Root, 4950e8d8bef9SDimitry Andric unsigned Size) const { 49515ffd83dbSDimitry Andric const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg()); 49525ffd83dbSDimitry Andric if (!RootDef) 4953bdd1243dSDimitry Andric return std::pair(Root.getReg(), 0); 49545ffd83dbSDimitry Andric 49555ffd83dbSDimitry Andric int64_t ConstAddr = 0; 49565ffd83dbSDimitry Andric 49575ffd83dbSDimitry Andric Register PtrBase; 49585ffd83dbSDimitry Andric int64_t Offset; 49595ffd83dbSDimitry Andric std::tie(PtrBase, Offset) = 49605ffd83dbSDimitry Andric getPtrBaseWithConstantOffset(Root.getReg(), *MRI); 49615ffd83dbSDimitry Andric 49625ffd83dbSDimitry Andric if (Offset) { 4963e8d8bef9SDimitry Andric int64_t OffsetValue0 = Offset; 4964e8d8bef9SDimitry Andric int64_t OffsetValue1 = Offset + Size; 4965e8d8bef9SDimitry Andric if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1, Size)) { 49665ffd83dbSDimitry Andric // (add n0, c0) 4967bdd1243dSDimitry Andric return std::pair(PtrBase, OffsetValue0 / Size); 49685ffd83dbSDimitry Andric } 49695ffd83dbSDimitry Andric } else if (RootDef->getOpcode() == AMDGPU::G_SUB) { 49705ffd83dbSDimitry Andric // TODO 49715ffd83dbSDimitry Andric 49725ffd83dbSDimitry Andric } else if (mi_match(Root.getReg(), *MRI, m_ICst(ConstAddr))) { 49735ffd83dbSDimitry Andric // TODO 49745ffd83dbSDimitry Andric 49755ffd83dbSDimitry Andric } 49765ffd83dbSDimitry Andric 4977bdd1243dSDimitry Andric return std::pair(Root.getReg(), 0); 49785ffd83dbSDimitry Andric } 49795ffd83dbSDimitry Andric 49805ffd83dbSDimitry Andric /// If \p Root is a G_PTR_ADD with a G_CONSTANT on the right hand side, return 49815ffd83dbSDimitry Andric /// the base value with the constant offset. There may be intervening copies 49825ffd83dbSDimitry Andric /// between \p Root and the identified constant. Returns \p Root, 0 if this does 49835ffd83dbSDimitry Andric /// not match the pattern. 49845ffd83dbSDimitry Andric std::pair<Register, int64_t> 49855ffd83dbSDimitry Andric AMDGPUInstructionSelector::getPtrBaseWithConstantOffset( 49865ffd83dbSDimitry Andric Register Root, const MachineRegisterInfo &MRI) const { 4987e8d8bef9SDimitry Andric MachineInstr *RootI = getDefIgnoringCopies(Root, MRI); 49885ffd83dbSDimitry Andric if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD) 49895ffd83dbSDimitry Andric return {Root, 0}; 49905ffd83dbSDimitry Andric 49915ffd83dbSDimitry Andric MachineOperand &RHS = RootI->getOperand(2); 4992bdd1243dSDimitry Andric std::optional<ValueAndVReg> MaybeOffset = 4993349cc55cSDimitry Andric getIConstantVRegValWithLookThrough(RHS.getReg(), MRI); 49945ffd83dbSDimitry Andric if (!MaybeOffset) 49955ffd83dbSDimitry Andric return {Root, 0}; 4996e8d8bef9SDimitry Andric return {RootI->getOperand(1).getReg(), MaybeOffset->Value.getSExtValue()}; 49975ffd83dbSDimitry Andric } 49985ffd83dbSDimitry Andric 49995ffd83dbSDimitry Andric static void addZeroImm(MachineInstrBuilder &MIB) { 50005ffd83dbSDimitry Andric MIB.addImm(0); 50015ffd83dbSDimitry Andric } 50025ffd83dbSDimitry Andric 50035ffd83dbSDimitry Andric /// Return a resource descriptor for use with an arbitrary 64-bit pointer. If \p 50045ffd83dbSDimitry Andric /// BasePtr is not valid, a null base pointer will be used. 50055ffd83dbSDimitry Andric static Register buildRSRC(MachineIRBuilder &B, MachineRegisterInfo &MRI, 50065ffd83dbSDimitry Andric uint32_t FormatLo, uint32_t FormatHi, 50075ffd83dbSDimitry Andric Register BasePtr) { 50085ffd83dbSDimitry Andric Register RSrc2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); 50095ffd83dbSDimitry Andric Register RSrc3 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); 50105ffd83dbSDimitry Andric Register RSrcHi = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); 50115ffd83dbSDimitry Andric Register RSrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass); 50125ffd83dbSDimitry Andric 50135ffd83dbSDimitry Andric B.buildInstr(AMDGPU::S_MOV_B32) 50145ffd83dbSDimitry Andric .addDef(RSrc2) 50155ffd83dbSDimitry Andric .addImm(FormatLo); 50165ffd83dbSDimitry Andric B.buildInstr(AMDGPU::S_MOV_B32) 50175ffd83dbSDimitry Andric .addDef(RSrc3) 50185ffd83dbSDimitry Andric .addImm(FormatHi); 50195ffd83dbSDimitry Andric 50205ffd83dbSDimitry Andric // Build the half of the subregister with the constants before building the 50215ffd83dbSDimitry Andric // full 128-bit register. If we are building multiple resource descriptors, 50225ffd83dbSDimitry Andric // this will allow CSEing of the 2-component register. 50235ffd83dbSDimitry Andric B.buildInstr(AMDGPU::REG_SEQUENCE) 50245ffd83dbSDimitry Andric .addDef(RSrcHi) 50255ffd83dbSDimitry Andric .addReg(RSrc2) 50265ffd83dbSDimitry Andric .addImm(AMDGPU::sub0) 50275ffd83dbSDimitry Andric .addReg(RSrc3) 50285ffd83dbSDimitry Andric .addImm(AMDGPU::sub1); 50295ffd83dbSDimitry Andric 50305ffd83dbSDimitry Andric Register RSrcLo = BasePtr; 50315ffd83dbSDimitry Andric if (!BasePtr) { 50325ffd83dbSDimitry Andric RSrcLo = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); 50335ffd83dbSDimitry Andric B.buildInstr(AMDGPU::S_MOV_B64) 50345ffd83dbSDimitry Andric .addDef(RSrcLo) 50355ffd83dbSDimitry Andric .addImm(0); 50365ffd83dbSDimitry Andric } 50375ffd83dbSDimitry Andric 50385ffd83dbSDimitry Andric B.buildInstr(AMDGPU::REG_SEQUENCE) 50395ffd83dbSDimitry Andric .addDef(RSrc) 50405ffd83dbSDimitry Andric .addReg(RSrcLo) 50415ffd83dbSDimitry Andric .addImm(AMDGPU::sub0_sub1) 50425ffd83dbSDimitry Andric .addReg(RSrcHi) 50435ffd83dbSDimitry Andric .addImm(AMDGPU::sub2_sub3); 50445ffd83dbSDimitry Andric 50455ffd83dbSDimitry Andric return RSrc; 50465ffd83dbSDimitry Andric } 50475ffd83dbSDimitry Andric 50485ffd83dbSDimitry Andric static Register buildAddr64RSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI, 50495ffd83dbSDimitry Andric const SIInstrInfo &TII, Register BasePtr) { 50505ffd83dbSDimitry Andric uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat(); 50515ffd83dbSDimitry Andric 50525ffd83dbSDimitry Andric // FIXME: Why are half the "default" bits ignored based on the addressing 50535ffd83dbSDimitry Andric // mode? 50545ffd83dbSDimitry Andric return buildRSRC(B, MRI, 0, Hi_32(DefaultFormat), BasePtr); 50555ffd83dbSDimitry Andric } 50565ffd83dbSDimitry Andric 50575ffd83dbSDimitry Andric static Register buildOffsetSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI, 50585ffd83dbSDimitry Andric const SIInstrInfo &TII, Register BasePtr) { 50595ffd83dbSDimitry Andric uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat(); 50605ffd83dbSDimitry Andric 50615ffd83dbSDimitry Andric // FIXME: Why are half the "default" bits ignored based on the addressing 50625ffd83dbSDimitry Andric // mode? 50635ffd83dbSDimitry Andric return buildRSRC(B, MRI, -1, Hi_32(DefaultFormat), BasePtr); 50645ffd83dbSDimitry Andric } 50655ffd83dbSDimitry Andric 50665ffd83dbSDimitry Andric AMDGPUInstructionSelector::MUBUFAddressData 50675ffd83dbSDimitry Andric AMDGPUInstructionSelector::parseMUBUFAddress(Register Src) const { 50685ffd83dbSDimitry Andric MUBUFAddressData Data; 50695ffd83dbSDimitry Andric Data.N0 = Src; 50705ffd83dbSDimitry Andric 50715ffd83dbSDimitry Andric Register PtrBase; 50725ffd83dbSDimitry Andric int64_t Offset; 50735ffd83dbSDimitry Andric 50745ffd83dbSDimitry Andric std::tie(PtrBase, Offset) = getPtrBaseWithConstantOffset(Src, *MRI); 50755ffd83dbSDimitry Andric if (isUInt<32>(Offset)) { 50765ffd83dbSDimitry Andric Data.N0 = PtrBase; 50775ffd83dbSDimitry Andric Data.Offset = Offset; 50785ffd83dbSDimitry Andric } 50795ffd83dbSDimitry Andric 50805ffd83dbSDimitry Andric if (MachineInstr *InputAdd 50815ffd83dbSDimitry Andric = getOpcodeDef(TargetOpcode::G_PTR_ADD, Data.N0, *MRI)) { 50825ffd83dbSDimitry Andric Data.N2 = InputAdd->getOperand(1).getReg(); 50835ffd83dbSDimitry Andric Data.N3 = InputAdd->getOperand(2).getReg(); 50845ffd83dbSDimitry Andric 50855ffd83dbSDimitry Andric // FIXME: Need to fix extra SGPR->VGPRcopies inserted 50865ffd83dbSDimitry Andric // FIXME: Don't know this was defined by operand 0 50875ffd83dbSDimitry Andric // 50885ffd83dbSDimitry Andric // TODO: Remove this when we have copy folding optimizations after 50895ffd83dbSDimitry Andric // RegBankSelect. 50905ffd83dbSDimitry Andric Data.N2 = getDefIgnoringCopies(Data.N2, *MRI)->getOperand(0).getReg(); 50915ffd83dbSDimitry Andric Data.N3 = getDefIgnoringCopies(Data.N3, *MRI)->getOperand(0).getReg(); 50925ffd83dbSDimitry Andric } 50935ffd83dbSDimitry Andric 50945ffd83dbSDimitry Andric return Data; 50955ffd83dbSDimitry Andric } 50965ffd83dbSDimitry Andric 50975ffd83dbSDimitry Andric /// Return if the addr64 mubuf mode should be used for the given address. 50985ffd83dbSDimitry Andric bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr) const { 50995ffd83dbSDimitry Andric // (ptr_add N2, N3) -> addr64, or 51005ffd83dbSDimitry Andric // (ptr_add (ptr_add N2, N3), C1) -> addr64 51015ffd83dbSDimitry Andric if (Addr.N2) 51025ffd83dbSDimitry Andric return true; 51035ffd83dbSDimitry Andric 51045ffd83dbSDimitry Andric const RegisterBank *N0Bank = RBI.getRegBank(Addr.N0, *MRI, TRI); 51055ffd83dbSDimitry Andric return N0Bank->getID() == AMDGPU::VGPRRegBankID; 51065ffd83dbSDimitry Andric } 51075ffd83dbSDimitry Andric 51085ffd83dbSDimitry Andric /// Split an immediate offset \p ImmOffset depending on whether it fits in the 51095ffd83dbSDimitry Andric /// immediate field. Modifies \p ImmOffset and sets \p SOffset to the variable 51105ffd83dbSDimitry Andric /// component. 51115ffd83dbSDimitry Andric void AMDGPUInstructionSelector::splitIllegalMUBUFOffset( 51125ffd83dbSDimitry Andric MachineIRBuilder &B, Register &SOffset, int64_t &ImmOffset) const { 51135f757f3fSDimitry Andric if (TII.isLegalMUBUFImmOffset(ImmOffset)) 51145ffd83dbSDimitry Andric return; 51155ffd83dbSDimitry Andric 51165ffd83dbSDimitry Andric // Illegal offset, store it in soffset. 51175ffd83dbSDimitry Andric SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); 51185ffd83dbSDimitry Andric B.buildInstr(AMDGPU::S_MOV_B32) 51195ffd83dbSDimitry Andric .addDef(SOffset) 51205ffd83dbSDimitry Andric .addImm(ImmOffset); 51215ffd83dbSDimitry Andric ImmOffset = 0; 51225ffd83dbSDimitry Andric } 51235ffd83dbSDimitry Andric 51245ffd83dbSDimitry Andric bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl( 51255ffd83dbSDimitry Andric MachineOperand &Root, Register &VAddr, Register &RSrcReg, 51265ffd83dbSDimitry Andric Register &SOffset, int64_t &Offset) const { 51275ffd83dbSDimitry Andric // FIXME: Predicates should stop this from reaching here. 51285ffd83dbSDimitry Andric // addr64 bit was removed for volcanic islands. 51295ffd83dbSDimitry Andric if (!STI.hasAddr64() || STI.useFlatForGlobal()) 51305ffd83dbSDimitry Andric return false; 51315ffd83dbSDimitry Andric 51325ffd83dbSDimitry Andric MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg()); 51335ffd83dbSDimitry Andric if (!shouldUseAddr64(AddrData)) 51345ffd83dbSDimitry Andric return false; 51355ffd83dbSDimitry Andric 51365ffd83dbSDimitry Andric Register N0 = AddrData.N0; 51375ffd83dbSDimitry Andric Register N2 = AddrData.N2; 51385ffd83dbSDimitry Andric Register N3 = AddrData.N3; 51395ffd83dbSDimitry Andric Offset = AddrData.Offset; 51405ffd83dbSDimitry Andric 51415ffd83dbSDimitry Andric // Base pointer for the SRD. 51425ffd83dbSDimitry Andric Register SRDPtr; 51435ffd83dbSDimitry Andric 51445ffd83dbSDimitry Andric if (N2) { 51455ffd83dbSDimitry Andric if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) { 51465ffd83dbSDimitry Andric assert(N3); 51475ffd83dbSDimitry Andric if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) { 51485ffd83dbSDimitry Andric // Both N2 and N3 are divergent. Use N0 (the result of the add) as the 51495ffd83dbSDimitry Andric // addr64, and construct the default resource from a 0 address. 51505ffd83dbSDimitry Andric VAddr = N0; 51515ffd83dbSDimitry Andric } else { 51525ffd83dbSDimitry Andric SRDPtr = N3; 51535ffd83dbSDimitry Andric VAddr = N2; 51545ffd83dbSDimitry Andric } 51555ffd83dbSDimitry Andric } else { 51565ffd83dbSDimitry Andric // N2 is not divergent. 51575ffd83dbSDimitry Andric SRDPtr = N2; 51585ffd83dbSDimitry Andric VAddr = N3; 51595ffd83dbSDimitry Andric } 51605ffd83dbSDimitry Andric } else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) { 51615ffd83dbSDimitry Andric // Use the default null pointer in the resource 51625ffd83dbSDimitry Andric VAddr = N0; 51635ffd83dbSDimitry Andric } else { 51645ffd83dbSDimitry Andric // N0 -> offset, or 51655ffd83dbSDimitry Andric // (N0 + C1) -> offset 51665ffd83dbSDimitry Andric SRDPtr = N0; 51675ffd83dbSDimitry Andric } 51685ffd83dbSDimitry Andric 51695ffd83dbSDimitry Andric MachineIRBuilder B(*Root.getParent()); 51705ffd83dbSDimitry Andric RSrcReg = buildAddr64RSrc(B, *MRI, TII, SRDPtr); 51715ffd83dbSDimitry Andric splitIllegalMUBUFOffset(B, SOffset, Offset); 51725ffd83dbSDimitry Andric return true; 51735ffd83dbSDimitry Andric } 51745ffd83dbSDimitry Andric 51755ffd83dbSDimitry Andric bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl( 51765ffd83dbSDimitry Andric MachineOperand &Root, Register &RSrcReg, Register &SOffset, 51775ffd83dbSDimitry Andric int64_t &Offset) const { 5178e8d8bef9SDimitry Andric 5179e8d8bef9SDimitry Andric // FIXME: Pattern should not reach here. 5180e8d8bef9SDimitry Andric if (STI.useFlatForGlobal()) 5181e8d8bef9SDimitry Andric return false; 5182e8d8bef9SDimitry Andric 51835ffd83dbSDimitry Andric MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg()); 51845ffd83dbSDimitry Andric if (shouldUseAddr64(AddrData)) 51855ffd83dbSDimitry Andric return false; 51865ffd83dbSDimitry Andric 51875ffd83dbSDimitry Andric // N0 -> offset, or 51885ffd83dbSDimitry Andric // (N0 + C1) -> offset 51895ffd83dbSDimitry Andric Register SRDPtr = AddrData.N0; 51905ffd83dbSDimitry Andric Offset = AddrData.Offset; 51915ffd83dbSDimitry Andric 51925ffd83dbSDimitry Andric // TODO: Look through extensions for 32-bit soffset. 51935ffd83dbSDimitry Andric MachineIRBuilder B(*Root.getParent()); 51945ffd83dbSDimitry Andric 51955ffd83dbSDimitry Andric RSrcReg = buildOffsetSrc(B, *MRI, TII, SRDPtr); 51965ffd83dbSDimitry Andric splitIllegalMUBUFOffset(B, SOffset, Offset); 51975ffd83dbSDimitry Andric return true; 51985ffd83dbSDimitry Andric } 51995ffd83dbSDimitry Andric 52005ffd83dbSDimitry Andric InstructionSelector::ComplexRendererFns 52015ffd83dbSDimitry Andric AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const { 52025ffd83dbSDimitry Andric Register VAddr; 52035ffd83dbSDimitry Andric Register RSrcReg; 52045ffd83dbSDimitry Andric Register SOffset; 52055ffd83dbSDimitry Andric int64_t Offset = 0; 52065ffd83dbSDimitry Andric 52075ffd83dbSDimitry Andric if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset, Offset)) 52085ffd83dbSDimitry Andric return {}; 52095ffd83dbSDimitry Andric 52105ffd83dbSDimitry Andric // FIXME: Use defaulted operands for trailing 0s and remove from the complex 52115ffd83dbSDimitry Andric // pattern. 52125ffd83dbSDimitry Andric return {{ 52135ffd83dbSDimitry Andric [=](MachineInstrBuilder &MIB) { // rsrc 52145ffd83dbSDimitry Andric MIB.addReg(RSrcReg); 52155ffd83dbSDimitry Andric }, 52165ffd83dbSDimitry Andric [=](MachineInstrBuilder &MIB) { // vaddr 52175ffd83dbSDimitry Andric MIB.addReg(VAddr); 52185ffd83dbSDimitry Andric }, 52195ffd83dbSDimitry Andric [=](MachineInstrBuilder &MIB) { // soffset 52205ffd83dbSDimitry Andric if (SOffset) 52215ffd83dbSDimitry Andric MIB.addReg(SOffset); 52225f757f3fSDimitry Andric else if (STI.hasRestrictedSOffset()) 52235f757f3fSDimitry Andric MIB.addReg(AMDGPU::SGPR_NULL); 52245ffd83dbSDimitry Andric else 52255ffd83dbSDimitry Andric MIB.addImm(0); 52265ffd83dbSDimitry Andric }, 52275ffd83dbSDimitry Andric [=](MachineInstrBuilder &MIB) { // offset 52285ffd83dbSDimitry Andric MIB.addImm(Offset); 52295ffd83dbSDimitry Andric }, 5230fe6060f1SDimitry Andric addZeroImm, // cpol 52315ffd83dbSDimitry Andric addZeroImm, // tfe 52325ffd83dbSDimitry Andric addZeroImm // swz 52335ffd83dbSDimitry Andric }}; 52345ffd83dbSDimitry Andric } 52355ffd83dbSDimitry Andric 52365ffd83dbSDimitry Andric InstructionSelector::ComplexRendererFns 52375ffd83dbSDimitry Andric AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const { 52385ffd83dbSDimitry Andric Register RSrcReg; 52395ffd83dbSDimitry Andric Register SOffset; 52405ffd83dbSDimitry Andric int64_t Offset = 0; 52415ffd83dbSDimitry Andric 52425ffd83dbSDimitry Andric if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset, Offset)) 52435ffd83dbSDimitry Andric return {}; 52445ffd83dbSDimitry Andric 52455ffd83dbSDimitry Andric return {{ 52465ffd83dbSDimitry Andric [=](MachineInstrBuilder &MIB) { // rsrc 52475ffd83dbSDimitry Andric MIB.addReg(RSrcReg); 52485ffd83dbSDimitry Andric }, 52495ffd83dbSDimitry Andric [=](MachineInstrBuilder &MIB) { // soffset 52505ffd83dbSDimitry Andric if (SOffset) 52515ffd83dbSDimitry Andric MIB.addReg(SOffset); 52525f757f3fSDimitry Andric else if (STI.hasRestrictedSOffset()) 52535f757f3fSDimitry Andric MIB.addReg(AMDGPU::SGPR_NULL); 52545ffd83dbSDimitry Andric else 52555ffd83dbSDimitry Andric MIB.addImm(0); 52565ffd83dbSDimitry Andric }, 52575ffd83dbSDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }, // offset 5258fe6060f1SDimitry Andric addZeroImm, // cpol 52595ffd83dbSDimitry Andric addZeroImm, // tfe 5260fe6060f1SDimitry Andric addZeroImm, // swz 52615ffd83dbSDimitry Andric }}; 52625ffd83dbSDimitry Andric } 52635ffd83dbSDimitry Andric 52645f757f3fSDimitry Andric InstructionSelector::ComplexRendererFns 52655f757f3fSDimitry Andric AMDGPUInstructionSelector::selectBUFSOffset(MachineOperand &Root) const { 52665f757f3fSDimitry Andric 52675f757f3fSDimitry Andric Register SOffset = Root.getReg(); 52685f757f3fSDimitry Andric 52695f757f3fSDimitry Andric if (STI.hasRestrictedSOffset() && mi_match(SOffset, *MRI, m_ZeroInt())) 52705f757f3fSDimitry Andric SOffset = AMDGPU::SGPR_NULL; 52715f757f3fSDimitry Andric 52725f757f3fSDimitry Andric return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }}}; 52735f757f3fSDimitry Andric } 52745f757f3fSDimitry Andric 52755ffd83dbSDimitry Andric /// Get an immediate that must be 32-bits, and treated as zero extended. 5276bdd1243dSDimitry Andric static std::optional<uint64_t> 5277bdd1243dSDimitry Andric getConstantZext32Val(Register Reg, const MachineRegisterInfo &MRI) { 5278349cc55cSDimitry Andric // getIConstantVRegVal sexts any values, so see if that matters. 5279bdd1243dSDimitry Andric std::optional<int64_t> OffsetVal = getIConstantVRegSExtVal(Reg, MRI); 52805ffd83dbSDimitry Andric if (!OffsetVal || !isInt<32>(*OffsetVal)) 5281bdd1243dSDimitry Andric return std::nullopt; 52825ffd83dbSDimitry Andric return Lo_32(*OffsetVal); 52835ffd83dbSDimitry Andric } 52845ffd83dbSDimitry Andric 52855ffd83dbSDimitry Andric InstructionSelector::ComplexRendererFns 52865ffd83dbSDimitry Andric AMDGPUInstructionSelector::selectSMRDBufferImm(MachineOperand &Root) const { 5287bdd1243dSDimitry Andric std::optional<uint64_t> OffsetVal = getConstantZext32Val(Root.getReg(), *MRI); 52885ffd83dbSDimitry Andric if (!OffsetVal) 52895ffd83dbSDimitry Andric return {}; 52905ffd83dbSDimitry Andric 5291bdd1243dSDimitry Andric std::optional<int64_t> EncodedImm = 52925ffd83dbSDimitry Andric AMDGPU::getSMRDEncodedOffset(STI, *OffsetVal, true); 52935ffd83dbSDimitry Andric if (!EncodedImm) 52945ffd83dbSDimitry Andric return {}; 52955ffd83dbSDimitry Andric 52965ffd83dbSDimitry Andric return {{ [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); } }}; 52975ffd83dbSDimitry Andric } 52985ffd83dbSDimitry Andric 52995ffd83dbSDimitry Andric InstructionSelector::ComplexRendererFns 53005ffd83dbSDimitry Andric AMDGPUInstructionSelector::selectSMRDBufferImm32(MachineOperand &Root) const { 53015ffd83dbSDimitry Andric assert(STI.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS); 53025ffd83dbSDimitry Andric 5303bdd1243dSDimitry Andric std::optional<uint64_t> OffsetVal = getConstantZext32Val(Root.getReg(), *MRI); 53045ffd83dbSDimitry Andric if (!OffsetVal) 53055ffd83dbSDimitry Andric return {}; 53065ffd83dbSDimitry Andric 5307bdd1243dSDimitry Andric std::optional<int64_t> EncodedImm = 5308bdd1243dSDimitry Andric AMDGPU::getSMRDEncodedLiteralOffset32(STI, *OffsetVal); 53095ffd83dbSDimitry Andric if (!EncodedImm) 53105ffd83dbSDimitry Andric return {}; 53115ffd83dbSDimitry Andric 53125ffd83dbSDimitry Andric return {{ [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); } }}; 53135ffd83dbSDimitry Andric } 53145ffd83dbSDimitry Andric 5315bdd1243dSDimitry Andric InstructionSelector::ComplexRendererFns 5316bdd1243dSDimitry Andric AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const { 5317bdd1243dSDimitry Andric // Match the (soffset + offset) pair as a 32-bit register base and 5318bdd1243dSDimitry Andric // an immediate offset. 5319bdd1243dSDimitry Andric Register SOffset; 5320bdd1243dSDimitry Andric unsigned Offset; 53215f757f3fSDimitry Andric std::tie(SOffset, Offset) = AMDGPU::getBaseWithConstantOffset( 53225f757f3fSDimitry Andric *MRI, Root.getReg(), KB, /*CheckNUW*/ true); 5323bdd1243dSDimitry Andric if (!SOffset) 5324bdd1243dSDimitry Andric return std::nullopt; 5325bdd1243dSDimitry Andric 5326bdd1243dSDimitry Andric std::optional<int64_t> EncodedOffset = 5327bdd1243dSDimitry Andric AMDGPU::getSMRDEncodedOffset(STI, Offset, /* IsBuffer */ true); 5328bdd1243dSDimitry Andric if (!EncodedOffset) 5329bdd1243dSDimitry Andric return std::nullopt; 5330bdd1243dSDimitry Andric 5331bdd1243dSDimitry Andric assert(MRI->getType(SOffset) == LLT::scalar(32)); 5332bdd1243dSDimitry Andric return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }, 5333bdd1243dSDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedOffset); }}}; 5334bdd1243dSDimitry Andric } 5335bdd1243dSDimitry Andric 5336bdd1243dSDimitry Andric // Variant of stripBitCast that returns the instruction instead of a 5337bdd1243dSDimitry Andric // MachineOperand. 5338bdd1243dSDimitry Andric static MachineInstr *stripBitCast(MachineInstr *MI, MachineRegisterInfo &MRI) { 5339bdd1243dSDimitry Andric if (MI->getOpcode() == AMDGPU::G_BITCAST) 5340bdd1243dSDimitry Andric return getDefIgnoringCopies(MI->getOperand(1).getReg(), MRI); 5341bdd1243dSDimitry Andric return MI; 5342bdd1243dSDimitry Andric } 5343bdd1243dSDimitry Andric 5344bdd1243dSDimitry Andric // Figure out if this is really an extract of the high 16-bits of a dword, 5345bdd1243dSDimitry Andric // returns nullptr if it isn't. 5346bdd1243dSDimitry Andric static MachineInstr *isExtractHiElt(MachineInstr *Inst, 5347bdd1243dSDimitry Andric MachineRegisterInfo &MRI) { 5348bdd1243dSDimitry Andric Inst = stripBitCast(Inst, MRI); 5349bdd1243dSDimitry Andric 5350bdd1243dSDimitry Andric if (Inst->getOpcode() != AMDGPU::G_TRUNC) 5351bdd1243dSDimitry Andric return nullptr; 5352bdd1243dSDimitry Andric 5353bdd1243dSDimitry Andric MachineInstr *TruncOp = 5354bdd1243dSDimitry Andric getDefIgnoringCopies(Inst->getOperand(1).getReg(), MRI); 5355bdd1243dSDimitry Andric TruncOp = stripBitCast(TruncOp, MRI); 5356bdd1243dSDimitry Andric 5357bdd1243dSDimitry Andric // G_LSHR x, (G_CONSTANT i32 16) 5358bdd1243dSDimitry Andric if (TruncOp->getOpcode() == AMDGPU::G_LSHR) { 5359bdd1243dSDimitry Andric auto SrlAmount = getIConstantVRegValWithLookThrough( 5360bdd1243dSDimitry Andric TruncOp->getOperand(2).getReg(), MRI); 5361bdd1243dSDimitry Andric if (SrlAmount && SrlAmount->Value.getZExtValue() == 16) { 5362bdd1243dSDimitry Andric MachineInstr *SrlOp = 5363bdd1243dSDimitry Andric getDefIgnoringCopies(TruncOp->getOperand(1).getReg(), MRI); 5364bdd1243dSDimitry Andric return stripBitCast(SrlOp, MRI); 5365bdd1243dSDimitry Andric } 5366bdd1243dSDimitry Andric } 5367bdd1243dSDimitry Andric 5368bdd1243dSDimitry Andric // G_SHUFFLE_VECTOR x, y, shufflemask(1, 1|0) 5369bdd1243dSDimitry Andric // 1, 0 swaps the low/high 16 bits. 5370bdd1243dSDimitry Andric // 1, 1 sets the high 16 bits to be the same as the low 16. 5371bdd1243dSDimitry Andric // in any case, it selects the high elts. 5372bdd1243dSDimitry Andric if (TruncOp->getOpcode() == AMDGPU::G_SHUFFLE_VECTOR) { 5373bdd1243dSDimitry Andric assert(MRI.getType(TruncOp->getOperand(0).getReg()) == 5374bdd1243dSDimitry Andric LLT::fixed_vector(2, 16)); 5375bdd1243dSDimitry Andric 5376bdd1243dSDimitry Andric ArrayRef<int> Mask = TruncOp->getOperand(3).getShuffleMask(); 5377bdd1243dSDimitry Andric assert(Mask.size() == 2); 5378bdd1243dSDimitry Andric 5379bdd1243dSDimitry Andric if (Mask[0] == 1 && Mask[1] <= 1) { 5380bdd1243dSDimitry Andric MachineInstr *LHS = 5381bdd1243dSDimitry Andric getDefIgnoringCopies(TruncOp->getOperand(1).getReg(), MRI); 5382bdd1243dSDimitry Andric return stripBitCast(LHS, MRI); 5383bdd1243dSDimitry Andric } 5384bdd1243dSDimitry Andric } 5385bdd1243dSDimitry Andric 5386bdd1243dSDimitry Andric return nullptr; 5387bdd1243dSDimitry Andric } 5388bdd1243dSDimitry Andric 5389bdd1243dSDimitry Andric std::pair<Register, unsigned> 5390bdd1243dSDimitry Andric AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root, 5391bdd1243dSDimitry Andric bool &Matched) const { 5392bdd1243dSDimitry Andric Matched = false; 5393bdd1243dSDimitry Andric 5394bdd1243dSDimitry Andric Register Src; 5395bdd1243dSDimitry Andric unsigned Mods; 5396bdd1243dSDimitry Andric std::tie(Src, Mods) = selectVOP3ModsImpl(Root); 5397bdd1243dSDimitry Andric 5398bdd1243dSDimitry Andric MachineInstr *MI = getDefIgnoringCopies(Src, *MRI); 5399bdd1243dSDimitry Andric if (MI->getOpcode() == AMDGPU::G_FPEXT) { 5400bdd1243dSDimitry Andric MachineOperand *MO = &MI->getOperand(1); 5401bdd1243dSDimitry Andric Src = MO->getReg(); 5402bdd1243dSDimitry Andric MI = getDefIgnoringCopies(Src, *MRI); 5403bdd1243dSDimitry Andric 5404bdd1243dSDimitry Andric assert(MRI->getType(Src) == LLT::scalar(16)); 5405bdd1243dSDimitry Andric 5406bdd1243dSDimitry Andric // See through bitcasts. 5407bdd1243dSDimitry Andric // FIXME: Would be nice to use stripBitCast here. 5408bdd1243dSDimitry Andric if (MI->getOpcode() == AMDGPU::G_BITCAST) { 5409bdd1243dSDimitry Andric MO = &MI->getOperand(1); 5410bdd1243dSDimitry Andric Src = MO->getReg(); 5411bdd1243dSDimitry Andric MI = getDefIgnoringCopies(Src, *MRI); 5412bdd1243dSDimitry Andric } 5413bdd1243dSDimitry Andric 5414bdd1243dSDimitry Andric const auto CheckAbsNeg = [&]() { 5415bdd1243dSDimitry Andric // Be careful about folding modifiers if we already have an abs. fneg is 5416bdd1243dSDimitry Andric // applied last, so we don't want to apply an earlier fneg. 5417bdd1243dSDimitry Andric if ((Mods & SISrcMods::ABS) == 0) { 5418bdd1243dSDimitry Andric unsigned ModsTmp; 5419bdd1243dSDimitry Andric std::tie(Src, ModsTmp) = selectVOP3ModsImpl(*MO); 5420bdd1243dSDimitry Andric MI = getDefIgnoringCopies(Src, *MRI); 5421bdd1243dSDimitry Andric 5422bdd1243dSDimitry Andric if ((ModsTmp & SISrcMods::NEG) != 0) 5423bdd1243dSDimitry Andric Mods ^= SISrcMods::NEG; 5424bdd1243dSDimitry Andric 5425bdd1243dSDimitry Andric if ((ModsTmp & SISrcMods::ABS) != 0) 5426bdd1243dSDimitry Andric Mods |= SISrcMods::ABS; 5427bdd1243dSDimitry Andric } 5428bdd1243dSDimitry Andric }; 5429bdd1243dSDimitry Andric 5430bdd1243dSDimitry Andric CheckAbsNeg(); 5431bdd1243dSDimitry Andric 5432bdd1243dSDimitry Andric // op_sel/op_sel_hi decide the source type and source. 5433bdd1243dSDimitry Andric // If the source's op_sel_hi is set, it indicates to do a conversion from 5434bdd1243dSDimitry Andric // fp16. If the sources's op_sel is set, it picks the high half of the 5435bdd1243dSDimitry Andric // source register. 5436bdd1243dSDimitry Andric 5437bdd1243dSDimitry Andric Mods |= SISrcMods::OP_SEL_1; 5438bdd1243dSDimitry Andric 5439bdd1243dSDimitry Andric if (MachineInstr *ExtractHiEltMI = isExtractHiElt(MI, *MRI)) { 5440bdd1243dSDimitry Andric Mods |= SISrcMods::OP_SEL_0; 5441bdd1243dSDimitry Andric MI = ExtractHiEltMI; 5442bdd1243dSDimitry Andric MO = &MI->getOperand(0); 5443bdd1243dSDimitry Andric Src = MO->getReg(); 5444bdd1243dSDimitry Andric 5445bdd1243dSDimitry Andric CheckAbsNeg(); 5446bdd1243dSDimitry Andric } 5447bdd1243dSDimitry Andric 5448bdd1243dSDimitry Andric Matched = true; 5449bdd1243dSDimitry Andric } 5450bdd1243dSDimitry Andric 5451bdd1243dSDimitry Andric return {Src, Mods}; 5452bdd1243dSDimitry Andric } 5453bdd1243dSDimitry Andric 5454bdd1243dSDimitry Andric InstructionSelector::ComplexRendererFns 545506c3fb27SDimitry Andric AMDGPUInstructionSelector::selectVOP3PMadMixModsExt( 545606c3fb27SDimitry Andric MachineOperand &Root) const { 545706c3fb27SDimitry Andric Register Src; 545806c3fb27SDimitry Andric unsigned Mods; 545906c3fb27SDimitry Andric bool Matched; 546006c3fb27SDimitry Andric std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched); 546106c3fb27SDimitry Andric if (!Matched) 546206c3fb27SDimitry Andric return {}; 546306c3fb27SDimitry Andric 546406c3fb27SDimitry Andric return {{ 546506c3fb27SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, 546606c3fb27SDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods 546706c3fb27SDimitry Andric }}; 546806c3fb27SDimitry Andric } 546906c3fb27SDimitry Andric 547006c3fb27SDimitry Andric InstructionSelector::ComplexRendererFns 5471bdd1243dSDimitry Andric AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const { 5472bdd1243dSDimitry Andric Register Src; 5473bdd1243dSDimitry Andric unsigned Mods; 5474bdd1243dSDimitry Andric bool Matched; 5475bdd1243dSDimitry Andric std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched); 5476bdd1243dSDimitry Andric 5477bdd1243dSDimitry Andric return {{ 5478bdd1243dSDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, 5479bdd1243dSDimitry Andric [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods 5480bdd1243dSDimitry Andric }}; 5481bdd1243dSDimitry Andric } 5482bdd1243dSDimitry Andric 54835f757f3fSDimitry Andric bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst( 54845f757f3fSDimitry Andric MachineInstr &I, Intrinsic::ID IntrID) const { 54855f757f3fSDimitry Andric MachineBasicBlock *MBB = I.getParent(); 54865f757f3fSDimitry Andric const DebugLoc &DL = I.getDebugLoc(); 54875f757f3fSDimitry Andric Register CCReg = I.getOperand(0).getReg(); 54885f757f3fSDimitry Andric 54895f757f3fSDimitry Andric bool HasM0 = IntrID == Intrinsic::amdgcn_s_barrier_signal_isfirst_var; 54905f757f3fSDimitry Andric 54915f757f3fSDimitry Andric if (HasM0) { 54925f757f3fSDimitry Andric auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0) 54935f757f3fSDimitry Andric .addReg(I.getOperand(2).getReg()); 54945f757f3fSDimitry Andric BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0)); 54955f757f3fSDimitry Andric if (!constrainSelectedInstRegOperands(*CopyMIB, TII, TRI, RBI)) 54965f757f3fSDimitry Andric return false; 54975f757f3fSDimitry Andric } else { 54985f757f3fSDimitry Andric BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM)) 54995f757f3fSDimitry Andric .addImm(I.getOperand(2).getImm()); 55005f757f3fSDimitry Andric } 55015f757f3fSDimitry Andric 55025f757f3fSDimitry Andric BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), CCReg).addReg(AMDGPU::SCC); 55035f757f3fSDimitry Andric 55045f757f3fSDimitry Andric I.eraseFromParent(); 55055f757f3fSDimitry Andric return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass, 55065f757f3fSDimitry Andric *MRI); 55075f757f3fSDimitry Andric } 55085f757f3fSDimitry Andric 55095f757f3fSDimitry Andric unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) { 55105f757f3fSDimitry Andric if (HasInlineConst) { 55115f757f3fSDimitry Andric switch (IntrID) { 55125f757f3fSDimitry Andric default: 55135f757f3fSDimitry Andric llvm_unreachable("not a named barrier op"); 55145f757f3fSDimitry Andric case Intrinsic::amdgcn_s_barrier_init: 55155f757f3fSDimitry Andric return AMDGPU::S_BARRIER_INIT_IMM; 55165f757f3fSDimitry Andric case Intrinsic::amdgcn_s_barrier_join: 55175f757f3fSDimitry Andric return AMDGPU::S_BARRIER_JOIN_IMM; 55185f757f3fSDimitry Andric case Intrinsic::amdgcn_s_wakeup_barrier: 55195f757f3fSDimitry Andric return AMDGPU::S_WAKEUP_BARRIER_IMM; 55205f757f3fSDimitry Andric case Intrinsic::amdgcn_s_get_barrier_state: 55215f757f3fSDimitry Andric return AMDGPU::S_GET_BARRIER_STATE_IMM; 55225f757f3fSDimitry Andric }; 55235f757f3fSDimitry Andric } else { 55245f757f3fSDimitry Andric switch (IntrID) { 55255f757f3fSDimitry Andric default: 55265f757f3fSDimitry Andric llvm_unreachable("not a named barrier op"); 55275f757f3fSDimitry Andric case Intrinsic::amdgcn_s_barrier_init: 55285f757f3fSDimitry Andric return AMDGPU::S_BARRIER_INIT_M0; 55295f757f3fSDimitry Andric case Intrinsic::amdgcn_s_barrier_join: 55305f757f3fSDimitry Andric return AMDGPU::S_BARRIER_JOIN_M0; 55315f757f3fSDimitry Andric case Intrinsic::amdgcn_s_wakeup_barrier: 55325f757f3fSDimitry Andric return AMDGPU::S_WAKEUP_BARRIER_M0; 55335f757f3fSDimitry Andric case Intrinsic::amdgcn_s_get_barrier_state: 55345f757f3fSDimitry Andric return AMDGPU::S_GET_BARRIER_STATE_M0; 55355f757f3fSDimitry Andric }; 55365f757f3fSDimitry Andric } 55375f757f3fSDimitry Andric } 55385f757f3fSDimitry Andric 55395f757f3fSDimitry Andric bool AMDGPUInstructionSelector::selectNamedBarrierInst( 55405f757f3fSDimitry Andric MachineInstr &I, Intrinsic::ID IntrID) const { 55415f757f3fSDimitry Andric MachineBasicBlock *MBB = I.getParent(); 55425f757f3fSDimitry Andric const DebugLoc &DL = I.getDebugLoc(); 55435f757f3fSDimitry Andric MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_barrier_state 55445f757f3fSDimitry Andric ? I.getOperand(2) 55455f757f3fSDimitry Andric : I.getOperand(1); 55465f757f3fSDimitry Andric std::optional<int64_t> BarValImm = 55475f757f3fSDimitry Andric getIConstantVRegSExtVal(BarOp.getReg(), *MRI); 55485f757f3fSDimitry Andric Register M0Val; 55495f757f3fSDimitry Andric Register TmpReg0; 55505f757f3fSDimitry Andric 55515f757f3fSDimitry Andric // For S_BARRIER_INIT, member count will always be read from M0[16:22] 55525f757f3fSDimitry Andric if (IntrID == Intrinsic::amdgcn_s_barrier_init) { 55535f757f3fSDimitry Andric Register MemberCount = I.getOperand(2).getReg(); 55545f757f3fSDimitry Andric TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); 55555f757f3fSDimitry Andric // TODO: This should be expanded during legalization so that the the S_LSHL 55565f757f3fSDimitry Andric // and S_OR can be constant-folded 55575f757f3fSDimitry Andric BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_LSHL_B32), TmpReg0) 55585f757f3fSDimitry Andric .addImm(16) 55595f757f3fSDimitry Andric .addReg(MemberCount); 55605f757f3fSDimitry Andric M0Val = TmpReg0; 55615f757f3fSDimitry Andric } 55625f757f3fSDimitry Andric 55635f757f3fSDimitry Andric // If not inlinable, get reference to barrier depending on the instruction 55645f757f3fSDimitry Andric if (!BarValImm) { 55655f757f3fSDimitry Andric if (IntrID == Intrinsic::amdgcn_s_barrier_init) { 55665f757f3fSDimitry Andric // If reference to barrier id is not an inlinable constant then it must be 55675f757f3fSDimitry Andric // referenced with M0[4:0]. Perform an OR with the member count to include 55685f757f3fSDimitry Andric // it in M0 for S_BARRIER_INIT. 55695f757f3fSDimitry Andric Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass); 55705f757f3fSDimitry Andric BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_OR_B32), TmpReg1) 55715f757f3fSDimitry Andric .addReg(BarOp.getReg()) 55725f757f3fSDimitry Andric .addReg(TmpReg0); 55735f757f3fSDimitry Andric M0Val = TmpReg1; 55745f757f3fSDimitry Andric } else { 55755f757f3fSDimitry Andric M0Val = BarOp.getReg(); 55765f757f3fSDimitry Andric } 55775f757f3fSDimitry Andric } 55785f757f3fSDimitry Andric 55795f757f3fSDimitry Andric // Build copy to M0 if needed. For S_BARRIER_INIT, M0 is always required. 55805f757f3fSDimitry Andric if (M0Val) { 55815f757f3fSDimitry Andric auto CopyMIB = 55825f757f3fSDimitry Andric BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0).addReg(M0Val); 55835f757f3fSDimitry Andric constrainSelectedInstRegOperands(*CopyMIB, TII, TRI, RBI); 55845f757f3fSDimitry Andric } 55855f757f3fSDimitry Andric 55865f757f3fSDimitry Andric MachineInstrBuilder MIB; 55875f757f3fSDimitry Andric unsigned Opc = getNamedBarrierOp(BarValImm.has_value(), IntrID); 55885f757f3fSDimitry Andric MIB = BuildMI(*MBB, &I, DL, TII.get(Opc)); 55895f757f3fSDimitry Andric 55905f757f3fSDimitry Andric if (IntrID == Intrinsic::amdgcn_s_get_barrier_state) 55915f757f3fSDimitry Andric MIB.addDef(I.getOperand(0).getReg()); 55925f757f3fSDimitry Andric 55935f757f3fSDimitry Andric if (BarValImm) 55945f757f3fSDimitry Andric MIB.addImm(*BarValImm); 55955f757f3fSDimitry Andric 55965f757f3fSDimitry Andric I.eraseFromParent(); 55975f757f3fSDimitry Andric return true; 55985f757f3fSDimitry Andric } 55997a6dacacSDimitry Andric 56005f757f3fSDimitry Andric bool AMDGPUInstructionSelector::selectSBarrierLeave(MachineInstr &I) const { 56015f757f3fSDimitry Andric MachineBasicBlock *BB = I.getParent(); 56025f757f3fSDimitry Andric const DebugLoc &DL = I.getDebugLoc(); 56035f757f3fSDimitry Andric Register CCReg = I.getOperand(0).getReg(); 56045f757f3fSDimitry Andric 56055f757f3fSDimitry Andric BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_BARRIER_LEAVE)); 56065f757f3fSDimitry Andric BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg).addReg(AMDGPU::SCC); 56075f757f3fSDimitry Andric 56085f757f3fSDimitry Andric I.eraseFromParent(); 56095f757f3fSDimitry Andric return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass, 56105f757f3fSDimitry Andric *MRI); 56115f757f3fSDimitry Andric } 56125f757f3fSDimitry Andric 56138bcb0991SDimitry Andric void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB, 5614480093f4SDimitry Andric const MachineInstr &MI, 5615480093f4SDimitry Andric int OpIdx) const { 5616480093f4SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && 5617480093f4SDimitry Andric "Expected G_CONSTANT"); 56185ffd83dbSDimitry Andric MIB.addImm(MI.getOperand(1).getCImm()->getSExtValue()); 56198bcb0991SDimitry Andric } 5620480093f4SDimitry Andric 5621480093f4SDimitry Andric void AMDGPUInstructionSelector::renderNegateImm(MachineInstrBuilder &MIB, 5622480093f4SDimitry Andric const MachineInstr &MI, 5623480093f4SDimitry Andric int OpIdx) const { 5624480093f4SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && 5625480093f4SDimitry Andric "Expected G_CONSTANT"); 5626480093f4SDimitry Andric MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue()); 5627480093f4SDimitry Andric } 5628480093f4SDimitry Andric 5629480093f4SDimitry Andric void AMDGPUInstructionSelector::renderBitcastImm(MachineInstrBuilder &MIB, 5630480093f4SDimitry Andric const MachineInstr &MI, 5631480093f4SDimitry Andric int OpIdx) const { 5632480093f4SDimitry Andric assert(OpIdx == -1); 5633480093f4SDimitry Andric 5634480093f4SDimitry Andric const MachineOperand &Op = MI.getOperand(1); 5635480093f4SDimitry Andric if (MI.getOpcode() == TargetOpcode::G_FCONSTANT) 5636480093f4SDimitry Andric MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue()); 5637480093f4SDimitry Andric else { 5638480093f4SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT"); 5639480093f4SDimitry Andric MIB.addImm(Op.getCImm()->getSExtValue()); 5640480093f4SDimitry Andric } 5641480093f4SDimitry Andric } 5642480093f4SDimitry Andric 5643480093f4SDimitry Andric void AMDGPUInstructionSelector::renderPopcntImm(MachineInstrBuilder &MIB, 5644480093f4SDimitry Andric const MachineInstr &MI, 5645480093f4SDimitry Andric int OpIdx) const { 5646480093f4SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && 5647480093f4SDimitry Andric "Expected G_CONSTANT"); 564806c3fb27SDimitry Andric MIB.addImm(MI.getOperand(1).getCImm()->getValue().popcount()); 5649480093f4SDimitry Andric } 5650480093f4SDimitry Andric 5651480093f4SDimitry Andric /// This only really exists to satisfy DAG type checking machinery, so is a 5652480093f4SDimitry Andric /// no-op here. 5653480093f4SDimitry Andric void AMDGPUInstructionSelector::renderTruncTImm(MachineInstrBuilder &MIB, 5654480093f4SDimitry Andric const MachineInstr &MI, 5655480093f4SDimitry Andric int OpIdx) const { 5656480093f4SDimitry Andric MIB.addImm(MI.getOperand(OpIdx).getImm()); 5657480093f4SDimitry Andric } 5658480093f4SDimitry Andric 565906c3fb27SDimitry Andric void AMDGPUInstructionSelector::renderOpSelTImm(MachineInstrBuilder &MIB, 566006c3fb27SDimitry Andric const MachineInstr &MI, 566106c3fb27SDimitry Andric int OpIdx) const { 566206c3fb27SDimitry Andric assert(OpIdx >= 0 && "expected to match an immediate operand"); 566306c3fb27SDimitry Andric MIB.addImm(MI.getOperand(OpIdx).getImm() ? (int64_t)SISrcMods::OP_SEL_0 : 0); 566406c3fb27SDimitry Andric } 566506c3fb27SDimitry Andric 5666fe6060f1SDimitry Andric void AMDGPUInstructionSelector::renderExtractCPol(MachineInstrBuilder &MIB, 56675ffd83dbSDimitry Andric const MachineInstr &MI, 56685ffd83dbSDimitry Andric int OpIdx) const { 56695ffd83dbSDimitry Andric assert(OpIdx >= 0 && "expected to match an immediate operand"); 56705f757f3fSDimitry Andric MIB.addImm(MI.getOperand(OpIdx).getImm() & 56715f757f3fSDimitry Andric (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::ALL 56725f757f3fSDimitry Andric : AMDGPU::CPol::ALL_pregfx12)); 56735ffd83dbSDimitry Andric } 56745ffd83dbSDimitry Andric 56755ffd83dbSDimitry Andric void AMDGPUInstructionSelector::renderExtractSWZ(MachineInstrBuilder &MIB, 56765ffd83dbSDimitry Andric const MachineInstr &MI, 56775ffd83dbSDimitry Andric int OpIdx) const { 56785ffd83dbSDimitry Andric assert(OpIdx >= 0 && "expected to match an immediate operand"); 56795f757f3fSDimitry Andric const bool Swizzle = MI.getOperand(OpIdx).getImm() & 56805f757f3fSDimitry Andric (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::SWZ 56815f757f3fSDimitry Andric : AMDGPU::CPol::SWZ_pregfx12); 56825f757f3fSDimitry Andric MIB.addImm(Swizzle); 56835ffd83dbSDimitry Andric } 56845ffd83dbSDimitry Andric 56857a6dacacSDimitry Andric void AMDGPUInstructionSelector::renderExtractCpolSetGLC( 56867a6dacacSDimitry Andric MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const { 5687fe6060f1SDimitry Andric assert(OpIdx >= 0 && "expected to match an immediate operand"); 56887a6dacacSDimitry Andric const uint32_t Cpol = MI.getOperand(OpIdx).getImm() & 56897a6dacacSDimitry Andric (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::ALL 56907a6dacacSDimitry Andric : AMDGPU::CPol::ALL_pregfx12); 56917a6dacacSDimitry Andric MIB.addImm(Cpol | AMDGPU::CPol::GLC); 5692fe6060f1SDimitry Andric } 5693fe6060f1SDimitry Andric 5694e8d8bef9SDimitry Andric void AMDGPUInstructionSelector::renderFrameIndex(MachineInstrBuilder &MIB, 5695e8d8bef9SDimitry Andric const MachineInstr &MI, 5696e8d8bef9SDimitry Andric int OpIdx) const { 56975f757f3fSDimitry Andric MIB.addFrameIndex(MI.getOperand(1).getIndex()); 56985f757f3fSDimitry Andric } 56995f757f3fSDimitry Andric 57005f757f3fSDimitry Andric void AMDGPUInstructionSelector::renderFPPow2ToExponent(MachineInstrBuilder &MIB, 57015f757f3fSDimitry Andric const MachineInstr &MI, 57025f757f3fSDimitry Andric int OpIdx) const { 57035f757f3fSDimitry Andric const APFloat &APF = MI.getOperand(1).getFPImm()->getValueAPF(); 57045f757f3fSDimitry Andric int ExpVal = APF.getExactLog2Abs(); 57055f757f3fSDimitry Andric assert(ExpVal != INT_MIN); 57065f757f3fSDimitry Andric MIB.addImm(ExpVal); 5707e8d8bef9SDimitry Andric } 5708e8d8bef9SDimitry Andric 5709*0fca6ea1SDimitry Andric bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const { 5710*0fca6ea1SDimitry Andric return TII.isInlineConstant(Imm); 5711480093f4SDimitry Andric } 5712480093f4SDimitry Andric 5713480093f4SDimitry Andric bool AMDGPUInstructionSelector::isInlineImmediate(const APFloat &Imm) const { 5714480093f4SDimitry Andric return TII.isInlineConstant(Imm); 5715480093f4SDimitry Andric } 5716