109467b48Spatrick //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
209467b48Spatrick //
309467b48Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
409467b48Spatrick // See https://llvm.org/LICENSE.txt for license information.
509467b48Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
609467b48Spatrick //
709467b48Spatrick //==-----------------------------------------------------------------------===//
809467b48Spatrick //
909467b48Spatrick /// \file
1009467b48Spatrick /// Defines an instruction selector for the AMDGPU target.
1109467b48Spatrick //
1209467b48Spatrick //===----------------------------------------------------------------------===//
1309467b48Spatrick
14*d415bd75Srobert #include "AMDGPUISelDAGToDAG.h"
1509467b48Spatrick #include "AMDGPU.h"
16*d415bd75Srobert #include "AMDGPUInstrInfo.h"
17*d415bd75Srobert #include "AMDGPUSubtarget.h"
1809467b48Spatrick #include "AMDGPUTargetMachine.h"
19*d415bd75Srobert #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
20*d415bd75Srobert #include "MCTargetDesc/R600MCTargetDesc.h"
21*d415bd75Srobert #include "R600RegisterInfo.h"
2209467b48Spatrick #include "SIMachineFunctionInfo.h"
2309467b48Spatrick #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
2409467b48Spatrick #include "llvm/Analysis/ValueTracking.h"
2509467b48Spatrick #include "llvm/CodeGen/FunctionLoweringInfo.h"
2609467b48Spatrick #include "llvm/CodeGen/SelectionDAG.h"
2709467b48Spatrick #include "llvm/CodeGen/SelectionDAGISel.h"
2809467b48Spatrick #include "llvm/CodeGen/SelectionDAGNodes.h"
2973471bf0Spatrick #include "llvm/IR/IntrinsicsAMDGPU.h"
3009467b48Spatrick #include "llvm/InitializePasses.h"
3173471bf0Spatrick
3209467b48Spatrick #ifdef EXPENSIVE_CHECKS
3373471bf0Spatrick #include "llvm/Analysis/LoopInfo.h"
3409467b48Spatrick #include "llvm/IR/Dominators.h"
3509467b48Spatrick #endif
3609467b48Spatrick
37*d415bd75Srobert #define DEBUG_TYPE "amdgpu-isel"
3809467b48Spatrick
3909467b48Spatrick using namespace llvm;
4009467b48Spatrick
4109467b48Spatrick //===----------------------------------------------------------------------===//
4209467b48Spatrick // Instruction Selector Implementation
4309467b48Spatrick //===----------------------------------------------------------------------===//
4409467b48Spatrick
4509467b48Spatrick namespace {
stripBitcast(SDValue Val)4609467b48Spatrick static SDValue stripBitcast(SDValue Val) {
4709467b48Spatrick return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
4809467b48Spatrick }
4909467b48Spatrick
5009467b48Spatrick // Figure out if this is really an extract of the high 16-bits of a dword.
isExtractHiElt(SDValue In,SDValue & Out)5109467b48Spatrick static bool isExtractHiElt(SDValue In, SDValue &Out) {
5209467b48Spatrick In = stripBitcast(In);
5373471bf0Spatrick
5473471bf0Spatrick if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
5573471bf0Spatrick if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) {
5673471bf0Spatrick if (!Idx->isOne())
5773471bf0Spatrick return false;
5873471bf0Spatrick Out = In.getOperand(0);
5973471bf0Spatrick return true;
6073471bf0Spatrick }
6173471bf0Spatrick }
6273471bf0Spatrick
6309467b48Spatrick if (In.getOpcode() != ISD::TRUNCATE)
6409467b48Spatrick return false;
6509467b48Spatrick
6609467b48Spatrick SDValue Srl = In.getOperand(0);
6709467b48Spatrick if (Srl.getOpcode() == ISD::SRL) {
6809467b48Spatrick if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
6909467b48Spatrick if (ShiftAmt->getZExtValue() == 16) {
7009467b48Spatrick Out = stripBitcast(Srl.getOperand(0));
7109467b48Spatrick return true;
7209467b48Spatrick }
7309467b48Spatrick }
7409467b48Spatrick }
7509467b48Spatrick
7609467b48Spatrick return false;
7709467b48Spatrick }
7809467b48Spatrick
7909467b48Spatrick // Look through operations that obscure just looking at the low 16-bits of the
8009467b48Spatrick // same register.
stripExtractLoElt(SDValue In)8109467b48Spatrick static SDValue stripExtractLoElt(SDValue In) {
8273471bf0Spatrick if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
8373471bf0Spatrick if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) {
84*d415bd75Srobert if (Idx->isZero() && In.getValueSizeInBits() <= 32)
8573471bf0Spatrick return In.getOperand(0);
8673471bf0Spatrick }
8773471bf0Spatrick }
8873471bf0Spatrick
8909467b48Spatrick if (In.getOpcode() == ISD::TRUNCATE) {
9009467b48Spatrick SDValue Src = In.getOperand(0);
9109467b48Spatrick if (Src.getValueType().getSizeInBits() == 32)
9209467b48Spatrick return stripBitcast(Src);
9309467b48Spatrick }
9409467b48Spatrick
9509467b48Spatrick return In;
9609467b48Spatrick }
9709467b48Spatrick
9809467b48Spatrick } // end anonymous namespace
9909467b48Spatrick
10009467b48Spatrick INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
10109467b48Spatrick "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)10209467b48Spatrick INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
10309467b48Spatrick INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
10409467b48Spatrick INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
10509467b48Spatrick #ifdef EXPENSIVE_CHECKS
10609467b48Spatrick INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
10709467b48Spatrick INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
10809467b48Spatrick #endif
10909467b48Spatrick INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
11009467b48Spatrick "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
11109467b48Spatrick
11209467b48Spatrick /// This pass converts a legalized DAG into a AMDGPU-specific
11309467b48Spatrick // DAG, ready for instruction scheduling.
114*d415bd75Srobert FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM,
11509467b48Spatrick CodeGenOpt::Level OptLevel) {
11609467b48Spatrick return new AMDGPUDAGToDAGISel(TM, OptLevel);
11709467b48Spatrick }
11809467b48Spatrick
AMDGPUDAGToDAGISel(TargetMachine & TM,CodeGenOpt::Level OptLevel)119*d415bd75Srobert AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM,
120*d415bd75Srobert CodeGenOpt::Level OptLevel)
121*d415bd75Srobert : SelectionDAGISel(ID, TM, OptLevel) {
122*d415bd75Srobert EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
12309467b48Spatrick }
12409467b48Spatrick
runOnMachineFunction(MachineFunction & MF)12509467b48Spatrick bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
12609467b48Spatrick #ifdef EXPENSIVE_CHECKS
12709467b48Spatrick DominatorTree & DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
12809467b48Spatrick LoopInfo * LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
12909467b48Spatrick for (auto &L : LI->getLoopsInPreorder()) {
13009467b48Spatrick assert(L->isLCSSAForm(DT));
13109467b48Spatrick }
13209467b48Spatrick #endif
13309467b48Spatrick Subtarget = &MF.getSubtarget<GCNSubtarget>();
134097a140dSpatrick Mode = AMDGPU::SIModeRegisterDefaults(MF.getFunction());
13509467b48Spatrick return SelectionDAGISel::runOnMachineFunction(MF);
13609467b48Spatrick }
13709467b48Spatrick
fp16SrcZerosHighBits(unsigned Opc) const13873471bf0Spatrick bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {
13973471bf0Spatrick // XXX - only need to list legal operations.
14073471bf0Spatrick switch (Opc) {
14173471bf0Spatrick case ISD::FADD:
14273471bf0Spatrick case ISD::FSUB:
14373471bf0Spatrick case ISD::FMUL:
14473471bf0Spatrick case ISD::FDIV:
14573471bf0Spatrick case ISD::FREM:
14673471bf0Spatrick case ISD::FCANONICALIZE:
14773471bf0Spatrick case ISD::UINT_TO_FP:
14873471bf0Spatrick case ISD::SINT_TO_FP:
14973471bf0Spatrick case ISD::FABS:
15073471bf0Spatrick // Fabs is lowered to a bit operation, but it's an and which will clear the
15173471bf0Spatrick // high bits anyway.
15273471bf0Spatrick case ISD::FSQRT:
15373471bf0Spatrick case ISD::FSIN:
15473471bf0Spatrick case ISD::FCOS:
15573471bf0Spatrick case ISD::FPOWI:
15673471bf0Spatrick case ISD::FPOW:
15773471bf0Spatrick case ISD::FLOG:
15873471bf0Spatrick case ISD::FLOG2:
15973471bf0Spatrick case ISD::FLOG10:
16073471bf0Spatrick case ISD::FEXP:
16173471bf0Spatrick case ISD::FEXP2:
16273471bf0Spatrick case ISD::FCEIL:
16373471bf0Spatrick case ISD::FTRUNC:
16473471bf0Spatrick case ISD::FRINT:
16573471bf0Spatrick case ISD::FNEARBYINT:
16673471bf0Spatrick case ISD::FROUND:
16773471bf0Spatrick case ISD::FFLOOR:
16873471bf0Spatrick case ISD::FMINNUM:
16973471bf0Spatrick case ISD::FMAXNUM:
17073471bf0Spatrick case AMDGPUISD::FRACT:
17173471bf0Spatrick case AMDGPUISD::CLAMP:
17273471bf0Spatrick case AMDGPUISD::COS_HW:
17373471bf0Spatrick case AMDGPUISD::SIN_HW:
17473471bf0Spatrick case AMDGPUISD::FMIN3:
17573471bf0Spatrick case AMDGPUISD::FMAX3:
17673471bf0Spatrick case AMDGPUISD::FMED3:
17773471bf0Spatrick case AMDGPUISD::FMAD_FTZ:
17873471bf0Spatrick case AMDGPUISD::RCP:
17973471bf0Spatrick case AMDGPUISD::RSQ:
18073471bf0Spatrick case AMDGPUISD::RCP_IFLAG:
18173471bf0Spatrick case AMDGPUISD::LDEXP:
18273471bf0Spatrick // On gfx10, all 16-bit instructions preserve the high bits.
18373471bf0Spatrick return Subtarget->getGeneration() <= AMDGPUSubtarget::GFX9;
18473471bf0Spatrick case ISD::FP_ROUND:
18573471bf0Spatrick // We may select fptrunc (fma/mad) to mad_mixlo, which does not zero the
18673471bf0Spatrick // high bits on gfx9.
18773471bf0Spatrick // TODO: If we had the source node we could see if the source was fma/mad
18873471bf0Spatrick return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
18973471bf0Spatrick case ISD::FMA:
19073471bf0Spatrick case ISD::FMAD:
19173471bf0Spatrick case AMDGPUISD::DIV_FIXUP:
19273471bf0Spatrick return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
19373471bf0Spatrick default:
19473471bf0Spatrick // fcopysign, select and others may be lowered to 32-bit bit operations
19573471bf0Spatrick // which don't zero the high bits.
19673471bf0Spatrick return false;
19773471bf0Spatrick }
19873471bf0Spatrick }
19973471bf0Spatrick
getAnalysisUsage(AnalysisUsage & AU) const200*d415bd75Srobert void AMDGPUDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
201*d415bd75Srobert AU.addRequired<AMDGPUArgumentUsageInfo>();
202*d415bd75Srobert AU.addRequired<LegacyDivergenceAnalysis>();
203*d415bd75Srobert #ifdef EXPENSIVE_CHECKS
204*d415bd75Srobert AU.addRequired<DominatorTreeWrapperPass>();
205*d415bd75Srobert AU.addRequired<LoopInfoWrapperPass>();
206*d415bd75Srobert #endif
207*d415bd75Srobert SelectionDAGISel::getAnalysisUsage(AU);
208*d415bd75Srobert }
209*d415bd75Srobert
matchLoadD16FromBuildVector(SDNode * N) const21009467b48Spatrick bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
21109467b48Spatrick assert(Subtarget->d16PreservesUnusedBits());
21209467b48Spatrick MVT VT = N->getValueType(0).getSimpleVT();
21309467b48Spatrick if (VT != MVT::v2i16 && VT != MVT::v2f16)
21409467b48Spatrick return false;
21509467b48Spatrick
21609467b48Spatrick SDValue Lo = N->getOperand(0);
21709467b48Spatrick SDValue Hi = N->getOperand(1);
21809467b48Spatrick
21909467b48Spatrick LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));
22009467b48Spatrick
22109467b48Spatrick // build_vector lo, (load ptr) -> load_d16_hi ptr, lo
22209467b48Spatrick // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo
22309467b48Spatrick // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo
22409467b48Spatrick
22509467b48Spatrick // Need to check for possible indirect dependencies on the other half of the
22609467b48Spatrick // vector to avoid introducing a cycle.
22709467b48Spatrick if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) {
22809467b48Spatrick SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
22909467b48Spatrick
23009467b48Spatrick SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo);
23109467b48Spatrick SDValue Ops[] = {
23209467b48Spatrick LdHi->getChain(), LdHi->getBasePtr(), TiedIn
23309467b48Spatrick };
23409467b48Spatrick
23509467b48Spatrick unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
23609467b48Spatrick if (LdHi->getMemoryVT() == MVT::i8) {
23709467b48Spatrick LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ?
23809467b48Spatrick AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
23909467b48Spatrick } else {
24009467b48Spatrick assert(LdHi->getMemoryVT() == MVT::i16);
24109467b48Spatrick }
24209467b48Spatrick
24309467b48Spatrick SDValue NewLoadHi =
24409467b48Spatrick CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
24509467b48Spatrick Ops, LdHi->getMemoryVT(),
24609467b48Spatrick LdHi->getMemOperand());
24709467b48Spatrick
24809467b48Spatrick CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi);
24909467b48Spatrick CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1));
25009467b48Spatrick return true;
25109467b48Spatrick }
25209467b48Spatrick
25309467b48Spatrick // build_vector (load ptr), hi -> load_d16_lo ptr, hi
25409467b48Spatrick // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi
25509467b48Spatrick // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi
25609467b48Spatrick LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
25709467b48Spatrick if (LdLo && Lo.hasOneUse()) {
25809467b48Spatrick SDValue TiedIn = getHi16Elt(Hi);
25909467b48Spatrick if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode()))
26009467b48Spatrick return false;
26109467b48Spatrick
26209467b48Spatrick SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
26309467b48Spatrick unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
26409467b48Spatrick if (LdLo->getMemoryVT() == MVT::i8) {
26509467b48Spatrick LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ?
26609467b48Spatrick AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
26709467b48Spatrick } else {
26809467b48Spatrick assert(LdLo->getMemoryVT() == MVT::i16);
26909467b48Spatrick }
27009467b48Spatrick
27109467b48Spatrick TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);
27209467b48Spatrick
27309467b48Spatrick SDValue Ops[] = {
27409467b48Spatrick LdLo->getChain(), LdLo->getBasePtr(), TiedIn
27509467b48Spatrick };
27609467b48Spatrick
27709467b48Spatrick SDValue NewLoadLo =
27809467b48Spatrick CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
27909467b48Spatrick Ops, LdLo->getMemoryVT(),
28009467b48Spatrick LdLo->getMemOperand());
28109467b48Spatrick
28209467b48Spatrick CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo);
28309467b48Spatrick CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1));
28409467b48Spatrick return true;
28509467b48Spatrick }
28609467b48Spatrick
28709467b48Spatrick return false;
28809467b48Spatrick }
28909467b48Spatrick
PreprocessISelDAG()29009467b48Spatrick void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
29109467b48Spatrick if (!Subtarget->d16PreservesUnusedBits())
29209467b48Spatrick return;
29309467b48Spatrick
29409467b48Spatrick SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
29509467b48Spatrick
29609467b48Spatrick bool MadeChange = false;
29709467b48Spatrick while (Position != CurDAG->allnodes_begin()) {
29809467b48Spatrick SDNode *N = &*--Position;
29909467b48Spatrick if (N->use_empty())
30009467b48Spatrick continue;
30109467b48Spatrick
30209467b48Spatrick switch (N->getOpcode()) {
30309467b48Spatrick case ISD::BUILD_VECTOR:
30409467b48Spatrick MadeChange |= matchLoadD16FromBuildVector(N);
30509467b48Spatrick break;
30609467b48Spatrick default:
30709467b48Spatrick break;
30809467b48Spatrick }
30909467b48Spatrick }
31009467b48Spatrick
31109467b48Spatrick if (MadeChange) {
31209467b48Spatrick CurDAG->RemoveDeadNodes();
31309467b48Spatrick LLVM_DEBUG(dbgs() << "After PreProcess:\n";
31409467b48Spatrick CurDAG->dump(););
31509467b48Spatrick }
31609467b48Spatrick }
31709467b48Spatrick
isInlineImmediate(const SDNode * N,bool Negated) const31809467b48Spatrick bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N,
31909467b48Spatrick bool Negated) const {
32009467b48Spatrick if (N->isUndef())
32109467b48Spatrick return true;
32209467b48Spatrick
32309467b48Spatrick const SIInstrInfo *TII = Subtarget->getInstrInfo();
32409467b48Spatrick if (Negated) {
32509467b48Spatrick if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
32609467b48Spatrick return TII->isInlineConstant(-C->getAPIntValue());
32709467b48Spatrick
32809467b48Spatrick if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
32909467b48Spatrick return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt());
33009467b48Spatrick
33109467b48Spatrick } else {
33209467b48Spatrick if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
33309467b48Spatrick return TII->isInlineConstant(C->getAPIntValue());
33409467b48Spatrick
33509467b48Spatrick if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
33609467b48Spatrick return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
33709467b48Spatrick }
33809467b48Spatrick
33909467b48Spatrick return false;
34009467b48Spatrick }
34109467b48Spatrick
34209467b48Spatrick /// Determine the register class for \p OpNo
34309467b48Spatrick /// \returns The register class of the virtual register that will be used for
34409467b48Spatrick /// the given operand number \OpNo or NULL if the register class cannot be
34509467b48Spatrick /// determined.
getOperandRegClass(SDNode * N,unsigned OpNo) const34609467b48Spatrick const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
34709467b48Spatrick unsigned OpNo) const {
34809467b48Spatrick if (!N->isMachineOpcode()) {
34909467b48Spatrick if (N->getOpcode() == ISD::CopyToReg) {
35073471bf0Spatrick Register Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
35173471bf0Spatrick if (Reg.isVirtual()) {
35209467b48Spatrick MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
35309467b48Spatrick return MRI.getRegClass(Reg);
35409467b48Spatrick }
35509467b48Spatrick
35609467b48Spatrick const SIRegisterInfo *TRI
35709467b48Spatrick = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
358*d415bd75Srobert return TRI->getPhysRegBaseClass(Reg);
35909467b48Spatrick }
36009467b48Spatrick
36109467b48Spatrick return nullptr;
36209467b48Spatrick }
36309467b48Spatrick
36409467b48Spatrick switch (N->getMachineOpcode()) {
36509467b48Spatrick default: {
36609467b48Spatrick const MCInstrDesc &Desc =
36709467b48Spatrick Subtarget->getInstrInfo()->get(N->getMachineOpcode());
36809467b48Spatrick unsigned OpIdx = Desc.getNumDefs() + OpNo;
36909467b48Spatrick if (OpIdx >= Desc.getNumOperands())
37009467b48Spatrick return nullptr;
371*d415bd75Srobert int RegClass = Desc.operands()[OpIdx].RegClass;
37209467b48Spatrick if (RegClass == -1)
37309467b48Spatrick return nullptr;
37409467b48Spatrick
37509467b48Spatrick return Subtarget->getRegisterInfo()->getRegClass(RegClass);
37609467b48Spatrick }
37709467b48Spatrick case AMDGPU::REG_SEQUENCE: {
37809467b48Spatrick unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
37909467b48Spatrick const TargetRegisterClass *SuperRC =
38009467b48Spatrick Subtarget->getRegisterInfo()->getRegClass(RCID);
38109467b48Spatrick
38209467b48Spatrick SDValue SubRegOp = N->getOperand(OpNo + 1);
38309467b48Spatrick unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
38409467b48Spatrick return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
38509467b48Spatrick SubRegIdx);
38609467b48Spatrick }
38709467b48Spatrick }
38809467b48Spatrick }
38909467b48Spatrick
glueCopyToOp(SDNode * N,SDValue NewChain,SDValue Glue) const39009467b48Spatrick SDNode *AMDGPUDAGToDAGISel::glueCopyToOp(SDNode *N, SDValue NewChain,
39109467b48Spatrick SDValue Glue) const {
39209467b48Spatrick SmallVector <SDValue, 8> Ops;
39309467b48Spatrick Ops.push_back(NewChain); // Replace the chain.
39409467b48Spatrick for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
39509467b48Spatrick Ops.push_back(N->getOperand(i));
39609467b48Spatrick
39709467b48Spatrick Ops.push_back(Glue);
39809467b48Spatrick return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
39909467b48Spatrick }
40009467b48Spatrick
glueCopyToM0(SDNode * N,SDValue Val) const40109467b48Spatrick SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
40209467b48Spatrick const SITargetLowering& Lowering =
40309467b48Spatrick *static_cast<const SITargetLowering*>(getTargetLowering());
40409467b48Spatrick
40509467b48Spatrick assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");
40609467b48Spatrick
40709467b48Spatrick SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N), Val);
40809467b48Spatrick return glueCopyToOp(N, M0, M0.getValue(1));
40909467b48Spatrick }
41009467b48Spatrick
glueCopyToM0LDSInit(SDNode * N) const41109467b48Spatrick SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
41209467b48Spatrick unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
41309467b48Spatrick if (AS == AMDGPUAS::LOCAL_ADDRESS) {
41409467b48Spatrick if (Subtarget->ldsRequiresM0Init())
41509467b48Spatrick return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
41609467b48Spatrick } else if (AS == AMDGPUAS::REGION_ADDRESS) {
41709467b48Spatrick MachineFunction &MF = CurDAG->getMachineFunction();
41809467b48Spatrick unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
41909467b48Spatrick return
42009467b48Spatrick glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32));
42109467b48Spatrick }
42209467b48Spatrick return N;
42309467b48Spatrick }
42409467b48Spatrick
buildSMovImm64(SDLoc & DL,uint64_t Imm,EVT VT) const42509467b48Spatrick MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
42609467b48Spatrick EVT VT) const {
42709467b48Spatrick SDNode *Lo = CurDAG->getMachineNode(
42809467b48Spatrick AMDGPU::S_MOV_B32, DL, MVT::i32,
42909467b48Spatrick CurDAG->getTargetConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
43009467b48Spatrick SDNode *Hi =
43109467b48Spatrick CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
43209467b48Spatrick CurDAG->getTargetConstant(Imm >> 32, DL, MVT::i32));
43309467b48Spatrick const SDValue Ops[] = {
43409467b48Spatrick CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
43509467b48Spatrick SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
43609467b48Spatrick SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};
43709467b48Spatrick
43809467b48Spatrick return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
43909467b48Spatrick }
44009467b48Spatrick
SelectBuildVector(SDNode * N,unsigned RegClassID)44109467b48Spatrick void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
44209467b48Spatrick EVT VT = N->getValueType(0);
44309467b48Spatrick unsigned NumVectorElts = VT.getVectorNumElements();
44409467b48Spatrick EVT EltVT = VT.getVectorElementType();
44509467b48Spatrick SDLoc DL(N);
44609467b48Spatrick SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
44709467b48Spatrick
44809467b48Spatrick if (NumVectorElts == 1) {
44909467b48Spatrick CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
45009467b48Spatrick RegClass);
45109467b48Spatrick return;
45209467b48Spatrick }
45309467b48Spatrick
45409467b48Spatrick assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
45509467b48Spatrick "supported yet");
45609467b48Spatrick // 32 = Max Num Vector Elements
45709467b48Spatrick // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
45809467b48Spatrick // 1 = Vector Register Class
45909467b48Spatrick SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
46009467b48Spatrick
461097a140dSpatrick bool IsGCN = CurDAG->getSubtarget().getTargetTriple().getArch() ==
462097a140dSpatrick Triple::amdgcn;
46309467b48Spatrick RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
46409467b48Spatrick bool IsRegSeq = true;
46509467b48Spatrick unsigned NOps = N->getNumOperands();
46609467b48Spatrick for (unsigned i = 0; i < NOps; i++) {
46709467b48Spatrick // XXX: Why is this here?
46809467b48Spatrick if (isa<RegisterSDNode>(N->getOperand(i))) {
46909467b48Spatrick IsRegSeq = false;
47009467b48Spatrick break;
47109467b48Spatrick }
472097a140dSpatrick unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
473097a140dSpatrick : R600RegisterInfo::getSubRegFromChannel(i);
47409467b48Spatrick RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
47509467b48Spatrick RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
47609467b48Spatrick }
47709467b48Spatrick if (NOps != NumVectorElts) {
47809467b48Spatrick // Fill in the missing undef elements if this was a scalar_to_vector.
47909467b48Spatrick assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
48009467b48Spatrick MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
48109467b48Spatrick DL, EltVT);
48209467b48Spatrick for (unsigned i = NOps; i < NumVectorElts; ++i) {
483097a140dSpatrick unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
484097a140dSpatrick : R600RegisterInfo::getSubRegFromChannel(i);
48509467b48Spatrick RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
48609467b48Spatrick RegSeqArgs[1 + (2 * i) + 1] =
48709467b48Spatrick CurDAG->getTargetConstant(Sub, DL, MVT::i32);
48809467b48Spatrick }
48909467b48Spatrick }
49009467b48Spatrick
49109467b48Spatrick if (!IsRegSeq)
49209467b48Spatrick SelectCode(N);
49309467b48Spatrick CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
49409467b48Spatrick }
49509467b48Spatrick
Select(SDNode * N)49609467b48Spatrick void AMDGPUDAGToDAGISel::Select(SDNode *N) {
49709467b48Spatrick unsigned int Opc = N->getOpcode();
49809467b48Spatrick if (N->isMachineOpcode()) {
49909467b48Spatrick N->setNodeId(-1);
50009467b48Spatrick return; // Already selected.
50109467b48Spatrick }
50209467b48Spatrick
50309467b48Spatrick // isa<MemSDNode> almost works but is slightly too permissive for some DS
50409467b48Spatrick // intrinsics.
50509467b48Spatrick if (Opc == ISD::LOAD || Opc == ISD::STORE || isa<AtomicSDNode>(N) ||
50609467b48Spatrick (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
50709467b48Spatrick Opc == ISD::ATOMIC_LOAD_FADD ||
50809467b48Spatrick Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
50973471bf0Spatrick Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) {
51009467b48Spatrick N = glueCopyToM0LDSInit(N);
51109467b48Spatrick SelectCode(N);
51209467b48Spatrick return;
51309467b48Spatrick }
51409467b48Spatrick
51509467b48Spatrick switch (Opc) {
51609467b48Spatrick default:
51709467b48Spatrick break;
51809467b48Spatrick // We are selecting i64 ADD here instead of custom lower it during
51909467b48Spatrick // DAG legalization, so we can fold some i64 ADDs used for address
52009467b48Spatrick // calculation into the LOAD and STORE instructions.
52109467b48Spatrick case ISD::ADDC:
52209467b48Spatrick case ISD::ADDE:
52309467b48Spatrick case ISD::SUBC:
52409467b48Spatrick case ISD::SUBE: {
52509467b48Spatrick if (N->getValueType(0) != MVT::i64)
52609467b48Spatrick break;
52709467b48Spatrick
52809467b48Spatrick SelectADD_SUB_I64(N);
52909467b48Spatrick return;
53009467b48Spatrick }
53109467b48Spatrick case ISD::ADDCARRY:
53209467b48Spatrick case ISD::SUBCARRY:
53309467b48Spatrick if (N->getValueType(0) != MVT::i32)
53409467b48Spatrick break;
53509467b48Spatrick
53609467b48Spatrick SelectAddcSubb(N);
53709467b48Spatrick return;
53809467b48Spatrick case ISD::UADDO:
53909467b48Spatrick case ISD::USUBO: {
54009467b48Spatrick SelectUADDO_USUBO(N);
54109467b48Spatrick return;
54209467b48Spatrick }
54309467b48Spatrick case AMDGPUISD::FMUL_W_CHAIN: {
54409467b48Spatrick SelectFMUL_W_CHAIN(N);
54509467b48Spatrick return;
54609467b48Spatrick }
54709467b48Spatrick case AMDGPUISD::FMA_W_CHAIN: {
54809467b48Spatrick SelectFMA_W_CHAIN(N);
54909467b48Spatrick return;
55009467b48Spatrick }
55109467b48Spatrick
55209467b48Spatrick case ISD::SCALAR_TO_VECTOR:
55309467b48Spatrick case ISD::BUILD_VECTOR: {
55409467b48Spatrick EVT VT = N->getValueType(0);
55509467b48Spatrick unsigned NumVectorElts = VT.getVectorNumElements();
55609467b48Spatrick if (VT.getScalarSizeInBits() == 16) {
55709467b48Spatrick if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
55809467b48Spatrick if (SDNode *Packed = packConstantV2I16(N, *CurDAG)) {
55909467b48Spatrick ReplaceNode(N, Packed);
56009467b48Spatrick return;
56109467b48Spatrick }
56209467b48Spatrick }
56309467b48Spatrick
56409467b48Spatrick break;
56509467b48Spatrick }
56609467b48Spatrick
56709467b48Spatrick assert(VT.getVectorElementType().bitsEq(MVT::i32));
568097a140dSpatrick unsigned RegClassID =
569097a140dSpatrick SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32)->getID();
57009467b48Spatrick SelectBuildVector(N, RegClassID);
57109467b48Spatrick return;
57209467b48Spatrick }
57309467b48Spatrick case ISD::BUILD_PAIR: {
57409467b48Spatrick SDValue RC, SubReg0, SubReg1;
57509467b48Spatrick SDLoc DL(N);
57609467b48Spatrick if (N->getValueType(0) == MVT::i128) {
57709467b48Spatrick RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32);
57809467b48Spatrick SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
57909467b48Spatrick SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
58009467b48Spatrick } else if (N->getValueType(0) == MVT::i64) {
58109467b48Spatrick RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
58209467b48Spatrick SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
58309467b48Spatrick SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
58409467b48Spatrick } else {
58509467b48Spatrick llvm_unreachable("Unhandled value type for BUILD_PAIR");
58609467b48Spatrick }
58709467b48Spatrick const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
58809467b48Spatrick N->getOperand(1), SubReg1 };
58909467b48Spatrick ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
59009467b48Spatrick N->getValueType(0), Ops));
59109467b48Spatrick return;
59209467b48Spatrick }
59309467b48Spatrick
59409467b48Spatrick case ISD::Constant:
59509467b48Spatrick case ISD::ConstantFP: {
59609467b48Spatrick if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
59709467b48Spatrick break;
59809467b48Spatrick
59909467b48Spatrick uint64_t Imm;
60009467b48Spatrick if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
60109467b48Spatrick Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
60209467b48Spatrick else {
60309467b48Spatrick ConstantSDNode *C = cast<ConstantSDNode>(N);
60409467b48Spatrick Imm = C->getZExtValue();
60509467b48Spatrick }
60609467b48Spatrick
60709467b48Spatrick SDLoc DL(N);
60809467b48Spatrick ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
60909467b48Spatrick return;
61009467b48Spatrick }
61109467b48Spatrick case AMDGPUISD::BFE_I32:
61209467b48Spatrick case AMDGPUISD::BFE_U32: {
61309467b48Spatrick // There is a scalar version available, but unlike the vector version which
61409467b48Spatrick // has a separate operand for the offset and width, the scalar version packs
61509467b48Spatrick // the width and offset into a single operand. Try to move to the scalar
61609467b48Spatrick // version if the offsets are constant, so that we can try to keep extended
61709467b48Spatrick // loads of kernel arguments in SGPRs.
61809467b48Spatrick
61909467b48Spatrick // TODO: Technically we could try to pattern match scalar bitshifts of
62009467b48Spatrick // dynamic values, but it's probably not useful.
62109467b48Spatrick ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
62209467b48Spatrick if (!Offset)
62309467b48Spatrick break;
62409467b48Spatrick
62509467b48Spatrick ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
62609467b48Spatrick if (!Width)
62709467b48Spatrick break;
62809467b48Spatrick
62909467b48Spatrick bool Signed = Opc == AMDGPUISD::BFE_I32;
63009467b48Spatrick
63109467b48Spatrick uint32_t OffsetVal = Offset->getZExtValue();
63209467b48Spatrick uint32_t WidthVal = Width->getZExtValue();
63309467b48Spatrick
634*d415bd75Srobert ReplaceNode(N, getBFE32(Signed, SDLoc(N), N->getOperand(0), OffsetVal,
635*d415bd75Srobert WidthVal));
63609467b48Spatrick return;
63709467b48Spatrick }
63809467b48Spatrick case AMDGPUISD::DIV_SCALE: {
63909467b48Spatrick SelectDIV_SCALE(N);
64009467b48Spatrick return;
64109467b48Spatrick }
64209467b48Spatrick case AMDGPUISD::MAD_I64_I32:
64309467b48Spatrick case AMDGPUISD::MAD_U64_U32: {
64409467b48Spatrick SelectMAD_64_32(N);
64509467b48Spatrick return;
64609467b48Spatrick }
647*d415bd75Srobert case ISD::SMUL_LOHI:
648*d415bd75Srobert case ISD::UMUL_LOHI:
649*d415bd75Srobert return SelectMUL_LOHI(N);
65009467b48Spatrick case ISD::CopyToReg: {
65109467b48Spatrick const SITargetLowering& Lowering =
65209467b48Spatrick *static_cast<const SITargetLowering*>(getTargetLowering());
65309467b48Spatrick N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
65409467b48Spatrick break;
65509467b48Spatrick }
65609467b48Spatrick case ISD::AND:
65709467b48Spatrick case ISD::SRL:
65809467b48Spatrick case ISD::SRA:
65909467b48Spatrick case ISD::SIGN_EXTEND_INREG:
66009467b48Spatrick if (N->getValueType(0) != MVT::i32)
66109467b48Spatrick break;
66209467b48Spatrick
66309467b48Spatrick SelectS_BFE(N);
66409467b48Spatrick return;
66509467b48Spatrick case ISD::BRCOND:
66609467b48Spatrick SelectBRCOND(N);
66709467b48Spatrick return;
66809467b48Spatrick case ISD::FMAD:
66909467b48Spatrick case ISD::FMA:
67009467b48Spatrick SelectFMAD_FMA(N);
67109467b48Spatrick return;
67209467b48Spatrick case AMDGPUISD::CVT_PKRTZ_F16_F32:
67309467b48Spatrick case AMDGPUISD::CVT_PKNORM_I16_F32:
67409467b48Spatrick case AMDGPUISD::CVT_PKNORM_U16_F32:
67509467b48Spatrick case AMDGPUISD::CVT_PK_U16_U32:
67609467b48Spatrick case AMDGPUISD::CVT_PK_I16_I32: {
67709467b48Spatrick // Hack around using a legal type if f16 is illegal.
67809467b48Spatrick if (N->getValueType(0) == MVT::i32) {
67909467b48Spatrick MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
68009467b48Spatrick N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
68109467b48Spatrick { N->getOperand(0), N->getOperand(1) });
68209467b48Spatrick SelectCode(N);
68309467b48Spatrick return;
68409467b48Spatrick }
68509467b48Spatrick
68609467b48Spatrick break;
68709467b48Spatrick }
68809467b48Spatrick case ISD::INTRINSIC_W_CHAIN: {
68909467b48Spatrick SelectINTRINSIC_W_CHAIN(N);
69009467b48Spatrick return;
69109467b48Spatrick }
69209467b48Spatrick case ISD::INTRINSIC_WO_CHAIN: {
69309467b48Spatrick SelectINTRINSIC_WO_CHAIN(N);
69409467b48Spatrick return;
69509467b48Spatrick }
69609467b48Spatrick case ISD::INTRINSIC_VOID: {
69709467b48Spatrick SelectINTRINSIC_VOID(N);
69809467b48Spatrick return;
69909467b48Spatrick }
70009467b48Spatrick }
70109467b48Spatrick
70209467b48Spatrick SelectCode(N);
70309467b48Spatrick }
70409467b48Spatrick
isUniformBr(const SDNode * N) const70509467b48Spatrick bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
70609467b48Spatrick const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
70709467b48Spatrick const Instruction *Term = BB->getTerminator();
70809467b48Spatrick return Term->getMetadata("amdgpu.uniform") ||
70909467b48Spatrick Term->getMetadata("structurizecfg.uniform");
71009467b48Spatrick }
71109467b48Spatrick
isUnneededShiftMask(const SDNode * N,unsigned ShAmtBits) const712*d415bd75Srobert bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N,
713*d415bd75Srobert unsigned ShAmtBits) const {
714*d415bd75Srobert assert(N->getOpcode() == ISD::AND);
715*d415bd75Srobert
716*d415bd75Srobert const APInt &RHS = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
717*d415bd75Srobert if (RHS.countTrailingOnes() >= ShAmtBits)
718*d415bd75Srobert return true;
719*d415bd75Srobert
720*d415bd75Srobert const APInt &LHSKnownZeros = CurDAG->computeKnownBits(N->getOperand(0)).Zero;
721*d415bd75Srobert return (LHSKnownZeros | RHS).countTrailingOnes() >= ShAmtBits;
722*d415bd75Srobert }
723*d415bd75Srobert
getBaseWithOffsetUsingSplitOR(SelectionDAG & DAG,SDValue Addr,SDValue & N0,SDValue & N1)72473471bf0Spatrick static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr,
72573471bf0Spatrick SDValue &N0, SDValue &N1) {
72673471bf0Spatrick if (Addr.getValueType() == MVT::i64 && Addr.getOpcode() == ISD::BITCAST &&
72773471bf0Spatrick Addr.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
72873471bf0Spatrick // As we split 64-bit `or` earlier, it's complicated pattern to match, i.e.
72973471bf0Spatrick // (i64 (bitcast (v2i32 (build_vector
73073471bf0Spatrick // (or (extract_vector_elt V, 0), OFFSET),
73173471bf0Spatrick // (extract_vector_elt V, 1)))))
73273471bf0Spatrick SDValue Lo = Addr.getOperand(0).getOperand(0);
73373471bf0Spatrick if (Lo.getOpcode() == ISD::OR && DAG.isBaseWithConstantOffset(Lo)) {
73473471bf0Spatrick SDValue BaseLo = Lo.getOperand(0);
73573471bf0Spatrick SDValue BaseHi = Addr.getOperand(0).getOperand(1);
73673471bf0Spatrick // Check that split base (Lo and Hi) are extracted from the same one.
73773471bf0Spatrick if (BaseLo.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
73873471bf0Spatrick BaseHi.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
73973471bf0Spatrick BaseLo.getOperand(0) == BaseHi.getOperand(0) &&
74073471bf0Spatrick // Lo is statically extracted from index 0.
74173471bf0Spatrick isa<ConstantSDNode>(BaseLo.getOperand(1)) &&
74273471bf0Spatrick BaseLo.getConstantOperandVal(1) == 0 &&
74373471bf0Spatrick // Hi is statically extracted from index 0.
74473471bf0Spatrick isa<ConstantSDNode>(BaseHi.getOperand(1)) &&
74573471bf0Spatrick BaseHi.getConstantOperandVal(1) == 1) {
74673471bf0Spatrick N0 = BaseLo.getOperand(0).getOperand(0);
74773471bf0Spatrick N1 = Lo.getOperand(1);
74873471bf0Spatrick return true;
74973471bf0Spatrick }
75073471bf0Spatrick }
75173471bf0Spatrick }
75273471bf0Spatrick return false;
75373471bf0Spatrick }
75473471bf0Spatrick
isBaseWithConstantOffset64(SDValue Addr,SDValue & LHS,SDValue & RHS) const75573471bf0Spatrick bool AMDGPUDAGToDAGISel::isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
75673471bf0Spatrick SDValue &RHS) const {
75773471bf0Spatrick if (CurDAG->isBaseWithConstantOffset(Addr)) {
75873471bf0Spatrick LHS = Addr.getOperand(0);
75973471bf0Spatrick RHS = Addr.getOperand(1);
76073471bf0Spatrick return true;
76173471bf0Spatrick }
76273471bf0Spatrick
76373471bf0Spatrick if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, LHS, RHS)) {
76473471bf0Spatrick assert(LHS && RHS && isa<ConstantSDNode>(RHS));
76573471bf0Spatrick return true;
76673471bf0Spatrick }
76773471bf0Spatrick
76873471bf0Spatrick return false;
76973471bf0Spatrick }
77073471bf0Spatrick
getPassName() const77109467b48Spatrick StringRef AMDGPUDAGToDAGISel::getPassName() const {
77209467b48Spatrick return "AMDGPU DAG->DAG Pattern Instruction Selection";
77309467b48Spatrick }
77409467b48Spatrick
77509467b48Spatrick //===----------------------------------------------------------------------===//
77609467b48Spatrick // Complex Patterns
77709467b48Spatrick //===----------------------------------------------------------------------===//
77809467b48Spatrick
SelectADDRVTX_READ(SDValue Addr,SDValue & Base,SDValue & Offset)77909467b48Spatrick bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
78009467b48Spatrick SDValue &Offset) {
78109467b48Spatrick return false;
78209467b48Spatrick }
78309467b48Spatrick
SelectADDRIndirect(SDValue Addr,SDValue & Base,SDValue & Offset)78409467b48Spatrick bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
78509467b48Spatrick SDValue &Offset) {
78609467b48Spatrick ConstantSDNode *C;
78709467b48Spatrick SDLoc DL(Addr);
78809467b48Spatrick
78909467b48Spatrick if ((C = dyn_cast<ConstantSDNode>(Addr))) {
79009467b48Spatrick Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
79109467b48Spatrick Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
79209467b48Spatrick } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
79309467b48Spatrick (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
79409467b48Spatrick Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
79509467b48Spatrick Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
79609467b48Spatrick } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
79709467b48Spatrick (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
79809467b48Spatrick Base = Addr.getOperand(0);
79909467b48Spatrick Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
80009467b48Spatrick } else {
80109467b48Spatrick Base = Addr;
80209467b48Spatrick Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
80309467b48Spatrick }
80409467b48Spatrick
80509467b48Spatrick return true;
80609467b48Spatrick }
80709467b48Spatrick
getMaterializedScalarImm32(int64_t Val,const SDLoc & DL) const80809467b48Spatrick SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
80909467b48Spatrick const SDLoc &DL) const {
81009467b48Spatrick SDNode *Mov = CurDAG->getMachineNode(
81109467b48Spatrick AMDGPU::S_MOV_B32, DL, MVT::i32,
81209467b48Spatrick CurDAG->getTargetConstant(Val, DL, MVT::i32));
81309467b48Spatrick return SDValue(Mov, 0);
81409467b48Spatrick }
81509467b48Spatrick
81609467b48Spatrick // FIXME: Should only handle addcarry/subcarry
SelectADD_SUB_I64(SDNode * N)81709467b48Spatrick void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
81809467b48Spatrick SDLoc DL(N);
81909467b48Spatrick SDValue LHS = N->getOperand(0);
82009467b48Spatrick SDValue RHS = N->getOperand(1);
82109467b48Spatrick
82209467b48Spatrick unsigned Opcode = N->getOpcode();
82309467b48Spatrick bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
82409467b48Spatrick bool ProduceCarry =
82509467b48Spatrick ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
82609467b48Spatrick bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;
82709467b48Spatrick
82809467b48Spatrick SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
82909467b48Spatrick SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
83009467b48Spatrick
83109467b48Spatrick SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
83209467b48Spatrick DL, MVT::i32, LHS, Sub0);
83309467b48Spatrick SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
83409467b48Spatrick DL, MVT::i32, LHS, Sub1);
83509467b48Spatrick
83609467b48Spatrick SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
83709467b48Spatrick DL, MVT::i32, RHS, Sub0);
83809467b48Spatrick SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
83909467b48Spatrick DL, MVT::i32, RHS, Sub1);
84009467b48Spatrick
84109467b48Spatrick SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
84209467b48Spatrick
843097a140dSpatrick static const unsigned OpcMap[2][2][2] = {
844097a140dSpatrick {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
84573471bf0Spatrick {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
846097a140dSpatrick {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
847097a140dSpatrick {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};
848097a140dSpatrick
849097a140dSpatrick unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
850097a140dSpatrick unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];
85109467b48Spatrick
85209467b48Spatrick SDNode *AddLo;
85309467b48Spatrick if (!ConsumeCarry) {
85409467b48Spatrick SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
85509467b48Spatrick AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
85609467b48Spatrick } else {
85709467b48Spatrick SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
85809467b48Spatrick AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
85909467b48Spatrick }
86009467b48Spatrick SDValue AddHiArgs[] = {
86109467b48Spatrick SDValue(Hi0, 0),
86209467b48Spatrick SDValue(Hi1, 0),
86309467b48Spatrick SDValue(AddLo, 1)
86409467b48Spatrick };
86509467b48Spatrick SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);
86609467b48Spatrick
86709467b48Spatrick SDValue RegSequenceArgs[] = {
86809467b48Spatrick CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
86909467b48Spatrick SDValue(AddLo,0),
87009467b48Spatrick Sub0,
87109467b48Spatrick SDValue(AddHi,0),
87209467b48Spatrick Sub1,
87309467b48Spatrick };
87409467b48Spatrick SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
87509467b48Spatrick MVT::i64, RegSequenceArgs);
87609467b48Spatrick
87709467b48Spatrick if (ProduceCarry) {
87809467b48Spatrick // Replace the carry-use
87909467b48Spatrick ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
88009467b48Spatrick }
88109467b48Spatrick
88209467b48Spatrick // Replace the remaining uses.
88309467b48Spatrick ReplaceNode(N, RegSequence);
88409467b48Spatrick }
88509467b48Spatrick
SelectAddcSubb(SDNode * N)88609467b48Spatrick void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
88709467b48Spatrick SDLoc DL(N);
88809467b48Spatrick SDValue LHS = N->getOperand(0);
88909467b48Spatrick SDValue RHS = N->getOperand(1);
89009467b48Spatrick SDValue CI = N->getOperand(2);
89109467b48Spatrick
892097a140dSpatrick if (N->isDivergent()) {
89309467b48Spatrick unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64
89409467b48Spatrick : AMDGPU::V_SUBB_U32_e64;
89509467b48Spatrick CurDAG->SelectNodeTo(
89609467b48Spatrick N, Opc, N->getVTList(),
897097a140dSpatrick {LHS, RHS, CI,
898097a140dSpatrick CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
899097a140dSpatrick } else {
900097a140dSpatrick unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::S_ADD_CO_PSEUDO
901097a140dSpatrick : AMDGPU::S_SUB_CO_PSEUDO;
902097a140dSpatrick CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});
903097a140dSpatrick }
90409467b48Spatrick }
90509467b48Spatrick
SelectUADDO_USUBO(SDNode * N)90609467b48Spatrick void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
90709467b48Spatrick // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
90809467b48Spatrick // carry out despite the _i32 name. These were renamed in VI to _U32.
90909467b48Spatrick // FIXME: We should probably rename the opcodes here.
910097a140dSpatrick bool IsAdd = N->getOpcode() == ISD::UADDO;
911097a140dSpatrick bool IsVALU = N->isDivergent();
912097a140dSpatrick
913097a140dSpatrick for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E;
914097a140dSpatrick ++UI)
915097a140dSpatrick if (UI.getUse().getResNo() == 1) {
916097a140dSpatrick if ((IsAdd && (UI->getOpcode() != ISD::ADDCARRY)) ||
917097a140dSpatrick (!IsAdd && (UI->getOpcode() != ISD::SUBCARRY))) {
918097a140dSpatrick IsVALU = true;
919097a140dSpatrick break;
920097a140dSpatrick }
921097a140dSpatrick }
922097a140dSpatrick
923097a140dSpatrick if (IsVALU) {
92473471bf0Spatrick unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
92509467b48Spatrick
92609467b48Spatrick CurDAG->SelectNodeTo(
92709467b48Spatrick N, Opc, N->getVTList(),
92809467b48Spatrick {N->getOperand(0), N->getOperand(1),
92909467b48Spatrick CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
930097a140dSpatrick } else {
931097a140dSpatrick unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
932097a140dSpatrick : AMDGPU::S_USUBO_PSEUDO;
933097a140dSpatrick
934097a140dSpatrick CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
935097a140dSpatrick {N->getOperand(0), N->getOperand(1)});
936097a140dSpatrick }
93709467b48Spatrick }
93809467b48Spatrick
SelectFMA_W_CHAIN(SDNode * N)93909467b48Spatrick void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
94009467b48Spatrick SDLoc SL(N);
94109467b48Spatrick // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
94209467b48Spatrick SDValue Ops[10];
94309467b48Spatrick
94409467b48Spatrick SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
94509467b48Spatrick SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
94609467b48Spatrick SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
94709467b48Spatrick Ops[8] = N->getOperand(0);
94809467b48Spatrick Ops[9] = N->getOperand(4);
94909467b48Spatrick
950*d415bd75Srobert // If there are no source modifiers, prefer fmac over fma because it can use
951*d415bd75Srobert // the smaller VOP2 encoding.
952*d415bd75Srobert bool UseFMAC = Subtarget->hasDLInsts() &&
953*d415bd75Srobert cast<ConstantSDNode>(Ops[0])->isZero() &&
954*d415bd75Srobert cast<ConstantSDNode>(Ops[2])->isZero() &&
955*d415bd75Srobert cast<ConstantSDNode>(Ops[4])->isZero();
956*d415bd75Srobert unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
957*d415bd75Srobert CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), Ops);
95809467b48Spatrick }
95909467b48Spatrick
SelectFMUL_W_CHAIN(SDNode * N)96009467b48Spatrick void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
96109467b48Spatrick SDLoc SL(N);
96209467b48Spatrick // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
96309467b48Spatrick SDValue Ops[8];
96409467b48Spatrick
96509467b48Spatrick SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
96609467b48Spatrick SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
96709467b48Spatrick Ops[6] = N->getOperand(0);
96809467b48Spatrick Ops[7] = N->getOperand(3);
96909467b48Spatrick
97009467b48Spatrick CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
97109467b48Spatrick }
97209467b48Spatrick
97309467b48Spatrick // We need to handle this here because tablegen doesn't support matching
97409467b48Spatrick // instructions with multiple outputs.
SelectDIV_SCALE(SDNode * N)97509467b48Spatrick void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
97609467b48Spatrick SDLoc SL(N);
97709467b48Spatrick EVT VT = N->getValueType(0);
97809467b48Spatrick
97909467b48Spatrick assert(VT == MVT::f32 || VT == MVT::f64);
98009467b48Spatrick
98109467b48Spatrick unsigned Opc
98273471bf0Spatrick = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;
98309467b48Spatrick
98473471bf0Spatrick // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
98573471bf0Spatrick // omod
98673471bf0Spatrick SDValue Ops[8];
98773471bf0Spatrick SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
98873471bf0Spatrick SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
98973471bf0Spatrick SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);
99009467b48Spatrick CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
99109467b48Spatrick }
99209467b48Spatrick
99309467b48Spatrick // We need to handle this here because tablegen doesn't support matching
99409467b48Spatrick // instructions with multiple outputs.
SelectMAD_64_32(SDNode * N)99509467b48Spatrick void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
99609467b48Spatrick SDLoc SL(N);
99709467b48Spatrick bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
998*d415bd75Srobert unsigned Opc;
999*d415bd75Srobert if (Subtarget->hasMADIntraFwdBug())
1000*d415bd75Srobert Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1001*d415bd75Srobert : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1002*d415bd75Srobert else
1003*d415bd75Srobert Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
100409467b48Spatrick
100509467b48Spatrick SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
100609467b48Spatrick SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
100709467b48Spatrick Clamp };
100809467b48Spatrick CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
100909467b48Spatrick }
101009467b48Spatrick
1011*d415bd75Srobert // We need to handle this here because tablegen doesn't support matching
1012*d415bd75Srobert // instructions with multiple outputs.
SelectMUL_LOHI(SDNode * N)1013*d415bd75Srobert void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) {
1014*d415bd75Srobert SDLoc SL(N);
1015*d415bd75Srobert bool Signed = N->getOpcode() == ISD::SMUL_LOHI;
1016*d415bd75Srobert unsigned Opc;
1017*d415bd75Srobert if (Subtarget->hasMADIntraFwdBug())
1018*d415bd75Srobert Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1019*d415bd75Srobert : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1020*d415bd75Srobert else
1021*d415bd75Srobert Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
1022*d415bd75Srobert
1023*d415bd75Srobert SDValue Zero = CurDAG->getTargetConstant(0, SL, MVT::i64);
1024*d415bd75Srobert SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
1025*d415bd75Srobert SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Zero, Clamp};
1026*d415bd75Srobert SDNode *Mad = CurDAG->getMachineNode(Opc, SL, N->getVTList(), Ops);
1027*d415bd75Srobert if (!SDValue(N, 0).use_empty()) {
1028*d415bd75Srobert SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32);
1029*d415bd75Srobert SDNode *Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
1030*d415bd75Srobert MVT::i32, SDValue(Mad, 0), Sub0);
1031*d415bd75Srobert ReplaceUses(SDValue(N, 0), SDValue(Lo, 0));
1032*d415bd75Srobert }
1033*d415bd75Srobert if (!SDValue(N, 1).use_empty()) {
1034*d415bd75Srobert SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32);
1035*d415bd75Srobert SDNode *Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
1036*d415bd75Srobert MVT::i32, SDValue(Mad, 0), Sub1);
1037*d415bd75Srobert ReplaceUses(SDValue(N, 1), SDValue(Hi, 0));
1038*d415bd75Srobert }
1039*d415bd75Srobert CurDAG->RemoveDeadNode(N);
1040*d415bd75Srobert }
1041*d415bd75Srobert
isDSOffsetLegal(SDValue Base,unsigned Offset) const104273471bf0Spatrick bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset) const {
104373471bf0Spatrick if (!isUInt<16>(Offset))
104409467b48Spatrick return false;
104509467b48Spatrick
104673471bf0Spatrick if (!Base || Subtarget->hasUsableDSOffset() ||
104709467b48Spatrick Subtarget->unsafeDSOffsetFoldingEnabled())
104809467b48Spatrick return true;
104909467b48Spatrick
105009467b48Spatrick // On Southern Islands instruction with a negative base value and an offset
105109467b48Spatrick // don't seem to work.
105209467b48Spatrick return CurDAG->SignBitIsZero(Base);
105309467b48Spatrick }
105409467b48Spatrick
SelectDS1Addr1Offset(SDValue Addr,SDValue & Base,SDValue & Offset) const105509467b48Spatrick bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
105609467b48Spatrick SDValue &Offset) const {
105709467b48Spatrick SDLoc DL(Addr);
105809467b48Spatrick if (CurDAG->isBaseWithConstantOffset(Addr)) {
105909467b48Spatrick SDValue N0 = Addr.getOperand(0);
106009467b48Spatrick SDValue N1 = Addr.getOperand(1);
106109467b48Spatrick ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
106273471bf0Spatrick if (isDSOffsetLegal(N0, C1->getSExtValue())) {
106309467b48Spatrick // (add n0, c0)
106409467b48Spatrick Base = N0;
106509467b48Spatrick Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
106609467b48Spatrick return true;
106709467b48Spatrick }
106809467b48Spatrick } else if (Addr.getOpcode() == ISD::SUB) {
106909467b48Spatrick // sub C, x -> add (sub 0, x), C
107009467b48Spatrick if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
107109467b48Spatrick int64_t ByteOffset = C->getSExtValue();
107273471bf0Spatrick if (isDSOffsetLegal(SDValue(), ByteOffset)) {
107309467b48Spatrick SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
107409467b48Spatrick
107509467b48Spatrick // XXX - This is kind of hacky. Create a dummy sub node so we can check
107609467b48Spatrick // the known bits in isDSOffsetLegal. We need to emit the selected node
107709467b48Spatrick // here, so this is thrown away.
107809467b48Spatrick SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
107909467b48Spatrick Zero, Addr.getOperand(1));
108009467b48Spatrick
108173471bf0Spatrick if (isDSOffsetLegal(Sub, ByteOffset)) {
108209467b48Spatrick SmallVector<SDValue, 3> Opnds;
108309467b48Spatrick Opnds.push_back(Zero);
108409467b48Spatrick Opnds.push_back(Addr.getOperand(1));
108509467b48Spatrick
108609467b48Spatrick // FIXME: Select to VOP3 version for with-carry.
108773471bf0Spatrick unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
108809467b48Spatrick if (Subtarget->hasAddNoCarry()) {
108909467b48Spatrick SubOp = AMDGPU::V_SUB_U32_e64;
109009467b48Spatrick Opnds.push_back(
109109467b48Spatrick CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
109209467b48Spatrick }
109309467b48Spatrick
109409467b48Spatrick MachineSDNode *MachineSub =
109509467b48Spatrick CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);
109609467b48Spatrick
109709467b48Spatrick Base = SDValue(MachineSub, 0);
109809467b48Spatrick Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
109909467b48Spatrick return true;
110009467b48Spatrick }
110109467b48Spatrick }
110209467b48Spatrick }
110309467b48Spatrick } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
110409467b48Spatrick // If we have a constant address, prefer to put the constant into the
110509467b48Spatrick // offset. This can save moves to load the constant address since multiple
110609467b48Spatrick // operations can share the zero base address register, and enables merging
110709467b48Spatrick // into read2 / write2 instructions.
110809467b48Spatrick
110909467b48Spatrick SDLoc DL(Addr);
111009467b48Spatrick
111173471bf0Spatrick if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
111209467b48Spatrick SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
111309467b48Spatrick MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
111409467b48Spatrick DL, MVT::i32, Zero);
111509467b48Spatrick Base = SDValue(MovZero, 0);
111609467b48Spatrick Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
111709467b48Spatrick return true;
111809467b48Spatrick }
111909467b48Spatrick }
112009467b48Spatrick
112109467b48Spatrick // default case
112209467b48Spatrick Base = Addr;
112309467b48Spatrick Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
112409467b48Spatrick return true;
112509467b48Spatrick }
112609467b48Spatrick
isDSOffset2Legal(SDValue Base,unsigned Offset0,unsigned Offset1,unsigned Size) const112773471bf0Spatrick bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
112873471bf0Spatrick unsigned Offset1,
112973471bf0Spatrick unsigned Size) const {
113073471bf0Spatrick if (Offset0 % Size != 0 || Offset1 % Size != 0)
113173471bf0Spatrick return false;
113273471bf0Spatrick if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
113373471bf0Spatrick return false;
113473471bf0Spatrick
113573471bf0Spatrick if (!Base || Subtarget->hasUsableDSOffset() ||
113673471bf0Spatrick Subtarget->unsafeDSOffsetFoldingEnabled())
113773471bf0Spatrick return true;
113873471bf0Spatrick
113973471bf0Spatrick // On Southern Islands instruction with a negative base value and an offset
114073471bf0Spatrick // don't seem to work.
114173471bf0Spatrick return CurDAG->SignBitIsZero(Base);
114273471bf0Spatrick }
114373471bf0Spatrick
114409467b48Spatrick // TODO: If offset is too big, put low 16-bit into offset.
SelectDS64Bit4ByteAligned(SDValue Addr,SDValue & Base,SDValue & Offset0,SDValue & Offset1) const114509467b48Spatrick bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
114609467b48Spatrick SDValue &Offset0,
114709467b48Spatrick SDValue &Offset1) const {
114873471bf0Spatrick return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);
114973471bf0Spatrick }
115073471bf0Spatrick
SelectDS128Bit8ByteAligned(SDValue Addr,SDValue & Base,SDValue & Offset0,SDValue & Offset1) const115173471bf0Spatrick bool AMDGPUDAGToDAGISel::SelectDS128Bit8ByteAligned(SDValue Addr, SDValue &Base,
115273471bf0Spatrick SDValue &Offset0,
115373471bf0Spatrick SDValue &Offset1) const {
115473471bf0Spatrick return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);
115573471bf0Spatrick }
115673471bf0Spatrick
SelectDSReadWrite2(SDValue Addr,SDValue & Base,SDValue & Offset0,SDValue & Offset1,unsigned Size) const115773471bf0Spatrick bool AMDGPUDAGToDAGISel::SelectDSReadWrite2(SDValue Addr, SDValue &Base,
115873471bf0Spatrick SDValue &Offset0, SDValue &Offset1,
115973471bf0Spatrick unsigned Size) const {
116009467b48Spatrick SDLoc DL(Addr);
116109467b48Spatrick
116209467b48Spatrick if (CurDAG->isBaseWithConstantOffset(Addr)) {
116309467b48Spatrick SDValue N0 = Addr.getOperand(0);
116409467b48Spatrick SDValue N1 = Addr.getOperand(1);
116509467b48Spatrick ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
116673471bf0Spatrick unsigned OffsetValue0 = C1->getZExtValue();
116773471bf0Spatrick unsigned OffsetValue1 = OffsetValue0 + Size;
116873471bf0Spatrick
116909467b48Spatrick // (add n0, c0)
117073471bf0Spatrick if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {
117109467b48Spatrick Base = N0;
117273471bf0Spatrick Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
117373471bf0Spatrick Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
117409467b48Spatrick return true;
117509467b48Spatrick }
117609467b48Spatrick } else if (Addr.getOpcode() == ISD::SUB) {
117709467b48Spatrick // sub C, x -> add (sub 0, x), C
117873471bf0Spatrick if (const ConstantSDNode *C =
117973471bf0Spatrick dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
118073471bf0Spatrick unsigned OffsetValue0 = C->getZExtValue();
118173471bf0Spatrick unsigned OffsetValue1 = OffsetValue0 + Size;
118209467b48Spatrick
118373471bf0Spatrick if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
118409467b48Spatrick SDLoc DL(Addr);
118509467b48Spatrick SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
118609467b48Spatrick
118709467b48Spatrick // XXX - This is kind of hacky. Create a dummy sub node so we can check
118809467b48Spatrick // the known bits in isDSOffsetLegal. We need to emit the selected node
118909467b48Spatrick // here, so this is thrown away.
119073471bf0Spatrick SDValue Sub =
119173471bf0Spatrick CurDAG->getNode(ISD::SUB, DL, MVT::i32, Zero, Addr.getOperand(1));
119209467b48Spatrick
119373471bf0Spatrick if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {
119409467b48Spatrick SmallVector<SDValue, 3> Opnds;
119509467b48Spatrick Opnds.push_back(Zero);
119609467b48Spatrick Opnds.push_back(Addr.getOperand(1));
119773471bf0Spatrick unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
119809467b48Spatrick if (Subtarget->hasAddNoCarry()) {
119909467b48Spatrick SubOp = AMDGPU::V_SUB_U32_e64;
120009467b48Spatrick Opnds.push_back(
120109467b48Spatrick CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
120209467b48Spatrick }
120309467b48Spatrick
120473471bf0Spatrick MachineSDNode *MachineSub = CurDAG->getMachineNode(
120573471bf0Spatrick SubOp, DL, MVT::getIntegerVT(Size * 8), Opnds);
120609467b48Spatrick
120709467b48Spatrick Base = SDValue(MachineSub, 0);
120873471bf0Spatrick Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
120973471bf0Spatrick Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
121009467b48Spatrick return true;
121109467b48Spatrick }
121209467b48Spatrick }
121309467b48Spatrick }
121409467b48Spatrick } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
121573471bf0Spatrick unsigned OffsetValue0 = CAddr->getZExtValue();
121673471bf0Spatrick unsigned OffsetValue1 = OffsetValue0 + Size;
121709467b48Spatrick
121873471bf0Spatrick if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
121909467b48Spatrick SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
122073471bf0Spatrick MachineSDNode *MovZero =
122173471bf0Spatrick CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, Zero);
122209467b48Spatrick Base = SDValue(MovZero, 0);
122373471bf0Spatrick Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
122473471bf0Spatrick Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
122509467b48Spatrick return true;
122609467b48Spatrick }
122709467b48Spatrick }
122809467b48Spatrick
122909467b48Spatrick // default case
123009467b48Spatrick
123109467b48Spatrick Base = Addr;
123209467b48Spatrick Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
123309467b48Spatrick Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
123409467b48Spatrick return true;
123509467b48Spatrick }
123609467b48Spatrick
SelectMUBUF(SDValue Addr,SDValue & Ptr,SDValue & VAddr,SDValue & SOffset,SDValue & Offset,SDValue & Offen,SDValue & Idxen,SDValue & Addr64) const123773471bf0Spatrick bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
123873471bf0Spatrick SDValue &SOffset, SDValue &Offset,
123973471bf0Spatrick SDValue &Offen, SDValue &Idxen,
124073471bf0Spatrick SDValue &Addr64) const {
124109467b48Spatrick // Subtarget prefers to use flat instruction
1242097a140dSpatrick // FIXME: This should be a pattern predicate and not reach here
124309467b48Spatrick if (Subtarget->useFlatForGlobal())
124409467b48Spatrick return false;
124509467b48Spatrick
124609467b48Spatrick SDLoc DL(Addr);
124709467b48Spatrick
124809467b48Spatrick Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
124909467b48Spatrick Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
125009467b48Spatrick Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
125109467b48Spatrick SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
125209467b48Spatrick
125309467b48Spatrick ConstantSDNode *C1 = nullptr;
125409467b48Spatrick SDValue N0 = Addr;
125509467b48Spatrick if (CurDAG->isBaseWithConstantOffset(Addr)) {
125609467b48Spatrick C1 = cast<ConstantSDNode>(Addr.getOperand(1));
125709467b48Spatrick if (isUInt<32>(C1->getZExtValue()))
125809467b48Spatrick N0 = Addr.getOperand(0);
125909467b48Spatrick else
126009467b48Spatrick C1 = nullptr;
126109467b48Spatrick }
126209467b48Spatrick
126309467b48Spatrick if (N0.getOpcode() == ISD::ADD) {
126409467b48Spatrick // (add N2, N3) -> addr64, or
126509467b48Spatrick // (add (add N2, N3), C1) -> addr64
126609467b48Spatrick SDValue N2 = N0.getOperand(0);
126709467b48Spatrick SDValue N3 = N0.getOperand(1);
126809467b48Spatrick Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
126909467b48Spatrick
127009467b48Spatrick if (N2->isDivergent()) {
127109467b48Spatrick if (N3->isDivergent()) {
127209467b48Spatrick // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
127309467b48Spatrick // addr64, and construct the resource from a 0 address.
127409467b48Spatrick Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
127509467b48Spatrick VAddr = N0;
127609467b48Spatrick } else {
127709467b48Spatrick // N2 is divergent, N3 is not.
127809467b48Spatrick Ptr = N3;
127909467b48Spatrick VAddr = N2;
128009467b48Spatrick }
128109467b48Spatrick } else {
128209467b48Spatrick // N2 is not divergent.
128309467b48Spatrick Ptr = N2;
128409467b48Spatrick VAddr = N3;
128509467b48Spatrick }
128609467b48Spatrick Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
128709467b48Spatrick } else if (N0->isDivergent()) {
128809467b48Spatrick // N0 is divergent. Use it as the addr64, and construct the resource from a
128909467b48Spatrick // 0 address.
129009467b48Spatrick Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
129109467b48Spatrick VAddr = N0;
129209467b48Spatrick Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
129309467b48Spatrick } else {
129409467b48Spatrick // N0 -> offset, or
129509467b48Spatrick // (N0 + C1) -> offset
129609467b48Spatrick VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
129709467b48Spatrick Ptr = N0;
129809467b48Spatrick }
129909467b48Spatrick
130009467b48Spatrick if (!C1) {
130109467b48Spatrick // No offset.
130209467b48Spatrick Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
130309467b48Spatrick return true;
130409467b48Spatrick }
130509467b48Spatrick
130609467b48Spatrick if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
130709467b48Spatrick // Legal offset for instruction.
130809467b48Spatrick Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
130909467b48Spatrick return true;
131009467b48Spatrick }
131109467b48Spatrick
131209467b48Spatrick // Illegal offset, store it in soffset.
131309467b48Spatrick Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
131409467b48Spatrick SOffset =
131509467b48Spatrick SDValue(CurDAG->getMachineNode(
131609467b48Spatrick AMDGPU::S_MOV_B32, DL, MVT::i32,
131709467b48Spatrick CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
131809467b48Spatrick 0);
131909467b48Spatrick return true;
132009467b48Spatrick }
132109467b48Spatrick
SelectMUBUFAddr64(SDValue Addr,SDValue & SRsrc,SDValue & VAddr,SDValue & SOffset,SDValue & Offset) const132209467b48Spatrick bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
132309467b48Spatrick SDValue &VAddr, SDValue &SOffset,
132473471bf0Spatrick SDValue &Offset) const {
132509467b48Spatrick SDValue Ptr, Offen, Idxen, Addr64;
132609467b48Spatrick
132709467b48Spatrick // addr64 bit was removed for volcanic islands.
1328097a140dSpatrick // FIXME: This should be a pattern predicate and not reach here
132909467b48Spatrick if (!Subtarget->hasAddr64())
133009467b48Spatrick return false;
133109467b48Spatrick
133273471bf0Spatrick if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
133309467b48Spatrick return false;
133409467b48Spatrick
133509467b48Spatrick ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
133609467b48Spatrick if (C->getSExtValue()) {
133709467b48Spatrick SDLoc DL(Addr);
133809467b48Spatrick
133909467b48Spatrick const SITargetLowering& Lowering =
134009467b48Spatrick *static_cast<const SITargetLowering*>(getTargetLowering());
134109467b48Spatrick
134209467b48Spatrick SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
134309467b48Spatrick return true;
134409467b48Spatrick }
134509467b48Spatrick
134609467b48Spatrick return false;
134709467b48Spatrick }
134809467b48Spatrick
foldFrameIndex(SDValue N) const134909467b48Spatrick std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
1350097a140dSpatrick SDLoc DL(N);
135109467b48Spatrick
135273471bf0Spatrick auto *FI = dyn_cast<FrameIndexSDNode>(N);
135373471bf0Spatrick SDValue TFI =
135473471bf0Spatrick FI ? CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) : N;
135509467b48Spatrick
135673471bf0Spatrick // We rebase the base address into an absolute stack address and hence
135773471bf0Spatrick // use constant 0 for soffset. This value must be retained until
135873471bf0Spatrick // frame elimination and eliminateFrameIndex will choose the appropriate
135973471bf0Spatrick // frame register if need be.
1360*d415bd75Srobert return std::pair(TFI, CurDAG->getTargetConstant(0, DL, MVT::i32));
136109467b48Spatrick }
136209467b48Spatrick
SelectMUBUFScratchOffen(SDNode * Parent,SDValue Addr,SDValue & Rsrc,SDValue & VAddr,SDValue & SOffset,SDValue & ImmOffset) const136309467b48Spatrick bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
136409467b48Spatrick SDValue Addr, SDValue &Rsrc,
136509467b48Spatrick SDValue &VAddr, SDValue &SOffset,
136609467b48Spatrick SDValue &ImmOffset) const {
136709467b48Spatrick
136809467b48Spatrick SDLoc DL(Addr);
136909467b48Spatrick MachineFunction &MF = CurDAG->getMachineFunction();
137009467b48Spatrick const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
137109467b48Spatrick
137209467b48Spatrick Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
137309467b48Spatrick
137409467b48Spatrick if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
1375097a140dSpatrick int64_t Imm = CAddr->getSExtValue();
1376097a140dSpatrick const int64_t NullPtr =
1377097a140dSpatrick AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS);
1378097a140dSpatrick // Don't fold null pointer.
1379097a140dSpatrick if (Imm != NullPtr) {
138009467b48Spatrick SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
1381097a140dSpatrick MachineSDNode *MovHighBits = CurDAG->getMachineNode(
1382097a140dSpatrick AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
138309467b48Spatrick VAddr = SDValue(MovHighBits, 0);
138409467b48Spatrick
138573471bf0Spatrick SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
138609467b48Spatrick ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
138709467b48Spatrick return true;
138809467b48Spatrick }
1389097a140dSpatrick }
139009467b48Spatrick
139109467b48Spatrick if (CurDAG->isBaseWithConstantOffset(Addr)) {
139209467b48Spatrick // (add n0, c1)
139309467b48Spatrick
139409467b48Spatrick SDValue N0 = Addr.getOperand(0);
139509467b48Spatrick SDValue N1 = Addr.getOperand(1);
139609467b48Spatrick
139709467b48Spatrick // Offsets in vaddr must be positive if range checking is enabled.
139809467b48Spatrick //
139909467b48Spatrick // The total computation of vaddr + soffset + offset must not overflow. If
140009467b48Spatrick // vaddr is negative, even if offset is 0 the sgpr offset add will end up
140109467b48Spatrick // overflowing.
140209467b48Spatrick //
140309467b48Spatrick // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
140409467b48Spatrick // always perform a range check. If a negative vaddr base index was used,
140509467b48Spatrick // this would fail the range check. The overall address computation would
140609467b48Spatrick // compute a valid address, but this doesn't happen due to the range
140709467b48Spatrick // check. For out-of-bounds MUBUF loads, a 0 is returned.
140809467b48Spatrick //
140909467b48Spatrick // Therefore it should be safe to fold any VGPR offset on gfx9 into the
141009467b48Spatrick // MUBUF vaddr, but not on older subtargets which can only do this if the
141109467b48Spatrick // sign bit is known 0.
141209467b48Spatrick ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
141309467b48Spatrick if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
141409467b48Spatrick (!Subtarget->privateMemoryResourceIsRangeChecked() ||
141509467b48Spatrick CurDAG->SignBitIsZero(N0))) {
141609467b48Spatrick std::tie(VAddr, SOffset) = foldFrameIndex(N0);
141709467b48Spatrick ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
141809467b48Spatrick return true;
141909467b48Spatrick }
142009467b48Spatrick }
142109467b48Spatrick
142209467b48Spatrick // (node)
142309467b48Spatrick std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
142409467b48Spatrick ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
142509467b48Spatrick return true;
142609467b48Spatrick }
142709467b48Spatrick
IsCopyFromSGPR(const SIRegisterInfo & TRI,SDValue Val)142873471bf0Spatrick static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val) {
142973471bf0Spatrick if (Val.getOpcode() != ISD::CopyFromReg)
143073471bf0Spatrick return false;
1431*d415bd75Srobert auto Reg = cast<RegisterSDNode>(Val.getOperand(1))->getReg();
1432*d415bd75Srobert if (!Reg.isPhysical())
1433*d415bd75Srobert return false;
1434*d415bd75Srobert auto RC = TRI.getPhysRegBaseClass(Reg);
143573471bf0Spatrick return RC && TRI.isSGPRClass(RC);
143673471bf0Spatrick }
143773471bf0Spatrick
SelectMUBUFScratchOffset(SDNode * Parent,SDValue Addr,SDValue & SRsrc,SDValue & SOffset,SDValue & Offset) const143809467b48Spatrick bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
143909467b48Spatrick SDValue Addr,
144009467b48Spatrick SDValue &SRsrc,
144109467b48Spatrick SDValue &SOffset,
144209467b48Spatrick SDValue &Offset) const {
144373471bf0Spatrick const SIRegisterInfo *TRI =
144473471bf0Spatrick static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
144509467b48Spatrick MachineFunction &MF = CurDAG->getMachineFunction();
144609467b48Spatrick const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
144773471bf0Spatrick SDLoc DL(Addr);
144873471bf0Spatrick
144973471bf0Spatrick // CopyFromReg <sgpr>
145073471bf0Spatrick if (IsCopyFromSGPR(*TRI, Addr)) {
145173471bf0Spatrick SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
145273471bf0Spatrick SOffset = Addr;
145373471bf0Spatrick Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
145473471bf0Spatrick return true;
145573471bf0Spatrick }
145673471bf0Spatrick
145773471bf0Spatrick ConstantSDNode *CAddr;
145873471bf0Spatrick if (Addr.getOpcode() == ISD::ADD) {
145973471bf0Spatrick // Add (CopyFromReg <sgpr>) <constant>
146073471bf0Spatrick CAddr = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
146173471bf0Spatrick if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
146273471bf0Spatrick return false;
146373471bf0Spatrick if (!IsCopyFromSGPR(*TRI, Addr.getOperand(0)))
146473471bf0Spatrick return false;
146573471bf0Spatrick
146673471bf0Spatrick SOffset = Addr.getOperand(0);
146773471bf0Spatrick } else if ((CAddr = dyn_cast<ConstantSDNode>(Addr)) &&
146873471bf0Spatrick SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue())) {
146973471bf0Spatrick // <constant>
147073471bf0Spatrick SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
147173471bf0Spatrick } else {
147273471bf0Spatrick return false;
147373471bf0Spatrick }
147409467b48Spatrick
147509467b48Spatrick SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
147609467b48Spatrick
147709467b48Spatrick Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
147809467b48Spatrick return true;
147909467b48Spatrick }
148009467b48Spatrick
SelectMUBUFOffset(SDValue Addr,SDValue & SRsrc,SDValue & SOffset,SDValue & Offset) const148109467b48Spatrick bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
148273471bf0Spatrick SDValue &SOffset, SDValue &Offset
148373471bf0Spatrick ) const {
148409467b48Spatrick SDValue Ptr, VAddr, Offen, Idxen, Addr64;
148509467b48Spatrick const SIInstrInfo *TII =
148609467b48Spatrick static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
148709467b48Spatrick
148873471bf0Spatrick if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
148909467b48Spatrick return false;
149009467b48Spatrick
149109467b48Spatrick if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
149209467b48Spatrick !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
149309467b48Spatrick !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
149409467b48Spatrick uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
1495*d415bd75Srobert APInt::getAllOnes(32).getZExtValue(); // Size
149609467b48Spatrick SDLoc DL(Addr);
149709467b48Spatrick
149809467b48Spatrick const SITargetLowering& Lowering =
149909467b48Spatrick *static_cast<const SITargetLowering*>(getTargetLowering());
150009467b48Spatrick
150109467b48Spatrick SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
150209467b48Spatrick return true;
150309467b48Spatrick }
150409467b48Spatrick return false;
150509467b48Spatrick }
150609467b48Spatrick
150709467b48Spatrick // Find a load or store from corresponding pattern root.
150809467b48Spatrick // Roots may be build_vector, bitconvert or their combinations.
findMemSDNode(SDNode * N)150909467b48Spatrick static MemSDNode* findMemSDNode(SDNode *N) {
151009467b48Spatrick N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode();
151109467b48Spatrick if (MemSDNode *MN = dyn_cast<MemSDNode>(N))
151209467b48Spatrick return MN;
151309467b48Spatrick assert(isa<BuildVectorSDNode>(N));
151409467b48Spatrick for (SDValue V : N->op_values())
151509467b48Spatrick if (MemSDNode *MN =
151609467b48Spatrick dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V)))
151709467b48Spatrick return MN;
151809467b48Spatrick llvm_unreachable("cannot find MemSDNode in the pattern!");
151909467b48Spatrick }
152009467b48Spatrick
SelectFlatOffsetImpl(SDNode * N,SDValue Addr,SDValue & VAddr,SDValue & Offset,uint64_t FlatVariant) const152173471bf0Spatrick bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(SDNode *N, SDValue Addr,
152273471bf0Spatrick SDValue &VAddr, SDValue &Offset,
152373471bf0Spatrick uint64_t FlatVariant) const {
152409467b48Spatrick int64_t OffsetVal = 0;
152509467b48Spatrick
152673471bf0Spatrick unsigned AS = findMemSDNode(N)->getAddressSpace();
152773471bf0Spatrick
152873471bf0Spatrick bool CanHaveFlatSegmentOffsetBug =
152973471bf0Spatrick Subtarget->hasFlatSegmentOffsetBug() &&
153073471bf0Spatrick FlatVariant == SIInstrFlags::FLAT &&
153173471bf0Spatrick (AS == AMDGPUAS::FLAT_ADDRESS || AS == AMDGPUAS::GLOBAL_ADDRESS);
153273471bf0Spatrick
153373471bf0Spatrick if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {
1534097a140dSpatrick SDValue N0, N1;
153573471bf0Spatrick if (isBaseWithConstantOffset64(Addr, N0, N1)) {
153673471bf0Spatrick int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
153709467b48Spatrick
153809467b48Spatrick const SIInstrInfo *TII = Subtarget->getInstrInfo();
153973471bf0Spatrick if (TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
154009467b48Spatrick Addr = N0;
154109467b48Spatrick OffsetVal = COffsetVal;
154209467b48Spatrick } else {
154309467b48Spatrick // If the offset doesn't fit, put the low bits into the offset field and
154409467b48Spatrick // add the rest.
154573471bf0Spatrick //
154673471bf0Spatrick // For a FLAT instruction the hardware decides whether to access
154773471bf0Spatrick // global/scratch/shared memory based on the high bits of vaddr,
154873471bf0Spatrick // ignoring the offset field, so we have to ensure that when we add
154973471bf0Spatrick // remainder to vaddr it still points into the same underlying object.
155073471bf0Spatrick // The easiest way to do that is to make sure that we split the offset
155173471bf0Spatrick // into two pieces that are both >= 0 or both <= 0.
155209467b48Spatrick
155309467b48Spatrick SDLoc DL(N);
155473471bf0Spatrick uint64_t RemainderOffset;
155509467b48Spatrick
155673471bf0Spatrick std::tie(OffsetVal, RemainderOffset) =
155773471bf0Spatrick TII->splitFlatOffset(COffsetVal, AS, FlatVariant);
155809467b48Spatrick
155973471bf0Spatrick SDValue AddOffsetLo =
156073471bf0Spatrick getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
156173471bf0Spatrick SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
156273471bf0Spatrick
156373471bf0Spatrick if (Addr.getValueType().getSizeInBits() == 32) {
156473471bf0Spatrick SmallVector<SDValue, 3> Opnds;
156573471bf0Spatrick Opnds.push_back(N0);
156673471bf0Spatrick Opnds.push_back(AddOffsetLo);
156773471bf0Spatrick unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
156873471bf0Spatrick if (Subtarget->hasAddNoCarry()) {
156973471bf0Spatrick AddOp = AMDGPU::V_ADD_U32_e64;
157073471bf0Spatrick Opnds.push_back(Clamp);
157109467b48Spatrick }
157273471bf0Spatrick Addr = SDValue(CurDAG->getMachineNode(AddOp, DL, MVT::i32, Opnds), 0);
157309467b48Spatrick } else {
1574097a140dSpatrick // TODO: Should this try to use a scalar add pseudo if the base address
1575097a140dSpatrick // is uniform and saddr is usable?
157609467b48Spatrick SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
157709467b48Spatrick SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
157809467b48Spatrick
157973471bf0Spatrick SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
158073471bf0Spatrick DL, MVT::i32, N0, Sub0);
158173471bf0Spatrick SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
158273471bf0Spatrick DL, MVT::i32, N0, Sub1);
158309467b48Spatrick
1584097a140dSpatrick SDValue AddOffsetHi =
1585097a140dSpatrick getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
158609467b48Spatrick
158709467b48Spatrick SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);
158809467b48Spatrick
1589097a140dSpatrick SDNode *Add =
159073471bf0Spatrick CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64, DL, VTs,
159109467b48Spatrick {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
159209467b48Spatrick
159309467b48Spatrick SDNode *Addc = CurDAG->getMachineNode(
159409467b48Spatrick AMDGPU::V_ADDC_U32_e64, DL, VTs,
159509467b48Spatrick {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp});
159609467b48Spatrick
159709467b48Spatrick SDValue RegSequenceArgs[] = {
159809467b48Spatrick CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
1599097a140dSpatrick SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1};
160009467b48Spatrick
160109467b48Spatrick Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
1602097a140dSpatrick MVT::i64, RegSequenceArgs),
1603097a140dSpatrick 0);
1604097a140dSpatrick }
160509467b48Spatrick }
160609467b48Spatrick }
160773471bf0Spatrick }
160809467b48Spatrick
160909467b48Spatrick VAddr = Addr;
161009467b48Spatrick Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
161109467b48Spatrick return true;
161209467b48Spatrick }
161309467b48Spatrick
SelectFlatOffset(SDNode * N,SDValue Addr,SDValue & VAddr,SDValue & Offset) const161473471bf0Spatrick bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N, SDValue Addr,
161509467b48Spatrick SDValue &VAddr,
161673471bf0Spatrick SDValue &Offset) const {
161773471bf0Spatrick return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, SIInstrFlags::FLAT);
161809467b48Spatrick }
161909467b48Spatrick
SelectGlobalOffset(SDNode * N,SDValue Addr,SDValue & VAddr,SDValue & Offset) const162073471bf0Spatrick bool AMDGPUDAGToDAGISel::SelectGlobalOffset(SDNode *N, SDValue Addr,
162109467b48Spatrick SDValue &VAddr,
162273471bf0Spatrick SDValue &Offset) const {
162373471bf0Spatrick return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, SIInstrFlags::FlatGlobal);
162473471bf0Spatrick }
162573471bf0Spatrick
SelectScratchOffset(SDNode * N,SDValue Addr,SDValue & VAddr,SDValue & Offset) const162673471bf0Spatrick bool AMDGPUDAGToDAGISel::SelectScratchOffset(SDNode *N, SDValue Addr,
162773471bf0Spatrick SDValue &VAddr,
162873471bf0Spatrick SDValue &Offset) const {
162973471bf0Spatrick return SelectFlatOffsetImpl(N, Addr, VAddr, Offset,
163073471bf0Spatrick SIInstrFlags::FlatScratch);
163173471bf0Spatrick }
163273471bf0Spatrick
163373471bf0Spatrick // If this matches zero_extend i32:x, return x
matchZExtFromI32(SDValue Op)163473471bf0Spatrick static SDValue matchZExtFromI32(SDValue Op) {
163573471bf0Spatrick if (Op.getOpcode() != ISD::ZERO_EXTEND)
163673471bf0Spatrick return SDValue();
163773471bf0Spatrick
163873471bf0Spatrick SDValue ExtSrc = Op.getOperand(0);
163973471bf0Spatrick return (ExtSrc.getValueType() == MVT::i32) ? ExtSrc : SDValue();
164073471bf0Spatrick }
164173471bf0Spatrick
164273471bf0Spatrick // Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
SelectGlobalSAddr(SDNode * N,SDValue Addr,SDValue & SAddr,SDValue & VOffset,SDValue & Offset) const164373471bf0Spatrick bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
164473471bf0Spatrick SDValue Addr,
164573471bf0Spatrick SDValue &SAddr,
164673471bf0Spatrick SDValue &VOffset,
164773471bf0Spatrick SDValue &Offset) const {
164873471bf0Spatrick int64_t ImmOffset = 0;
164973471bf0Spatrick
165073471bf0Spatrick // Match the immediate offset first, which canonically is moved as low as
165173471bf0Spatrick // possible.
165273471bf0Spatrick
165373471bf0Spatrick SDValue LHS, RHS;
165473471bf0Spatrick if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
165573471bf0Spatrick int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
165673471bf0Spatrick const SIInstrInfo *TII = Subtarget->getInstrInfo();
165773471bf0Spatrick
165873471bf0Spatrick if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
165973471bf0Spatrick SIInstrFlags::FlatGlobal)) {
166073471bf0Spatrick Addr = LHS;
166173471bf0Spatrick ImmOffset = COffsetVal;
166273471bf0Spatrick } else if (!LHS->isDivergent()) {
166373471bf0Spatrick if (COffsetVal > 0) {
166473471bf0Spatrick SDLoc SL(N);
166573471bf0Spatrick // saddr + large_offset -> saddr +
166673471bf0Spatrick // (voffset = large_offset & ~MaxOffset) +
166773471bf0Spatrick // (large_offset & MaxOffset);
166873471bf0Spatrick int64_t SplitImmOffset, RemainderOffset;
166973471bf0Spatrick std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
167073471bf0Spatrick COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal);
167173471bf0Spatrick
167273471bf0Spatrick if (isUInt<32>(RemainderOffset)) {
167373471bf0Spatrick SDNode *VMov = CurDAG->getMachineNode(
167473471bf0Spatrick AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
167573471bf0Spatrick CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
167673471bf0Spatrick VOffset = SDValue(VMov, 0);
167773471bf0Spatrick SAddr = LHS;
167873471bf0Spatrick Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
167973471bf0Spatrick return true;
168073471bf0Spatrick }
168173471bf0Spatrick }
168273471bf0Spatrick
168373471bf0Spatrick // We are adding a 64 bit SGPR and a constant. If constant bus limit
168473471bf0Spatrick // is 1 we would need to perform 1 or 2 extra moves for each half of
168573471bf0Spatrick // the constant and it is better to do a scalar add and then issue a
168673471bf0Spatrick // single VALU instruction to materialize zero. Otherwise it is less
168773471bf0Spatrick // instructions to perform VALU adds with immediates or inline literals.
168873471bf0Spatrick unsigned NumLiterals =
168973471bf0Spatrick !TII->isInlineConstant(APInt(32, COffsetVal & 0xffffffff)) +
169073471bf0Spatrick !TII->isInlineConstant(APInt(32, COffsetVal >> 32));
169173471bf0Spatrick if (Subtarget->getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
169273471bf0Spatrick return false;
169373471bf0Spatrick }
169473471bf0Spatrick }
169573471bf0Spatrick
169673471bf0Spatrick // Match the variable offset.
169773471bf0Spatrick if (Addr.getOpcode() == ISD::ADD) {
169873471bf0Spatrick LHS = Addr.getOperand(0);
169973471bf0Spatrick RHS = Addr.getOperand(1);
170073471bf0Spatrick
170173471bf0Spatrick if (!LHS->isDivergent()) {
170273471bf0Spatrick // add (i64 sgpr), (zero_extend (i32 vgpr))
170373471bf0Spatrick if (SDValue ZextRHS = matchZExtFromI32(RHS)) {
170473471bf0Spatrick SAddr = LHS;
170573471bf0Spatrick VOffset = ZextRHS;
170673471bf0Spatrick }
170773471bf0Spatrick }
170873471bf0Spatrick
170973471bf0Spatrick if (!SAddr && !RHS->isDivergent()) {
171073471bf0Spatrick // add (zero_extend (i32 vgpr)), (i64 sgpr)
171173471bf0Spatrick if (SDValue ZextLHS = matchZExtFromI32(LHS)) {
171273471bf0Spatrick SAddr = RHS;
171373471bf0Spatrick VOffset = ZextLHS;
171473471bf0Spatrick }
171573471bf0Spatrick }
171673471bf0Spatrick
171773471bf0Spatrick if (SAddr) {
171873471bf0Spatrick Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
171973471bf0Spatrick return true;
172073471bf0Spatrick }
172173471bf0Spatrick }
172273471bf0Spatrick
172373471bf0Spatrick if (Addr->isDivergent() || Addr.getOpcode() == ISD::UNDEF ||
172473471bf0Spatrick isa<ConstantSDNode>(Addr))
172573471bf0Spatrick return false;
172673471bf0Spatrick
172773471bf0Spatrick // It's cheaper to materialize a single 32-bit zero for vaddr than the two
172873471bf0Spatrick // moves required to copy a 64-bit SGPR to VGPR.
172973471bf0Spatrick SAddr = Addr;
173073471bf0Spatrick SDNode *VMov =
173173471bf0Spatrick CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
173273471bf0Spatrick CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
173373471bf0Spatrick VOffset = SDValue(VMov, 0);
173473471bf0Spatrick Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
173573471bf0Spatrick return true;
173673471bf0Spatrick }
173773471bf0Spatrick
SelectSAddrFI(SelectionDAG * CurDAG,SDValue SAddr)173873471bf0Spatrick static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr) {
173973471bf0Spatrick if (auto FI = dyn_cast<FrameIndexSDNode>(SAddr)) {
174073471bf0Spatrick SAddr = CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));
174173471bf0Spatrick } else if (SAddr.getOpcode() == ISD::ADD &&
174273471bf0Spatrick isa<FrameIndexSDNode>(SAddr.getOperand(0))) {
174373471bf0Spatrick // Materialize this into a scalar move for scalar address to avoid
174473471bf0Spatrick // readfirstlane.
174573471bf0Spatrick auto FI = cast<FrameIndexSDNode>(SAddr.getOperand(0));
174673471bf0Spatrick SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
174773471bf0Spatrick FI->getValueType(0));
174873471bf0Spatrick SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, SDLoc(SAddr),
174973471bf0Spatrick MVT::i32, TFI, SAddr.getOperand(1)),
175073471bf0Spatrick 0);
175173471bf0Spatrick }
175273471bf0Spatrick
175373471bf0Spatrick return SAddr;
175473471bf0Spatrick }
175573471bf0Spatrick
175673471bf0Spatrick // Match (32-bit SGPR base) + sext(imm offset)
SelectScratchSAddr(SDNode * Parent,SDValue Addr,SDValue & SAddr,SDValue & Offset) const175773471bf0Spatrick bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
175873471bf0Spatrick SDValue &SAddr,
175973471bf0Spatrick SDValue &Offset) const {
176073471bf0Spatrick if (Addr->isDivergent())
176173471bf0Spatrick return false;
176273471bf0Spatrick
176373471bf0Spatrick SDLoc DL(Addr);
176473471bf0Spatrick
176573471bf0Spatrick int64_t COffsetVal = 0;
176673471bf0Spatrick
176773471bf0Spatrick if (CurDAG->isBaseWithConstantOffset(Addr)) {
176873471bf0Spatrick COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
176973471bf0Spatrick SAddr = Addr.getOperand(0);
177073471bf0Spatrick } else {
177173471bf0Spatrick SAddr = Addr;
177273471bf0Spatrick }
177373471bf0Spatrick
177473471bf0Spatrick SAddr = SelectSAddrFI(CurDAG, SAddr);
177573471bf0Spatrick
177673471bf0Spatrick const SIInstrInfo *TII = Subtarget->getInstrInfo();
177773471bf0Spatrick
177873471bf0Spatrick if (!TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS,
177973471bf0Spatrick SIInstrFlags::FlatScratch)) {
178073471bf0Spatrick int64_t SplitImmOffset, RemainderOffset;
178173471bf0Spatrick std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
178273471bf0Spatrick COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, SIInstrFlags::FlatScratch);
178373471bf0Spatrick
178473471bf0Spatrick COffsetVal = SplitImmOffset;
178573471bf0Spatrick
178673471bf0Spatrick SDValue AddOffset =
178773471bf0Spatrick SAddr.getOpcode() == ISD::TargetFrameIndex
178873471bf0Spatrick ? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)
178973471bf0Spatrick : CurDAG->getTargetConstant(RemainderOffset, DL, MVT::i32);
179073471bf0Spatrick SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, DL, MVT::i32,
179173471bf0Spatrick SAddr, AddOffset),
179273471bf0Spatrick 0);
179373471bf0Spatrick }
179473471bf0Spatrick
179573471bf0Spatrick Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i16);
179673471bf0Spatrick
179773471bf0Spatrick return true;
179809467b48Spatrick }
179909467b48Spatrick
1800*d415bd75Srobert // Check whether the flat scratch SVS swizzle bug affects this access.
checkFlatScratchSVSSwizzleBug(SDValue VAddr,SDValue SAddr,uint64_t ImmOffset) const1801*d415bd75Srobert bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
1802*d415bd75Srobert SDValue VAddr, SDValue SAddr, uint64_t ImmOffset) const {
1803*d415bd75Srobert if (!Subtarget->hasFlatScratchSVSSwizzleBug())
1804*d415bd75Srobert return false;
1805*d415bd75Srobert
1806*d415bd75Srobert // The bug affects the swizzling of SVS accesses if there is any carry out
1807*d415bd75Srobert // from the two low order bits (i.e. from bit 1 into bit 2) when adding
1808*d415bd75Srobert // voffset to (soffset + inst_offset).
1809*d415bd75Srobert KnownBits VKnown = CurDAG->computeKnownBits(VAddr);
1810*d415bd75Srobert KnownBits SKnown = KnownBits::computeForAddSub(
1811*d415bd75Srobert true, false, CurDAG->computeKnownBits(SAddr),
1812*d415bd75Srobert KnownBits::makeConstant(APInt(32, ImmOffset)));
1813*d415bd75Srobert uint64_t VMax = VKnown.getMaxValue().getZExtValue();
1814*d415bd75Srobert uint64_t SMax = SKnown.getMaxValue().getZExtValue();
1815*d415bd75Srobert return (VMax & 3) + (SMax & 3) >= 4;
1816*d415bd75Srobert }
1817*d415bd75Srobert
SelectScratchSVAddr(SDNode * N,SDValue Addr,SDValue & VAddr,SDValue & SAddr,SDValue & Offset) const1818*d415bd75Srobert bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
1819*d415bd75Srobert SDValue &VAddr, SDValue &SAddr,
1820*d415bd75Srobert SDValue &Offset) const {
1821*d415bd75Srobert int64_t ImmOffset = 0;
1822*d415bd75Srobert
1823*d415bd75Srobert SDValue LHS, RHS;
1824*d415bd75Srobert if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
1825*d415bd75Srobert int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
1826*d415bd75Srobert const SIInstrInfo *TII = Subtarget->getInstrInfo();
1827*d415bd75Srobert
1828*d415bd75Srobert if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true)) {
1829*d415bd75Srobert Addr = LHS;
1830*d415bd75Srobert ImmOffset = COffsetVal;
1831*d415bd75Srobert } else if (!LHS->isDivergent() && COffsetVal > 0) {
1832*d415bd75Srobert SDLoc SL(N);
1833*d415bd75Srobert // saddr + large_offset -> saddr + (vaddr = large_offset & ~MaxOffset) +
1834*d415bd75Srobert // (large_offset & MaxOffset);
1835*d415bd75Srobert int64_t SplitImmOffset, RemainderOffset;
1836*d415bd75Srobert std::tie(SplitImmOffset, RemainderOffset)
1837*d415bd75Srobert = TII->splitFlatOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true);
1838*d415bd75Srobert
1839*d415bd75Srobert if (isUInt<32>(RemainderOffset)) {
1840*d415bd75Srobert SDNode *VMov = CurDAG->getMachineNode(
1841*d415bd75Srobert AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
1842*d415bd75Srobert CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
1843*d415bd75Srobert VAddr = SDValue(VMov, 0);
1844*d415bd75Srobert SAddr = LHS;
1845*d415bd75Srobert if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
1846*d415bd75Srobert return false;
1847*d415bd75Srobert Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
1848*d415bd75Srobert return true;
1849*d415bd75Srobert }
1850*d415bd75Srobert }
1851*d415bd75Srobert }
1852*d415bd75Srobert
1853*d415bd75Srobert if (Addr.getOpcode() != ISD::ADD)
1854*d415bd75Srobert return false;
1855*d415bd75Srobert
1856*d415bd75Srobert LHS = Addr.getOperand(0);
1857*d415bd75Srobert RHS = Addr.getOperand(1);
1858*d415bd75Srobert
1859*d415bd75Srobert if (!LHS->isDivergent() && RHS->isDivergent()) {
1860*d415bd75Srobert SAddr = LHS;
1861*d415bd75Srobert VAddr = RHS;
1862*d415bd75Srobert } else if (!RHS->isDivergent() && LHS->isDivergent()) {
1863*d415bd75Srobert SAddr = RHS;
1864*d415bd75Srobert VAddr = LHS;
1865*d415bd75Srobert } else {
1866*d415bd75Srobert return false;
1867*d415bd75Srobert }
1868*d415bd75Srobert
1869*d415bd75Srobert if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
1870*d415bd75Srobert return false;
1871*d415bd75Srobert SAddr = SelectSAddrFI(CurDAG, SAddr);
1872*d415bd75Srobert Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
1873*d415bd75Srobert return true;
1874*d415bd75Srobert }
1875*d415bd75Srobert
1876*d415bd75Srobert // Match an immediate (if Offset is not null) or an SGPR (if SOffset is
1877*d415bd75Srobert // not null) offset. If Imm32Only is true, match only 32-bit immediate
1878*d415bd75Srobert // offsets available on CI.
SelectSMRDOffset(SDValue ByteOffsetNode,SDValue * SOffset,SDValue * Offset,bool Imm32Only,bool IsBuffer) const187909467b48Spatrick bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
1880*d415bd75Srobert SDValue *SOffset, SDValue *Offset,
1881*d415bd75Srobert bool Imm32Only, bool IsBuffer) const {
1882*d415bd75Srobert assert((!SOffset || !Offset) &&
1883*d415bd75Srobert "Cannot match both soffset and offset at the same time!");
1884*d415bd75Srobert
188509467b48Spatrick ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1886097a140dSpatrick if (!C) {
1887*d415bd75Srobert if (!SOffset)
1888*d415bd75Srobert return false;
1889097a140dSpatrick if (ByteOffsetNode.getValueType().isScalarInteger() &&
1890097a140dSpatrick ByteOffsetNode.getValueType().getSizeInBits() == 32) {
1891*d415bd75Srobert *SOffset = ByteOffsetNode;
1892097a140dSpatrick return true;
1893097a140dSpatrick }
1894097a140dSpatrick if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) {
1895097a140dSpatrick if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) {
1896*d415bd75Srobert *SOffset = ByteOffsetNode.getOperand(0);
1897097a140dSpatrick return true;
1898097a140dSpatrick }
1899097a140dSpatrick }
190009467b48Spatrick return false;
1901097a140dSpatrick }
190209467b48Spatrick
190309467b48Spatrick SDLoc SL(ByteOffsetNode);
1904*d415bd75Srobert
1905*d415bd75Srobert // GFX9 and GFX10 have signed byte immediate offsets. The immediate
1906*d415bd75Srobert // offset for S_BUFFER instructions is unsigned.
1907*d415bd75Srobert int64_t ByteOffset = IsBuffer ? C->getZExtValue() : C->getSExtValue();
1908*d415bd75Srobert std::optional<int64_t> EncodedOffset =
1909*d415bd75Srobert AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, IsBuffer);
1910*d415bd75Srobert if (EncodedOffset && Offset && !Imm32Only) {
1911*d415bd75Srobert *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
191209467b48Spatrick return true;
191309467b48Spatrick }
191409467b48Spatrick
1915097a140dSpatrick // SGPR and literal offsets are unsigned.
1916097a140dSpatrick if (ByteOffset < 0)
191709467b48Spatrick return false;
191809467b48Spatrick
1919097a140dSpatrick EncodedOffset = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset);
1920*d415bd75Srobert if (EncodedOffset && Offset && Imm32Only) {
1921*d415bd75Srobert *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
1922097a140dSpatrick return true;
192309467b48Spatrick }
1924097a140dSpatrick
1925097a140dSpatrick if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))
1926097a140dSpatrick return false;
1927097a140dSpatrick
1928*d415bd75Srobert if (SOffset) {
1929097a140dSpatrick SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1930*d415bd75Srobert *SOffset = SDValue(
1931097a140dSpatrick CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
193209467b48Spatrick return true;
193309467b48Spatrick }
193409467b48Spatrick
1935*d415bd75Srobert return false;
1936*d415bd75Srobert }
1937*d415bd75Srobert
Expand32BitAddress(SDValue Addr) const193809467b48Spatrick SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
193909467b48Spatrick if (Addr.getValueType() != MVT::i32)
194009467b48Spatrick return Addr;
194109467b48Spatrick
194209467b48Spatrick // Zero-extend a 32-bit address.
194309467b48Spatrick SDLoc SL(Addr);
194409467b48Spatrick
194509467b48Spatrick const MachineFunction &MF = CurDAG->getMachineFunction();
194609467b48Spatrick const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
194709467b48Spatrick unsigned AddrHiVal = Info->get32BitAddressHighBits();
194809467b48Spatrick SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
194909467b48Spatrick
195009467b48Spatrick const SDValue Ops[] = {
195109467b48Spatrick CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
195209467b48Spatrick Addr,
195309467b48Spatrick CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
195409467b48Spatrick SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
195509467b48Spatrick 0),
195609467b48Spatrick CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
195709467b48Spatrick };
195809467b48Spatrick
195909467b48Spatrick return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
196009467b48Spatrick Ops), 0);
196109467b48Spatrick }
196209467b48Spatrick
1963*d415bd75Srobert // Match a base and an immediate (if Offset is not null) or an SGPR (if
1964*d415bd75Srobert // SOffset is not null) or an immediate+SGPR offset. If Imm32Only is
1965*d415bd75Srobert // true, match only 32-bit immediate offsets available on CI.
SelectSMRDBaseOffset(SDValue Addr,SDValue & SBase,SDValue * SOffset,SDValue * Offset,bool Imm32Only,bool IsBuffer) const1966*d415bd75Srobert bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
1967*d415bd75Srobert SDValue *SOffset, SDValue *Offset,
1968*d415bd75Srobert bool Imm32Only,
1969*d415bd75Srobert bool IsBuffer) const {
1970*d415bd75Srobert if (SOffset && Offset) {
1971*d415bd75Srobert assert(!Imm32Only && !IsBuffer);
1972*d415bd75Srobert SDValue B;
1973*d415bd75Srobert return SelectSMRDBaseOffset(Addr, B, nullptr, Offset) &&
1974*d415bd75Srobert SelectSMRDBaseOffset(B, SBase, SOffset, nullptr);
1975*d415bd75Srobert }
197609467b48Spatrick
197709467b48Spatrick // A 32-bit (address + offset) should not cause unsigned 32-bit integer
197809467b48Spatrick // wraparound, because s_load instructions perform the addition in 64 bits.
1979*d415bd75Srobert if (Addr.getValueType() == MVT::i32 && Addr.getOpcode() == ISD::ADD &&
1980*d415bd75Srobert !Addr->getFlags().hasNoUnsignedWrap())
1981*d415bd75Srobert return false;
1982*d415bd75Srobert
1983097a140dSpatrick SDValue N0, N1;
1984097a140dSpatrick // Extract the base and offset if possible.
1985*d415bd75Srobert if (CurDAG->isBaseWithConstantOffset(Addr) || Addr.getOpcode() == ISD::ADD) {
1986097a140dSpatrick N0 = Addr.getOperand(0);
1987097a140dSpatrick N1 = Addr.getOperand(1);
1988097a140dSpatrick } else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) {
1989097a140dSpatrick assert(N0 && N1 && isa<ConstantSDNode>(N1));
1990097a140dSpatrick }
1991*d415bd75Srobert if (!N0 || !N1)
1992*d415bd75Srobert return false;
1993*d415bd75Srobert if (SelectSMRDOffset(N1, SOffset, Offset, Imm32Only, IsBuffer)) {
1994*d415bd75Srobert SBase = N0;
199509467b48Spatrick return true;
199609467b48Spatrick }
1997*d415bd75Srobert if (SelectSMRDOffset(N0, SOffset, Offset, Imm32Only, IsBuffer)) {
1998*d415bd75Srobert SBase = N1;
1999*d415bd75Srobert return true;
200009467b48Spatrick }
2001*d415bd75Srobert return false;
2002097a140dSpatrick }
2003*d415bd75Srobert
SelectSMRD(SDValue Addr,SDValue & SBase,SDValue * SOffset,SDValue * Offset,bool Imm32Only) const2004*d415bd75Srobert bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
2005*d415bd75Srobert SDValue *SOffset, SDValue *Offset,
2006*d415bd75Srobert bool Imm32Only) const {
2007*d415bd75Srobert if (SelectSMRDBaseOffset(Addr, SBase, SOffset, Offset, Imm32Only)) {
2008*d415bd75Srobert SBase = Expand32BitAddress(SBase);
2009*d415bd75Srobert return true;
2010*d415bd75Srobert }
2011*d415bd75Srobert
2012*d415bd75Srobert if (Addr.getValueType() == MVT::i32 && Offset && !SOffset) {
201309467b48Spatrick SBase = Expand32BitAddress(Addr);
2014*d415bd75Srobert *Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
201509467b48Spatrick return true;
201609467b48Spatrick }
201709467b48Spatrick
2018*d415bd75Srobert return false;
2019*d415bd75Srobert }
2020*d415bd75Srobert
SelectSMRDImm(SDValue Addr,SDValue & SBase,SDValue & Offset) const202109467b48Spatrick bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
202209467b48Spatrick SDValue &Offset) const {
2023*d415bd75Srobert return SelectSMRD(Addr, SBase, /* SOffset */ nullptr, &Offset);
202409467b48Spatrick }
202509467b48Spatrick
SelectSMRDImm32(SDValue Addr,SDValue & SBase,SDValue & Offset) const202609467b48Spatrick bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
202709467b48Spatrick SDValue &Offset) const {
2028097a140dSpatrick assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
2029*d415bd75Srobert return SelectSMRD(Addr, SBase, /* SOffset */ nullptr, &Offset,
2030*d415bd75Srobert /* Imm32Only */ true);
203109467b48Spatrick }
203209467b48Spatrick
SelectSMRDSgpr(SDValue Addr,SDValue & SBase,SDValue & SOffset) const203309467b48Spatrick bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
2034*d415bd75Srobert SDValue &SOffset) const {
2035*d415bd75Srobert return SelectSMRD(Addr, SBase, &SOffset, /* Offset */ nullptr);
2036*d415bd75Srobert }
2037*d415bd75Srobert
SelectSMRDSgprImm(SDValue Addr,SDValue & SBase,SDValue & SOffset,SDValue & Offset) const2038*d415bd75Srobert bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm(SDValue Addr, SDValue &SBase,
2039*d415bd75Srobert SDValue &SOffset,
204009467b48Spatrick SDValue &Offset) const {
2041*d415bd75Srobert return SelectSMRD(Addr, SBase, &SOffset, &Offset);
204209467b48Spatrick }
204309467b48Spatrick
SelectSMRDBufferImm(SDValue N,SDValue & Offset) const2044*d415bd75Srobert bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue N, SDValue &Offset) const {
2045*d415bd75Srobert return SelectSMRDOffset(N, /* SOffset */ nullptr, &Offset,
2046*d415bd75Srobert /* Imm32Only */ false, /* IsBuffer */ true);
2047097a140dSpatrick }
2048097a140dSpatrick
SelectSMRDBufferImm32(SDValue N,SDValue & Offset) const2049*d415bd75Srobert bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue N,
205009467b48Spatrick SDValue &Offset) const {
2051097a140dSpatrick assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
2052*d415bd75Srobert return SelectSMRDOffset(N, /* SOffset */ nullptr, &Offset,
2053*d415bd75Srobert /* Imm32Only */ true, /* IsBuffer */ true);
2054097a140dSpatrick }
205509467b48Spatrick
SelectSMRDBufferSgprImm(SDValue N,SDValue & SOffset,SDValue & Offset) const2056*d415bd75Srobert bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
2057*d415bd75Srobert SDValue &Offset) const {
2058*d415bd75Srobert // Match the (soffset + offset) pair as a 32-bit register base and
2059*d415bd75Srobert // an immediate offset.
2060*d415bd75Srobert return N.getValueType() == MVT::i32 &&
2061*d415bd75Srobert SelectSMRDBaseOffset(N, /* SBase */ SOffset, /* SOffset*/ nullptr,
2062*d415bd75Srobert &Offset, /* Imm32Only */ false,
2063*d415bd75Srobert /* IsBuffer */ true);
206409467b48Spatrick }
206509467b48Spatrick
SelectMOVRELOffset(SDValue Index,SDValue & Base,SDValue & Offset) const206609467b48Spatrick bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
206709467b48Spatrick SDValue &Base,
206809467b48Spatrick SDValue &Offset) const {
206909467b48Spatrick SDLoc DL(Index);
207009467b48Spatrick
207109467b48Spatrick if (CurDAG->isBaseWithConstantOffset(Index)) {
207209467b48Spatrick SDValue N0 = Index.getOperand(0);
207309467b48Spatrick SDValue N1 = Index.getOperand(1);
207409467b48Spatrick ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
207509467b48Spatrick
207609467b48Spatrick // (add n0, c0)
207709467b48Spatrick // Don't peel off the offset (c0) if doing so could possibly lead
207809467b48Spatrick // the base (n0) to be negative.
2079097a140dSpatrick // (or n0, |c0|) can never change a sign given isBaseWithConstantOffset.
2080097a140dSpatrick if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0) ||
2081097a140dSpatrick (Index->getOpcode() == ISD::OR && C1->getSExtValue() >= 0)) {
208209467b48Spatrick Base = N0;
208309467b48Spatrick Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
208409467b48Spatrick return true;
208509467b48Spatrick }
208609467b48Spatrick }
208709467b48Spatrick
208809467b48Spatrick if (isa<ConstantSDNode>(Index))
208909467b48Spatrick return false;
209009467b48Spatrick
209109467b48Spatrick Base = Index;
209209467b48Spatrick Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
209309467b48Spatrick return true;
209409467b48Spatrick }
209509467b48Spatrick
getBFE32(bool IsSigned,const SDLoc & DL,SDValue Val,uint32_t Offset,uint32_t Width)2096*d415bd75Srobert SDNode *AMDGPUDAGToDAGISel::getBFE32(bool IsSigned, const SDLoc &DL,
209709467b48Spatrick SDValue Val, uint32_t Offset,
209809467b48Spatrick uint32_t Width) {
2099*d415bd75Srobert if (Val->isDivergent()) {
2100*d415bd75Srobert unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
2101*d415bd75Srobert SDValue Off = CurDAG->getTargetConstant(Offset, DL, MVT::i32);
2102*d415bd75Srobert SDValue W = CurDAG->getTargetConstant(Width, DL, MVT::i32);
2103*d415bd75Srobert
2104*d415bd75Srobert return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, Off, W);
2105*d415bd75Srobert }
2106*d415bd75Srobert unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
210709467b48Spatrick // Transformation function, pack the offset and width of a BFE into
210809467b48Spatrick // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
210909467b48Spatrick // source, bits [5:0] contain the offset and bits [22:16] the width.
211009467b48Spatrick uint32_t PackedVal = Offset | (Width << 16);
211109467b48Spatrick SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
211209467b48Spatrick
211309467b48Spatrick return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
211409467b48Spatrick }
211509467b48Spatrick
SelectS_BFEFromShifts(SDNode * N)211609467b48Spatrick void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
211709467b48Spatrick // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
211809467b48Spatrick // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
211909467b48Spatrick // Predicate: 0 < b <= c < 32
212009467b48Spatrick
212109467b48Spatrick const SDValue &Shl = N->getOperand(0);
212209467b48Spatrick ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
212309467b48Spatrick ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
212409467b48Spatrick
212509467b48Spatrick if (B && C) {
212609467b48Spatrick uint32_t BVal = B->getZExtValue();
212709467b48Spatrick uint32_t CVal = C->getZExtValue();
212809467b48Spatrick
212909467b48Spatrick if (0 < BVal && BVal <= CVal && CVal < 32) {
213009467b48Spatrick bool Signed = N->getOpcode() == ISD::SRA;
2131*d415bd75Srobert ReplaceNode(N, getBFE32(Signed, SDLoc(N), Shl.getOperand(0), CVal - BVal,
213209467b48Spatrick 32 - CVal));
213309467b48Spatrick return;
213409467b48Spatrick }
213509467b48Spatrick }
213609467b48Spatrick SelectCode(N);
213709467b48Spatrick }
213809467b48Spatrick
SelectS_BFE(SDNode * N)213909467b48Spatrick void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
214009467b48Spatrick switch (N->getOpcode()) {
214109467b48Spatrick case ISD::AND:
214209467b48Spatrick if (N->getOperand(0).getOpcode() == ISD::SRL) {
214309467b48Spatrick // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
214409467b48Spatrick // Predicate: isMask(mask)
214509467b48Spatrick const SDValue &Srl = N->getOperand(0);
214609467b48Spatrick ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
214709467b48Spatrick ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
214809467b48Spatrick
214909467b48Spatrick if (Shift && Mask) {
215009467b48Spatrick uint32_t ShiftVal = Shift->getZExtValue();
215109467b48Spatrick uint32_t MaskVal = Mask->getZExtValue();
215209467b48Spatrick
215309467b48Spatrick if (isMask_32(MaskVal)) {
2154*d415bd75Srobert uint32_t WidthVal = llvm::popcount(MaskVal);
2155*d415bd75Srobert ReplaceNode(N, getBFE32(false, SDLoc(N), Srl.getOperand(0), ShiftVal,
2156*d415bd75Srobert WidthVal));
215709467b48Spatrick return;
215809467b48Spatrick }
215909467b48Spatrick }
216009467b48Spatrick }
216109467b48Spatrick break;
216209467b48Spatrick case ISD::SRL:
216309467b48Spatrick if (N->getOperand(0).getOpcode() == ISD::AND) {
216409467b48Spatrick // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
216509467b48Spatrick // Predicate: isMask(mask >> b)
216609467b48Spatrick const SDValue &And = N->getOperand(0);
216709467b48Spatrick ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
216809467b48Spatrick ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
216909467b48Spatrick
217009467b48Spatrick if (Shift && Mask) {
217109467b48Spatrick uint32_t ShiftVal = Shift->getZExtValue();
217209467b48Spatrick uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
217309467b48Spatrick
217409467b48Spatrick if (isMask_32(MaskVal)) {
2175*d415bd75Srobert uint32_t WidthVal = llvm::popcount(MaskVal);
2176*d415bd75Srobert ReplaceNode(N, getBFE32(false, SDLoc(N), And.getOperand(0), ShiftVal,
2177*d415bd75Srobert WidthVal));
217809467b48Spatrick return;
217909467b48Spatrick }
218009467b48Spatrick }
218109467b48Spatrick } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
218209467b48Spatrick SelectS_BFEFromShifts(N);
218309467b48Spatrick return;
218409467b48Spatrick }
218509467b48Spatrick break;
218609467b48Spatrick case ISD::SRA:
218709467b48Spatrick if (N->getOperand(0).getOpcode() == ISD::SHL) {
218809467b48Spatrick SelectS_BFEFromShifts(N);
218909467b48Spatrick return;
219009467b48Spatrick }
219109467b48Spatrick break;
219209467b48Spatrick
219309467b48Spatrick case ISD::SIGN_EXTEND_INREG: {
219409467b48Spatrick // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
219509467b48Spatrick SDValue Src = N->getOperand(0);
219609467b48Spatrick if (Src.getOpcode() != ISD::SRL)
219709467b48Spatrick break;
219809467b48Spatrick
219909467b48Spatrick const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
220009467b48Spatrick if (!Amt)
220109467b48Spatrick break;
220209467b48Spatrick
220309467b48Spatrick unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2204*d415bd75Srobert ReplaceNode(N, getBFE32(true, SDLoc(N), Src.getOperand(0),
220509467b48Spatrick Amt->getZExtValue(), Width));
220609467b48Spatrick return;
220709467b48Spatrick }
220809467b48Spatrick }
220909467b48Spatrick
221009467b48Spatrick SelectCode(N);
221109467b48Spatrick }
221209467b48Spatrick
isCBranchSCC(const SDNode * N) const221309467b48Spatrick bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
221409467b48Spatrick assert(N->getOpcode() == ISD::BRCOND);
221509467b48Spatrick if (!N->hasOneUse())
221609467b48Spatrick return false;
221709467b48Spatrick
221809467b48Spatrick SDValue Cond = N->getOperand(1);
221909467b48Spatrick if (Cond.getOpcode() == ISD::CopyToReg)
222009467b48Spatrick Cond = Cond.getOperand(2);
222109467b48Spatrick
222209467b48Spatrick if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
222309467b48Spatrick return false;
222409467b48Spatrick
222509467b48Spatrick MVT VT = Cond.getOperand(0).getSimpleValueType();
222609467b48Spatrick if (VT == MVT::i32)
222709467b48Spatrick return true;
222809467b48Spatrick
222909467b48Spatrick if (VT == MVT::i64) {
223009467b48Spatrick auto ST = static_cast<const GCNSubtarget *>(Subtarget);
223109467b48Spatrick
223209467b48Spatrick ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
223309467b48Spatrick return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
223409467b48Spatrick }
223509467b48Spatrick
223609467b48Spatrick return false;
223709467b48Spatrick }
223809467b48Spatrick
SelectBRCOND(SDNode * N)223909467b48Spatrick void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
224009467b48Spatrick SDValue Cond = N->getOperand(1);
224109467b48Spatrick
224209467b48Spatrick if (Cond.isUndef()) {
224309467b48Spatrick CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
224409467b48Spatrick N->getOperand(2), N->getOperand(0));
224509467b48Spatrick return;
224609467b48Spatrick }
224709467b48Spatrick
224809467b48Spatrick const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
224909467b48Spatrick const SIRegisterInfo *TRI = ST->getRegisterInfo();
225009467b48Spatrick
225109467b48Spatrick bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
225209467b48Spatrick unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
2253097a140dSpatrick Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();
225409467b48Spatrick SDLoc SL(N);
225509467b48Spatrick
225609467b48Spatrick if (!UseSCCBr) {
225709467b48Spatrick // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not
225809467b48Spatrick // analyzed what generates the vcc value, so we do not know whether vcc
225909467b48Spatrick // bits for disabled lanes are 0. Thus we need to mask out bits for
226009467b48Spatrick // disabled lanes.
226109467b48Spatrick //
226209467b48Spatrick // For the case that we select S_CBRANCH_SCC1 and it gets
226309467b48Spatrick // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
226409467b48Spatrick // SIInstrInfo::moveToVALU which inserts the S_AND).
226509467b48Spatrick //
226609467b48Spatrick // We could add an analysis of what generates the vcc value here and omit
226709467b48Spatrick // the S_AND when is unnecessary. But it would be better to add a separate
226809467b48Spatrick // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it
226909467b48Spatrick // catches both cases.
227009467b48Spatrick Cond = SDValue(CurDAG->getMachineNode(ST->isWave32() ? AMDGPU::S_AND_B32
227109467b48Spatrick : AMDGPU::S_AND_B64,
227209467b48Spatrick SL, MVT::i1,
227309467b48Spatrick CurDAG->getRegister(ST->isWave32() ? AMDGPU::EXEC_LO
227409467b48Spatrick : AMDGPU::EXEC,
227509467b48Spatrick MVT::i1),
227609467b48Spatrick Cond),
227709467b48Spatrick 0);
227809467b48Spatrick }
227909467b48Spatrick
228009467b48Spatrick SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
228109467b48Spatrick CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
228209467b48Spatrick N->getOperand(2), // Basic Block
228309467b48Spatrick VCC.getValue(0));
228409467b48Spatrick }
228509467b48Spatrick
SelectFMAD_FMA(SDNode * N)228609467b48Spatrick void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
228709467b48Spatrick MVT VT = N->getSimpleValueType(0);
228809467b48Spatrick bool IsFMA = N->getOpcode() == ISD::FMA;
228909467b48Spatrick if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
229009467b48Spatrick !Subtarget->hasFmaMixInsts()) ||
229109467b48Spatrick ((IsFMA && Subtarget->hasMadMixInsts()) ||
229209467b48Spatrick (!IsFMA && Subtarget->hasFmaMixInsts()))) {
229309467b48Spatrick SelectCode(N);
229409467b48Spatrick return;
229509467b48Spatrick }
229609467b48Spatrick
229709467b48Spatrick SDValue Src0 = N->getOperand(0);
229809467b48Spatrick SDValue Src1 = N->getOperand(1);
229909467b48Spatrick SDValue Src2 = N->getOperand(2);
230009467b48Spatrick unsigned Src0Mods, Src1Mods, Src2Mods;
230109467b48Spatrick
230209467b48Spatrick // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
230309467b48Spatrick // using the conversion from f16.
230409467b48Spatrick bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
230509467b48Spatrick bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
230609467b48Spatrick bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
230709467b48Spatrick
2308097a140dSpatrick assert((IsFMA || !Mode.allFP32Denormals()) &&
230909467b48Spatrick "fmad selected with denormals enabled");
231009467b48Spatrick // TODO: We can select this with f32 denormals enabled if all the sources are
231109467b48Spatrick // converted from f16 (in which case fmad isn't legal).
231209467b48Spatrick
231309467b48Spatrick if (Sel0 || Sel1 || Sel2) {
231409467b48Spatrick // For dummy operands.
231509467b48Spatrick SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
231609467b48Spatrick SDValue Ops[] = {
231709467b48Spatrick CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
231809467b48Spatrick CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
231909467b48Spatrick CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
232009467b48Spatrick CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
232109467b48Spatrick Zero, Zero
232209467b48Spatrick };
232309467b48Spatrick
232409467b48Spatrick CurDAG->SelectNodeTo(N,
232509467b48Spatrick IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
232609467b48Spatrick MVT::f32, Ops);
232709467b48Spatrick } else {
232809467b48Spatrick SelectCode(N);
232909467b48Spatrick }
233009467b48Spatrick }
233109467b48Spatrick
SelectDSAppendConsume(SDNode * N,unsigned IntrID)233209467b48Spatrick void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
233309467b48Spatrick // The address is assumed to be uniform, so if it ends up in a VGPR, it will
233409467b48Spatrick // be copied to an SGPR with readfirstlane.
233509467b48Spatrick unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
233609467b48Spatrick AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
233709467b48Spatrick
233809467b48Spatrick SDValue Chain = N->getOperand(0);
233909467b48Spatrick SDValue Ptr = N->getOperand(2);
234009467b48Spatrick MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
234109467b48Spatrick MachineMemOperand *MMO = M->getMemOperand();
234209467b48Spatrick bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
234309467b48Spatrick
234409467b48Spatrick SDValue Offset;
234509467b48Spatrick if (CurDAG->isBaseWithConstantOffset(Ptr)) {
234609467b48Spatrick SDValue PtrBase = Ptr.getOperand(0);
234709467b48Spatrick SDValue PtrOffset = Ptr.getOperand(1);
234809467b48Spatrick
234909467b48Spatrick const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
235073471bf0Spatrick if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
235109467b48Spatrick N = glueCopyToM0(N, PtrBase);
235209467b48Spatrick Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
235309467b48Spatrick }
235409467b48Spatrick }
235509467b48Spatrick
235609467b48Spatrick if (!Offset) {
235709467b48Spatrick N = glueCopyToM0(N, Ptr);
235809467b48Spatrick Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
235909467b48Spatrick }
236009467b48Spatrick
236109467b48Spatrick SDValue Ops[] = {
236209467b48Spatrick Offset,
236309467b48Spatrick CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
236409467b48Spatrick Chain,
236509467b48Spatrick N->getOperand(N->getNumOperands() - 1) // New glue
236609467b48Spatrick };
236709467b48Spatrick
236809467b48Spatrick SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
236909467b48Spatrick CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
237009467b48Spatrick }
237109467b48Spatrick
2372*d415bd75Srobert // We need to handle this here because tablegen doesn't support matching
2373*d415bd75Srobert // instructions with multiple outputs.
SelectDSBvhStackIntrinsic(SDNode * N)2374*d415bd75Srobert void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N) {
2375*d415bd75Srobert unsigned Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
2376*d415bd75Srobert SDValue Ops[] = {N->getOperand(2), N->getOperand(3), N->getOperand(4),
2377*d415bd75Srobert N->getOperand(5), N->getOperand(0)};
2378*d415bd75Srobert
2379*d415bd75Srobert MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2380*d415bd75Srobert MachineMemOperand *MMO = M->getMemOperand();
2381*d415bd75Srobert SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2382*d415bd75Srobert CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2383*d415bd75Srobert }
2384*d415bd75Srobert
gwsIntrinToOpcode(unsigned IntrID)238509467b48Spatrick static unsigned gwsIntrinToOpcode(unsigned IntrID) {
238609467b48Spatrick switch (IntrID) {
238709467b48Spatrick case Intrinsic::amdgcn_ds_gws_init:
238809467b48Spatrick return AMDGPU::DS_GWS_INIT;
238909467b48Spatrick case Intrinsic::amdgcn_ds_gws_barrier:
239009467b48Spatrick return AMDGPU::DS_GWS_BARRIER;
239109467b48Spatrick case Intrinsic::amdgcn_ds_gws_sema_v:
239209467b48Spatrick return AMDGPU::DS_GWS_SEMA_V;
239309467b48Spatrick case Intrinsic::amdgcn_ds_gws_sema_br:
239409467b48Spatrick return AMDGPU::DS_GWS_SEMA_BR;
239509467b48Spatrick case Intrinsic::amdgcn_ds_gws_sema_p:
239609467b48Spatrick return AMDGPU::DS_GWS_SEMA_P;
239709467b48Spatrick case Intrinsic::amdgcn_ds_gws_sema_release_all:
239809467b48Spatrick return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
239909467b48Spatrick default:
240009467b48Spatrick llvm_unreachable("not a gws intrinsic");
240109467b48Spatrick }
240209467b48Spatrick }
240309467b48Spatrick
SelectDS_GWS(SDNode * N,unsigned IntrID)240409467b48Spatrick void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
240509467b48Spatrick if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
240609467b48Spatrick !Subtarget->hasGWSSemaReleaseAll()) {
240709467b48Spatrick // Let this error.
240809467b48Spatrick SelectCode(N);
240909467b48Spatrick return;
241009467b48Spatrick }
241109467b48Spatrick
241209467b48Spatrick // Chain, intrinsic ID, vsrc, offset
241309467b48Spatrick const bool HasVSrc = N->getNumOperands() == 4;
241409467b48Spatrick assert(HasVSrc || N->getNumOperands() == 3);
241509467b48Spatrick
241609467b48Spatrick SDLoc SL(N);
241709467b48Spatrick SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
241809467b48Spatrick int ImmOffset = 0;
241909467b48Spatrick MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
242009467b48Spatrick MachineMemOperand *MMO = M->getMemOperand();
242109467b48Spatrick
242209467b48Spatrick // Don't worry if the offset ends up in a VGPR. Only one lane will have
242309467b48Spatrick // effect, so SIFixSGPRCopies will validly insert readfirstlane.
242409467b48Spatrick
242509467b48Spatrick // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
242609467b48Spatrick // offset field) % 64. Some versions of the programming guide omit the m0
242709467b48Spatrick // part, or claim it's from offset 0.
242809467b48Spatrick if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
242909467b48Spatrick // If we have a constant offset, try to use the 0 in m0 as the base.
243009467b48Spatrick // TODO: Look into changing the default m0 initialization value. If the
243109467b48Spatrick // default -1 only set the low 16-bits, we could leave it as-is and add 1 to
243209467b48Spatrick // the immediate offset.
243309467b48Spatrick glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32));
243409467b48Spatrick ImmOffset = ConstOffset->getZExtValue();
243509467b48Spatrick } else {
243609467b48Spatrick if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
243709467b48Spatrick ImmOffset = BaseOffset.getConstantOperandVal(1);
243809467b48Spatrick BaseOffset = BaseOffset.getOperand(0);
243909467b48Spatrick }
244009467b48Spatrick
244109467b48Spatrick // Prefer to do the shift in an SGPR since it should be possible to use m0
244209467b48Spatrick // as the result directly. If it's already an SGPR, it will be eliminated
244309467b48Spatrick // later.
244409467b48Spatrick SDNode *SGPROffset
244509467b48Spatrick = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
244609467b48Spatrick BaseOffset);
244709467b48Spatrick // Shift to offset in m0
244809467b48Spatrick SDNode *M0Base
244909467b48Spatrick = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
245009467b48Spatrick SDValue(SGPROffset, 0),
245109467b48Spatrick CurDAG->getTargetConstant(16, SL, MVT::i32));
245209467b48Spatrick glueCopyToM0(N, SDValue(M0Base, 0));
245309467b48Spatrick }
245409467b48Spatrick
245509467b48Spatrick SDValue Chain = N->getOperand(0);
245609467b48Spatrick SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
245709467b48Spatrick
245809467b48Spatrick const unsigned Opc = gwsIntrinToOpcode(IntrID);
245909467b48Spatrick SmallVector<SDValue, 5> Ops;
246009467b48Spatrick if (HasVSrc)
246109467b48Spatrick Ops.push_back(N->getOperand(2));
246209467b48Spatrick Ops.push_back(OffsetField);
246309467b48Spatrick Ops.push_back(Chain);
246409467b48Spatrick
246509467b48Spatrick SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
246609467b48Spatrick CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
246709467b48Spatrick }
246809467b48Spatrick
SelectInterpP1F16(SDNode * N)2469097a140dSpatrick void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {
2470097a140dSpatrick if (Subtarget->getLDSBankCount() != 16) {
2471097a140dSpatrick // This is a single instruction with a pattern.
2472097a140dSpatrick SelectCode(N);
2473097a140dSpatrick return;
2474097a140dSpatrick }
2475097a140dSpatrick
2476097a140dSpatrick SDLoc DL(N);
2477097a140dSpatrick
2478097a140dSpatrick // This requires 2 instructions. It is possible to write a pattern to support
2479097a140dSpatrick // this, but the generated isel emitter doesn't correctly deal with multiple
2480097a140dSpatrick // output instructions using the same physical register input. The copy to m0
2481097a140dSpatrick // is incorrectly placed before the second instruction.
2482097a140dSpatrick //
2483097a140dSpatrick // TODO: Match source modifiers.
2484097a140dSpatrick //
2485097a140dSpatrick // def : Pat <
2486097a140dSpatrick // (int_amdgcn_interp_p1_f16
2487097a140dSpatrick // (VOP3Mods f32:$src0, i32:$src0_modifiers),
2488097a140dSpatrick // (i32 timm:$attrchan), (i32 timm:$attr),
2489097a140dSpatrick // (i1 timm:$high), M0),
2490097a140dSpatrick // (V_INTERP_P1LV_F16 $src0_modifiers, VGPR_32:$src0, timm:$attr,
2491097a140dSpatrick // timm:$attrchan, 0,
2492097a140dSpatrick // (V_INTERP_MOV_F32 2, timm:$attr, timm:$attrchan), timm:$high)> {
2493097a140dSpatrick // let Predicates = [has16BankLDS];
2494097a140dSpatrick // }
2495097a140dSpatrick
2496097a140dSpatrick // 16 bank LDS
2497097a140dSpatrick SDValue ToM0 = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, AMDGPU::M0,
2498097a140dSpatrick N->getOperand(5), SDValue());
2499097a140dSpatrick
2500097a140dSpatrick SDVTList VTs = CurDAG->getVTList(MVT::f32, MVT::Other);
2501097a140dSpatrick
2502097a140dSpatrick SDNode *InterpMov =
2503097a140dSpatrick CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32, DL, VTs, {
2504097a140dSpatrick CurDAG->getTargetConstant(2, DL, MVT::i32), // P0
2505097a140dSpatrick N->getOperand(3), // Attr
2506097a140dSpatrick N->getOperand(2), // Attrchan
2507097a140dSpatrick ToM0.getValue(1) // In glue
2508097a140dSpatrick });
2509097a140dSpatrick
2510097a140dSpatrick SDNode *InterpP1LV =
2511097a140dSpatrick CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16, DL, MVT::f32, {
2512097a140dSpatrick CurDAG->getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
2513097a140dSpatrick N->getOperand(1), // Src0
2514097a140dSpatrick N->getOperand(3), // Attr
2515097a140dSpatrick N->getOperand(2), // Attrchan
2516097a140dSpatrick CurDAG->getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
2517097a140dSpatrick SDValue(InterpMov, 0), // Src2 - holds two f16 values selected by high
2518097a140dSpatrick N->getOperand(4), // high
2519097a140dSpatrick CurDAG->getTargetConstant(0, DL, MVT::i1), // $clamp
2520097a140dSpatrick CurDAG->getTargetConstant(0, DL, MVT::i32), // $omod
2521097a140dSpatrick SDValue(InterpMov, 1)
2522097a140dSpatrick });
2523097a140dSpatrick
2524097a140dSpatrick CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), SDValue(InterpP1LV, 0));
2525097a140dSpatrick }
2526097a140dSpatrick
SelectINTRINSIC_W_CHAIN(SDNode * N)252709467b48Spatrick void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
252809467b48Spatrick unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
252909467b48Spatrick switch (IntrID) {
253009467b48Spatrick case Intrinsic::amdgcn_ds_append:
253109467b48Spatrick case Intrinsic::amdgcn_ds_consume: {
253209467b48Spatrick if (N->getValueType(0) != MVT::i32)
253309467b48Spatrick break;
253409467b48Spatrick SelectDSAppendConsume(N, IntrID);
253509467b48Spatrick return;
253609467b48Spatrick }
2537*d415bd75Srobert case Intrinsic::amdgcn_ds_bvh_stack_rtn:
2538*d415bd75Srobert SelectDSBvhStackIntrinsic(N);
2539*d415bd75Srobert return;
254009467b48Spatrick }
254109467b48Spatrick
254209467b48Spatrick SelectCode(N);
254309467b48Spatrick }
254409467b48Spatrick
SelectINTRINSIC_WO_CHAIN(SDNode * N)254509467b48Spatrick void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
254609467b48Spatrick unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
254709467b48Spatrick unsigned Opcode;
254809467b48Spatrick switch (IntrID) {
254909467b48Spatrick case Intrinsic::amdgcn_wqm:
255009467b48Spatrick Opcode = AMDGPU::WQM;
255109467b48Spatrick break;
255209467b48Spatrick case Intrinsic::amdgcn_softwqm:
255309467b48Spatrick Opcode = AMDGPU::SOFT_WQM;
255409467b48Spatrick break;
255509467b48Spatrick case Intrinsic::amdgcn_wwm:
255673471bf0Spatrick case Intrinsic::amdgcn_strict_wwm:
255773471bf0Spatrick Opcode = AMDGPU::STRICT_WWM;
255873471bf0Spatrick break;
255973471bf0Spatrick case Intrinsic::amdgcn_strict_wqm:
256073471bf0Spatrick Opcode = AMDGPU::STRICT_WQM;
256109467b48Spatrick break;
2562097a140dSpatrick case Intrinsic::amdgcn_interp_p1_f16:
2563097a140dSpatrick SelectInterpP1F16(N);
2564097a140dSpatrick return;
256509467b48Spatrick default:
256609467b48Spatrick SelectCode(N);
256709467b48Spatrick return;
256809467b48Spatrick }
256909467b48Spatrick
257009467b48Spatrick SDValue Src = N->getOperand(1);
257109467b48Spatrick CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src});
257209467b48Spatrick }
257309467b48Spatrick
SelectINTRINSIC_VOID(SDNode * N)257409467b48Spatrick void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
257509467b48Spatrick unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
257609467b48Spatrick switch (IntrID) {
257709467b48Spatrick case Intrinsic::amdgcn_ds_gws_init:
257809467b48Spatrick case Intrinsic::amdgcn_ds_gws_barrier:
257909467b48Spatrick case Intrinsic::amdgcn_ds_gws_sema_v:
258009467b48Spatrick case Intrinsic::amdgcn_ds_gws_sema_br:
258109467b48Spatrick case Intrinsic::amdgcn_ds_gws_sema_p:
258209467b48Spatrick case Intrinsic::amdgcn_ds_gws_sema_release_all:
258309467b48Spatrick SelectDS_GWS(N, IntrID);
258409467b48Spatrick return;
258509467b48Spatrick default:
258609467b48Spatrick break;
258709467b48Spatrick }
258809467b48Spatrick
258909467b48Spatrick SelectCode(N);
259009467b48Spatrick }
259109467b48Spatrick
SelectVOP3ModsImpl(SDValue In,SDValue & Src,unsigned & Mods,bool AllowAbs) const259209467b48Spatrick bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
259373471bf0Spatrick unsigned &Mods,
259473471bf0Spatrick bool AllowAbs) const {
259509467b48Spatrick Mods = 0;
259609467b48Spatrick Src = In;
259709467b48Spatrick
259809467b48Spatrick if (Src.getOpcode() == ISD::FNEG) {
259909467b48Spatrick Mods |= SISrcMods::NEG;
260009467b48Spatrick Src = Src.getOperand(0);
260109467b48Spatrick }
260209467b48Spatrick
260373471bf0Spatrick if (AllowAbs && Src.getOpcode() == ISD::FABS) {
260409467b48Spatrick Mods |= SISrcMods::ABS;
260509467b48Spatrick Src = Src.getOperand(0);
260609467b48Spatrick }
260709467b48Spatrick
260809467b48Spatrick return true;
260909467b48Spatrick }
261009467b48Spatrick
SelectVOP3Mods(SDValue In,SDValue & Src,SDValue & SrcMods) const261109467b48Spatrick bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
261209467b48Spatrick SDValue &SrcMods) const {
261309467b48Spatrick unsigned Mods;
261409467b48Spatrick if (SelectVOP3ModsImpl(In, Src, Mods)) {
261509467b48Spatrick SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
261609467b48Spatrick return true;
261709467b48Spatrick }
261809467b48Spatrick
261909467b48Spatrick return false;
262009467b48Spatrick }
262109467b48Spatrick
SelectVOP3BMods(SDValue In,SDValue & Src,SDValue & SrcMods) const262273471bf0Spatrick bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
262373471bf0Spatrick SDValue &SrcMods) const {
262473471bf0Spatrick unsigned Mods;
262573471bf0Spatrick if (SelectVOP3ModsImpl(In, Src, Mods, /* AllowAbs */ false)) {
262673471bf0Spatrick SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
262773471bf0Spatrick return true;
262873471bf0Spatrick }
262973471bf0Spatrick
263073471bf0Spatrick return false;
263173471bf0Spatrick }
263273471bf0Spatrick
SelectVOP3NoMods(SDValue In,SDValue & Src) const263309467b48Spatrick bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
263409467b48Spatrick if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
263509467b48Spatrick return false;
263609467b48Spatrick
263709467b48Spatrick Src = In;
263809467b48Spatrick return true;
263909467b48Spatrick }
264009467b48Spatrick
SelectVINTERPModsImpl(SDValue In,SDValue & Src,SDValue & SrcMods,bool OpSel) const2641*d415bd75Srobert bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(SDValue In, SDValue &Src,
2642*d415bd75Srobert SDValue &SrcMods,
2643*d415bd75Srobert bool OpSel) const {
2644*d415bd75Srobert unsigned Mods;
2645*d415bd75Srobert if (SelectVOP3ModsImpl(In, Src, Mods, /* AllowAbs */ false)) {
2646*d415bd75Srobert if (OpSel)
2647*d415bd75Srobert Mods |= SISrcMods::OP_SEL_0;
2648*d415bd75Srobert SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2649*d415bd75Srobert return true;
2650*d415bd75Srobert }
2651*d415bd75Srobert
2652*d415bd75Srobert return false;
2653*d415bd75Srobert }
2654*d415bd75Srobert
SelectVINTERPMods(SDValue In,SDValue & Src,SDValue & SrcMods) const2655*d415bd75Srobert bool AMDGPUDAGToDAGISel::SelectVINTERPMods(SDValue In, SDValue &Src,
2656*d415bd75Srobert SDValue &SrcMods) const {
2657*d415bd75Srobert return SelectVINTERPModsImpl(In, Src, SrcMods, /* OpSel */ false);
2658*d415bd75Srobert }
2659*d415bd75Srobert
SelectVINTERPModsHi(SDValue In,SDValue & Src,SDValue & SrcMods) const2660*d415bd75Srobert bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(SDValue In, SDValue &Src,
2661*d415bd75Srobert SDValue &SrcMods) const {
2662*d415bd75Srobert return SelectVINTERPModsImpl(In, Src, SrcMods, /* OpSel */ true);
2663*d415bd75Srobert }
2664*d415bd75Srobert
SelectVOP3Mods0(SDValue In,SDValue & Src,SDValue & SrcMods,SDValue & Clamp,SDValue & Omod) const266509467b48Spatrick bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
266609467b48Spatrick SDValue &SrcMods, SDValue &Clamp,
266709467b48Spatrick SDValue &Omod) const {
266809467b48Spatrick SDLoc DL(In);
266909467b48Spatrick Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
267009467b48Spatrick Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
267109467b48Spatrick
267209467b48Spatrick return SelectVOP3Mods(In, Src, SrcMods);
267309467b48Spatrick }
267409467b48Spatrick
SelectVOP3BMods0(SDValue In,SDValue & Src,SDValue & SrcMods,SDValue & Clamp,SDValue & Omod) const267573471bf0Spatrick bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,
267673471bf0Spatrick SDValue &SrcMods, SDValue &Clamp,
267773471bf0Spatrick SDValue &Omod) const {
267873471bf0Spatrick SDLoc DL(In);
267973471bf0Spatrick Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
268073471bf0Spatrick Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
268173471bf0Spatrick
268273471bf0Spatrick return SelectVOP3BMods(In, Src, SrcMods);
268373471bf0Spatrick }
268473471bf0Spatrick
SelectVOP3OMods(SDValue In,SDValue & Src,SDValue & Clamp,SDValue & Omod) const268509467b48Spatrick bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
268609467b48Spatrick SDValue &Clamp, SDValue &Omod) const {
268709467b48Spatrick Src = In;
268809467b48Spatrick
268909467b48Spatrick SDLoc DL(In);
269009467b48Spatrick Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
269109467b48Spatrick Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
269209467b48Spatrick
269309467b48Spatrick return true;
269409467b48Spatrick }
269509467b48Spatrick
SelectVOP3PMods(SDValue In,SDValue & Src,SDValue & SrcMods,bool IsDOT) const269609467b48Spatrick bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
2697*d415bd75Srobert SDValue &SrcMods, bool IsDOT) const {
269809467b48Spatrick unsigned Mods = 0;
269909467b48Spatrick Src = In;
270009467b48Spatrick
270109467b48Spatrick if (Src.getOpcode() == ISD::FNEG) {
270209467b48Spatrick Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
270309467b48Spatrick Src = Src.getOperand(0);
270409467b48Spatrick }
270509467b48Spatrick
2706*d415bd75Srobert if (Src.getOpcode() == ISD::BUILD_VECTOR && Src.getNumOperands() == 2 &&
2707*d415bd75Srobert (!IsDOT || !Subtarget->hasDOTOpSelHazard())) {
270809467b48Spatrick unsigned VecMods = Mods;
270909467b48Spatrick
271009467b48Spatrick SDValue Lo = stripBitcast(Src.getOperand(0));
271109467b48Spatrick SDValue Hi = stripBitcast(Src.getOperand(1));
271209467b48Spatrick
271309467b48Spatrick if (Lo.getOpcode() == ISD::FNEG) {
271409467b48Spatrick Lo = stripBitcast(Lo.getOperand(0));
271509467b48Spatrick Mods ^= SISrcMods::NEG;
271609467b48Spatrick }
271709467b48Spatrick
271809467b48Spatrick if (Hi.getOpcode() == ISD::FNEG) {
271909467b48Spatrick Hi = stripBitcast(Hi.getOperand(0));
272009467b48Spatrick Mods ^= SISrcMods::NEG_HI;
272109467b48Spatrick }
272209467b48Spatrick
272309467b48Spatrick if (isExtractHiElt(Lo, Lo))
272409467b48Spatrick Mods |= SISrcMods::OP_SEL_0;
272509467b48Spatrick
272609467b48Spatrick if (isExtractHiElt(Hi, Hi))
272709467b48Spatrick Mods |= SISrcMods::OP_SEL_1;
272809467b48Spatrick
272973471bf0Spatrick unsigned VecSize = Src.getValueSizeInBits();
273009467b48Spatrick Lo = stripExtractLoElt(Lo);
273109467b48Spatrick Hi = stripExtractLoElt(Hi);
273209467b48Spatrick
273373471bf0Spatrick if (Lo.getValueSizeInBits() > VecSize) {
273473471bf0Spatrick Lo = CurDAG->getTargetExtractSubreg(
273573471bf0Spatrick (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
273673471bf0Spatrick MVT::getIntegerVT(VecSize), Lo);
273773471bf0Spatrick }
273873471bf0Spatrick
273973471bf0Spatrick if (Hi.getValueSizeInBits() > VecSize) {
274073471bf0Spatrick Hi = CurDAG->getTargetExtractSubreg(
274173471bf0Spatrick (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
274273471bf0Spatrick MVT::getIntegerVT(VecSize), Hi);
274373471bf0Spatrick }
274473471bf0Spatrick
274573471bf0Spatrick assert(Lo.getValueSizeInBits() <= VecSize &&
274673471bf0Spatrick Hi.getValueSizeInBits() <= VecSize);
274773471bf0Spatrick
274809467b48Spatrick if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
274909467b48Spatrick // Really a scalar input. Just select from the low half of the register to
275009467b48Spatrick // avoid packing.
275109467b48Spatrick
275273471bf0Spatrick if (VecSize == 32 || VecSize == Lo.getValueSizeInBits()) {
275309467b48Spatrick Src = Lo;
275473471bf0Spatrick } else {
275573471bf0Spatrick assert(Lo.getValueSizeInBits() == 32 && VecSize == 64);
275673471bf0Spatrick
275773471bf0Spatrick SDLoc SL(In);
275873471bf0Spatrick SDValue Undef = SDValue(
275973471bf0Spatrick CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL,
276073471bf0Spatrick Lo.getValueType()), 0);
276173471bf0Spatrick auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
276273471bf0Spatrick : AMDGPU::SReg_64RegClassID;
276373471bf0Spatrick const SDValue Ops[] = {
276473471bf0Spatrick CurDAG->getTargetConstant(RC, SL, MVT::i32),
276573471bf0Spatrick Lo, CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
276673471bf0Spatrick Undef, CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32) };
276773471bf0Spatrick
276873471bf0Spatrick Src = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SL,
276973471bf0Spatrick Src.getValueType(), Ops), 0);
277073471bf0Spatrick }
277109467b48Spatrick SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
277209467b48Spatrick return true;
277309467b48Spatrick }
277409467b48Spatrick
277573471bf0Spatrick if (VecSize == 64 && Lo == Hi && isa<ConstantFPSDNode>(Lo)) {
277673471bf0Spatrick uint64_t Lit = cast<ConstantFPSDNode>(Lo)->getValueAPF()
277773471bf0Spatrick .bitcastToAPInt().getZExtValue();
277873471bf0Spatrick if (AMDGPU::isInlinableLiteral32(Lit, Subtarget->hasInv2PiInlineImm())) {
277973471bf0Spatrick Src = CurDAG->getTargetConstant(Lit, SDLoc(In), MVT::i64);;
278073471bf0Spatrick SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
278173471bf0Spatrick return true;
278273471bf0Spatrick }
278373471bf0Spatrick }
278473471bf0Spatrick
278509467b48Spatrick Mods = VecMods;
278609467b48Spatrick }
278709467b48Spatrick
278809467b48Spatrick // Packed instructions do not have abs modifiers.
278909467b48Spatrick Mods |= SISrcMods::OP_SEL_1;
279009467b48Spatrick
279109467b48Spatrick SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
279209467b48Spatrick return true;
279309467b48Spatrick }
279409467b48Spatrick
SelectVOP3PModsDOT(SDValue In,SDValue & Src,SDValue & SrcMods) const2795*d415bd75Srobert bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src,
2796*d415bd75Srobert SDValue &SrcMods) const {
2797*d415bd75Srobert return SelectVOP3PMods(In, Src, SrcMods, true);
2798*d415bd75Srobert }
2799*d415bd75Srobert
SelectDotIUVOP3PMods(SDValue In,SDValue & Src) const2800*d415bd75Srobert bool AMDGPUDAGToDAGISel::SelectDotIUVOP3PMods(SDValue In, SDValue &Src) const {
2801*d415bd75Srobert const ConstantSDNode *C = cast<ConstantSDNode>(In);
2802*d415bd75Srobert // Literal i1 value set in intrinsic, represents SrcMods for the next operand.
2803*d415bd75Srobert // 1 promotes packed values to signed, 0 treats them as unsigned.
2804*d415bd75Srobert assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
2805*d415bd75Srobert
2806*d415bd75Srobert unsigned Mods = SISrcMods::OP_SEL_1;
2807*d415bd75Srobert unsigned SrcSign = C->getAPIntValue().getZExtValue();
2808*d415bd75Srobert if (SrcSign == 1)
2809*d415bd75Srobert Mods ^= SISrcMods::NEG;
2810*d415bd75Srobert
2811*d415bd75Srobert Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2812*d415bd75Srobert return true;
2813*d415bd75Srobert }
2814*d415bd75Srobert
SelectWMMAOpSelVOP3PMods(SDValue In,SDValue & Src) const2815*d415bd75Srobert bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
2816*d415bd75Srobert SDValue &Src) const {
2817*d415bd75Srobert const ConstantSDNode *C = cast<ConstantSDNode>(In);
2818*d415bd75Srobert assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
2819*d415bd75Srobert
2820*d415bd75Srobert unsigned Mods = SISrcMods::OP_SEL_1;
2821*d415bd75Srobert unsigned SrcVal = C->getAPIntValue().getZExtValue();
2822*d415bd75Srobert if (SrcVal == 1)
2823*d415bd75Srobert Mods |= SISrcMods::OP_SEL_0;
2824*d415bd75Srobert
2825*d415bd75Srobert Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2826*d415bd75Srobert return true;
2827*d415bd75Srobert }
2828*d415bd75Srobert
SelectVOP3OpSel(SDValue In,SDValue & Src,SDValue & SrcMods) const282909467b48Spatrick bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
283009467b48Spatrick SDValue &SrcMods) const {
283109467b48Spatrick Src = In;
283209467b48Spatrick // FIXME: Handle op_sel
283309467b48Spatrick SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
283409467b48Spatrick return true;
283509467b48Spatrick }
283609467b48Spatrick
SelectVOP3OpSelMods(SDValue In,SDValue & Src,SDValue & SrcMods) const283709467b48Spatrick bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
283809467b48Spatrick SDValue &SrcMods) const {
283909467b48Spatrick // FIXME: Handle op_sel
284009467b48Spatrick return SelectVOP3Mods(In, Src, SrcMods);
284109467b48Spatrick }
284209467b48Spatrick
284309467b48Spatrick // The return value is not whether the match is possible (which it always is),
284409467b48Spatrick // but whether or not it a conversion is really used.
SelectVOP3PMadMixModsImpl(SDValue In,SDValue & Src,unsigned & Mods) const284509467b48Spatrick bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
284609467b48Spatrick unsigned &Mods) const {
284709467b48Spatrick Mods = 0;
284809467b48Spatrick SelectVOP3ModsImpl(In, Src, Mods);
284909467b48Spatrick
285009467b48Spatrick if (Src.getOpcode() == ISD::FP_EXTEND) {
285109467b48Spatrick Src = Src.getOperand(0);
285209467b48Spatrick assert(Src.getValueType() == MVT::f16);
285309467b48Spatrick Src = stripBitcast(Src);
285409467b48Spatrick
285509467b48Spatrick // Be careful about folding modifiers if we already have an abs. fneg is
285609467b48Spatrick // applied last, so we don't want to apply an earlier fneg.
285709467b48Spatrick if ((Mods & SISrcMods::ABS) == 0) {
285809467b48Spatrick unsigned ModsTmp;
285909467b48Spatrick SelectVOP3ModsImpl(Src, Src, ModsTmp);
286009467b48Spatrick
286109467b48Spatrick if ((ModsTmp & SISrcMods::NEG) != 0)
286209467b48Spatrick Mods ^= SISrcMods::NEG;
286309467b48Spatrick
286409467b48Spatrick if ((ModsTmp & SISrcMods::ABS) != 0)
286509467b48Spatrick Mods |= SISrcMods::ABS;
286609467b48Spatrick }
286709467b48Spatrick
286809467b48Spatrick // op_sel/op_sel_hi decide the source type and source.
286909467b48Spatrick // If the source's op_sel_hi is set, it indicates to do a conversion from fp16.
287009467b48Spatrick // If the sources's op_sel is set, it picks the high half of the source
287109467b48Spatrick // register.
287209467b48Spatrick
287309467b48Spatrick Mods |= SISrcMods::OP_SEL_1;
287409467b48Spatrick if (isExtractHiElt(Src, Src)) {
287509467b48Spatrick Mods |= SISrcMods::OP_SEL_0;
287609467b48Spatrick
287709467b48Spatrick // TODO: Should we try to look for neg/abs here?
287809467b48Spatrick }
287909467b48Spatrick
288009467b48Spatrick return true;
288109467b48Spatrick }
288209467b48Spatrick
288309467b48Spatrick return false;
288409467b48Spatrick }
288509467b48Spatrick
SelectVOP3PMadMixMods(SDValue In,SDValue & Src,SDValue & SrcMods) const288609467b48Spatrick bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
288709467b48Spatrick SDValue &SrcMods) const {
288809467b48Spatrick unsigned Mods = 0;
288909467b48Spatrick SelectVOP3PMadMixModsImpl(In, Src, Mods);
289009467b48Spatrick SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
289109467b48Spatrick return true;
289209467b48Spatrick }
289309467b48Spatrick
getHi16Elt(SDValue In) const289409467b48Spatrick SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
289509467b48Spatrick if (In.isUndef())
289609467b48Spatrick return CurDAG->getUNDEF(MVT::i32);
289709467b48Spatrick
289809467b48Spatrick if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
289909467b48Spatrick SDLoc SL(In);
290009467b48Spatrick return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
290109467b48Spatrick }
290209467b48Spatrick
290309467b48Spatrick if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
290409467b48Spatrick SDLoc SL(In);
290509467b48Spatrick return CurDAG->getConstant(
290609467b48Spatrick C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
290709467b48Spatrick }
290809467b48Spatrick
290909467b48Spatrick SDValue Src;
291009467b48Spatrick if (isExtractHiElt(In, Src))
291109467b48Spatrick return Src;
291209467b48Spatrick
291309467b48Spatrick return SDValue();
291409467b48Spatrick }
291509467b48Spatrick
isVGPRImm(const SDNode * N) const291609467b48Spatrick bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
291709467b48Spatrick assert(CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn);
291809467b48Spatrick
291909467b48Spatrick const SIRegisterInfo *SIRI =
292009467b48Spatrick static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
292109467b48Spatrick const SIInstrInfo * SII =
292209467b48Spatrick static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
292309467b48Spatrick
292409467b48Spatrick unsigned Limit = 0;
292509467b48Spatrick bool AllUsesAcceptSReg = true;
292609467b48Spatrick for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
292709467b48Spatrick Limit < 10 && U != E; ++U, ++Limit) {
292809467b48Spatrick const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
292909467b48Spatrick
293009467b48Spatrick // If the register class is unknown, it could be an unknown
293109467b48Spatrick // register class that needs to be an SGPR, e.g. an inline asm
293209467b48Spatrick // constraint
293309467b48Spatrick if (!RC || SIRI->isSGPRClass(RC))
293409467b48Spatrick return false;
293509467b48Spatrick
293609467b48Spatrick if (RC != &AMDGPU::VS_32RegClass) {
293709467b48Spatrick AllUsesAcceptSReg = false;
293809467b48Spatrick SDNode * User = *U;
293909467b48Spatrick if (User->isMachineOpcode()) {
294009467b48Spatrick unsigned Opc = User->getMachineOpcode();
2941*d415bd75Srobert const MCInstrDesc &Desc = SII->get(Opc);
294209467b48Spatrick if (Desc.isCommutable()) {
294309467b48Spatrick unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
294409467b48Spatrick unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
294509467b48Spatrick if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
294609467b48Spatrick unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
294709467b48Spatrick const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
294809467b48Spatrick if (CommutedRC == &AMDGPU::VS_32RegClass)
294909467b48Spatrick AllUsesAcceptSReg = true;
295009467b48Spatrick }
295109467b48Spatrick }
295209467b48Spatrick }
2953*d415bd75Srobert // If "AllUsesAcceptSReg == false" so far we haven't succeeded
295409467b48Spatrick // commuting current user. This means have at least one use
295509467b48Spatrick // that strictly require VGPR. Thus, we will not attempt to commute
295609467b48Spatrick // other user instructions.
295709467b48Spatrick if (!AllUsesAcceptSReg)
295809467b48Spatrick break;
295909467b48Spatrick }
296009467b48Spatrick }
296109467b48Spatrick return !AllUsesAcceptSReg && (Limit < 10);
296209467b48Spatrick }
296309467b48Spatrick
isUniformLoad(const SDNode * N) const296409467b48Spatrick bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const {
296509467b48Spatrick auto Ld = cast<LoadSDNode>(N);
296609467b48Spatrick
2967*d415bd75Srobert if (N->isDivergent() && !AMDGPUInstrInfo::isUniformMMO(Ld->getMemOperand()))
2968*d415bd75Srobert return false;
2969*d415bd75Srobert
2970*d415bd75Srobert return Ld->getAlign() >= Align(4) &&
2971*d415bd75Srobert ((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2972*d415bd75Srobert Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) ||
2973*d415bd75Srobert (Subtarget->getScalarizeGlobalBehavior() &&
297409467b48Spatrick Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
2975097a140dSpatrick Ld->isSimple() &&
2976*d415bd75Srobert static_cast<const SITargetLowering *>(getTargetLowering())
2977*d415bd75Srobert ->isMemOpHasNoClobberedMemOperand(N)));
297809467b48Spatrick }
297909467b48Spatrick
PostprocessISelDAG()298009467b48Spatrick void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
298109467b48Spatrick const AMDGPUTargetLowering& Lowering =
298209467b48Spatrick *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
298309467b48Spatrick bool IsModified = false;
298409467b48Spatrick do {
298509467b48Spatrick IsModified = false;
298609467b48Spatrick
298709467b48Spatrick // Go over all selected nodes and try to fold them a bit more
298809467b48Spatrick SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
298909467b48Spatrick while (Position != CurDAG->allnodes_end()) {
299009467b48Spatrick SDNode *Node = &*Position++;
299109467b48Spatrick MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
299209467b48Spatrick if (!MachineNode)
299309467b48Spatrick continue;
299409467b48Spatrick
299509467b48Spatrick SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
299609467b48Spatrick if (ResNode != Node) {
299709467b48Spatrick if (ResNode)
299809467b48Spatrick ReplaceUses(Node, ResNode);
299909467b48Spatrick IsModified = true;
300009467b48Spatrick }
300109467b48Spatrick }
300209467b48Spatrick CurDAG->RemoveDeadNodes();
300309467b48Spatrick } while (IsModified);
300409467b48Spatrick }
300509467b48Spatrick
// Out-of-line definition of the static ID member, initialized to 0.
// NOTE(review): presumably this is the LLVM pass ID, identified by its
// address rather than its value - confirm against the class declaration.
3006*d415bd75Srobert char AMDGPUDAGToDAGISel::ID = 0;
3007