xref: /openbsd-src/gnu/llvm/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (revision d415bd752c734aee168c4ee86ff32e8cc249eb16)
109467b48Spatrick //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
209467b48Spatrick //
309467b48Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
409467b48Spatrick // See https://llvm.org/LICENSE.txt for license information.
509467b48Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
609467b48Spatrick //
709467b48Spatrick //==-----------------------------------------------------------------------===//
809467b48Spatrick //
909467b48Spatrick /// \file
1009467b48Spatrick /// Defines an instruction selector for the AMDGPU target.
1109467b48Spatrick //
1209467b48Spatrick //===----------------------------------------------------------------------===//
1309467b48Spatrick 
14*d415bd75Srobert #include "AMDGPUISelDAGToDAG.h"
1509467b48Spatrick #include "AMDGPU.h"
16*d415bd75Srobert #include "AMDGPUInstrInfo.h"
17*d415bd75Srobert #include "AMDGPUSubtarget.h"
1809467b48Spatrick #include "AMDGPUTargetMachine.h"
19*d415bd75Srobert #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
20*d415bd75Srobert #include "MCTargetDesc/R600MCTargetDesc.h"
21*d415bd75Srobert #include "R600RegisterInfo.h"
2209467b48Spatrick #include "SIMachineFunctionInfo.h"
2309467b48Spatrick #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
2409467b48Spatrick #include "llvm/Analysis/ValueTracking.h"
2509467b48Spatrick #include "llvm/CodeGen/FunctionLoweringInfo.h"
2609467b48Spatrick #include "llvm/CodeGen/SelectionDAG.h"
2709467b48Spatrick #include "llvm/CodeGen/SelectionDAGISel.h"
2809467b48Spatrick #include "llvm/CodeGen/SelectionDAGNodes.h"
2973471bf0Spatrick #include "llvm/IR/IntrinsicsAMDGPU.h"
3009467b48Spatrick #include "llvm/InitializePasses.h"
3173471bf0Spatrick 
3209467b48Spatrick #ifdef EXPENSIVE_CHECKS
3373471bf0Spatrick #include "llvm/Analysis/LoopInfo.h"
3409467b48Spatrick #include "llvm/IR/Dominators.h"
3509467b48Spatrick #endif
3609467b48Spatrick 
37*d415bd75Srobert #define DEBUG_TYPE "amdgpu-isel"
3809467b48Spatrick 
3909467b48Spatrick using namespace llvm;
4009467b48Spatrick 
4109467b48Spatrick //===----------------------------------------------------------------------===//
4209467b48Spatrick // Instruction Selector Implementation
4309467b48Spatrick //===----------------------------------------------------------------------===//
4409467b48Spatrick 
4509467b48Spatrick namespace {
stripBitcast(SDValue Val)4609467b48Spatrick static SDValue stripBitcast(SDValue Val) {
4709467b48Spatrick   return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
4809467b48Spatrick }
4909467b48Spatrick 
5009467b48Spatrick // Figure out if this is really an extract of the high 16-bits of a dword.
isExtractHiElt(SDValue In,SDValue & Out)5109467b48Spatrick static bool isExtractHiElt(SDValue In, SDValue &Out) {
5209467b48Spatrick   In = stripBitcast(In);
5373471bf0Spatrick 
5473471bf0Spatrick   if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
5573471bf0Spatrick     if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) {
5673471bf0Spatrick       if (!Idx->isOne())
5773471bf0Spatrick         return false;
5873471bf0Spatrick       Out = In.getOperand(0);
5973471bf0Spatrick       return true;
6073471bf0Spatrick     }
6173471bf0Spatrick   }
6273471bf0Spatrick 
6309467b48Spatrick   if (In.getOpcode() != ISD::TRUNCATE)
6409467b48Spatrick     return false;
6509467b48Spatrick 
6609467b48Spatrick   SDValue Srl = In.getOperand(0);
6709467b48Spatrick   if (Srl.getOpcode() == ISD::SRL) {
6809467b48Spatrick     if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
6909467b48Spatrick       if (ShiftAmt->getZExtValue() == 16) {
7009467b48Spatrick         Out = stripBitcast(Srl.getOperand(0));
7109467b48Spatrick         return true;
7209467b48Spatrick       }
7309467b48Spatrick     }
7409467b48Spatrick   }
7509467b48Spatrick 
7609467b48Spatrick   return false;
7709467b48Spatrick }
7809467b48Spatrick 
7909467b48Spatrick // Look through operations that obscure just looking at the low 16-bits of the
8009467b48Spatrick // same register.
stripExtractLoElt(SDValue In)8109467b48Spatrick static SDValue stripExtractLoElt(SDValue In) {
8273471bf0Spatrick   if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
8373471bf0Spatrick     if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) {
84*d415bd75Srobert       if (Idx->isZero() && In.getValueSizeInBits() <= 32)
8573471bf0Spatrick         return In.getOperand(0);
8673471bf0Spatrick     }
8773471bf0Spatrick   }
8873471bf0Spatrick 
8909467b48Spatrick   if (In.getOpcode() == ISD::TRUNCATE) {
9009467b48Spatrick     SDValue Src = In.getOperand(0);
9109467b48Spatrick     if (Src.getValueType().getSizeInBits() == 32)
9209467b48Spatrick       return stripBitcast(Src);
9309467b48Spatrick   }
9409467b48Spatrick 
9509467b48Spatrick   return In;
9609467b48Spatrick }
9709467b48Spatrick 
9809467b48Spatrick } // end anonymous namespace
9909467b48Spatrick 
// Pass registration boilerplate for AMDGPUDAGToDAGISel ("amdgpu-isel")
// together with the passes it declares as dependencies. The dominator-tree and
// loop-info dependencies are only listed when EXPENSIVE_CHECKS is defined.
10009467b48Spatrick INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
10109467b48Spatrick                       "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)10209467b48Spatrick INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
10309467b48Spatrick INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
10409467b48Spatrick INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
10509467b48Spatrick #ifdef EXPENSIVE_CHECKS
10609467b48Spatrick INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
10709467b48Spatrick INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
10809467b48Spatrick #endif
10909467b48Spatrick INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",
11009467b48Spatrick                     "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
11109467b48Spatrick 
11209467b48Spatrick /// This pass converts a legalized DAG into a AMDGPU-specific
11309467b48Spatrick // DAG, ready for instruction scheduling.
114*d415bd75Srobert FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM,
11509467b48Spatrick                                         CodeGenOpt::Level OptLevel) {
11609467b48Spatrick   return new AMDGPUDAGToDAGISel(TM, OptLevel);
11709467b48Spatrick }
11809467b48Spatrick 
AMDGPUDAGToDAGISel(TargetMachine & TM,CodeGenOpt::Level OptLevel)119*d415bd75Srobert AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM,
120*d415bd75Srobert                                        CodeGenOpt::Level OptLevel)
121*d415bd75Srobert     : SelectionDAGISel(ID, TM, OptLevel) {
122*d415bd75Srobert   EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
12309467b48Spatrick }
12409467b48Spatrick 
runOnMachineFunction(MachineFunction & MF)12509467b48Spatrick bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
12609467b48Spatrick #ifdef EXPENSIVE_CHECKS
12709467b48Spatrick   DominatorTree & DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
12809467b48Spatrick   LoopInfo * LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
12909467b48Spatrick   for (auto &L : LI->getLoopsInPreorder()) {
13009467b48Spatrick     assert(L->isLCSSAForm(DT));
13109467b48Spatrick   }
13209467b48Spatrick #endif
13309467b48Spatrick   Subtarget = &MF.getSubtarget<GCNSubtarget>();
134097a140dSpatrick   Mode = AMDGPU::SIModeRegisterDefaults(MF.getFunction());
13509467b48Spatrick   return SelectionDAGISel::runOnMachineFunction(MF);
13609467b48Spatrick }
13709467b48Spatrick 
fp16SrcZerosHighBits(unsigned Opc) const13873471bf0Spatrick bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {
13973471bf0Spatrick   // XXX - only need to list legal operations.
14073471bf0Spatrick   switch (Opc) {
14173471bf0Spatrick   case ISD::FADD:
14273471bf0Spatrick   case ISD::FSUB:
14373471bf0Spatrick   case ISD::FMUL:
14473471bf0Spatrick   case ISD::FDIV:
14573471bf0Spatrick   case ISD::FREM:
14673471bf0Spatrick   case ISD::FCANONICALIZE:
14773471bf0Spatrick   case ISD::UINT_TO_FP:
14873471bf0Spatrick   case ISD::SINT_TO_FP:
14973471bf0Spatrick   case ISD::FABS:
15073471bf0Spatrick     // Fabs is lowered to a bit operation, but it's an and which will clear the
15173471bf0Spatrick     // high bits anyway.
15273471bf0Spatrick   case ISD::FSQRT:
15373471bf0Spatrick   case ISD::FSIN:
15473471bf0Spatrick   case ISD::FCOS:
15573471bf0Spatrick   case ISD::FPOWI:
15673471bf0Spatrick   case ISD::FPOW:
15773471bf0Spatrick   case ISD::FLOG:
15873471bf0Spatrick   case ISD::FLOG2:
15973471bf0Spatrick   case ISD::FLOG10:
16073471bf0Spatrick   case ISD::FEXP:
16173471bf0Spatrick   case ISD::FEXP2:
16273471bf0Spatrick   case ISD::FCEIL:
16373471bf0Spatrick   case ISD::FTRUNC:
16473471bf0Spatrick   case ISD::FRINT:
16573471bf0Spatrick   case ISD::FNEARBYINT:
16673471bf0Spatrick   case ISD::FROUND:
16773471bf0Spatrick   case ISD::FFLOOR:
16873471bf0Spatrick   case ISD::FMINNUM:
16973471bf0Spatrick   case ISD::FMAXNUM:
17073471bf0Spatrick   case AMDGPUISD::FRACT:
17173471bf0Spatrick   case AMDGPUISD::CLAMP:
17273471bf0Spatrick   case AMDGPUISD::COS_HW:
17373471bf0Spatrick   case AMDGPUISD::SIN_HW:
17473471bf0Spatrick   case AMDGPUISD::FMIN3:
17573471bf0Spatrick   case AMDGPUISD::FMAX3:
17673471bf0Spatrick   case AMDGPUISD::FMED3:
17773471bf0Spatrick   case AMDGPUISD::FMAD_FTZ:
17873471bf0Spatrick   case AMDGPUISD::RCP:
17973471bf0Spatrick   case AMDGPUISD::RSQ:
18073471bf0Spatrick   case AMDGPUISD::RCP_IFLAG:
18173471bf0Spatrick   case AMDGPUISD::LDEXP:
18273471bf0Spatrick     // On gfx10, all 16-bit instructions preserve the high bits.
18373471bf0Spatrick     return Subtarget->getGeneration() <= AMDGPUSubtarget::GFX9;
18473471bf0Spatrick   case ISD::FP_ROUND:
18573471bf0Spatrick     // We may select fptrunc (fma/mad) to mad_mixlo, which does not zero the
18673471bf0Spatrick     // high bits on gfx9.
18773471bf0Spatrick     // TODO: If we had the source node we could see if the source was fma/mad
18873471bf0Spatrick     return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
18973471bf0Spatrick   case ISD::FMA:
19073471bf0Spatrick   case ISD::FMAD:
19173471bf0Spatrick   case AMDGPUISD::DIV_FIXUP:
19273471bf0Spatrick     return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
19373471bf0Spatrick   default:
19473471bf0Spatrick     // fcopysign, select and others may be lowered to 32-bit bit operations
19573471bf0Spatrick     // which don't zero the high bits.
19673471bf0Spatrick     return false;
19773471bf0Spatrick   }
19873471bf0Spatrick }
19973471bf0Spatrick 
getAnalysisUsage(AnalysisUsage & AU) const200*d415bd75Srobert void AMDGPUDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
201*d415bd75Srobert   AU.addRequired<AMDGPUArgumentUsageInfo>();
202*d415bd75Srobert   AU.addRequired<LegacyDivergenceAnalysis>();
203*d415bd75Srobert #ifdef EXPENSIVE_CHECKS
204*d415bd75Srobert   AU.addRequired<DominatorTreeWrapperPass>();
205*d415bd75Srobert   AU.addRequired<LoopInfoWrapperPass>();
206*d415bd75Srobert #endif
207*d415bd75Srobert   SelectionDAGISel::getAnalysisUsage(AU);
208*d415bd75Srobert }
209*d415bd75Srobert 
matchLoadD16FromBuildVector(SDNode * N) const21009467b48Spatrick bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
21109467b48Spatrick   assert(Subtarget->d16PreservesUnusedBits());
21209467b48Spatrick   MVT VT = N->getValueType(0).getSimpleVT();
21309467b48Spatrick   if (VT != MVT::v2i16 && VT != MVT::v2f16)
21409467b48Spatrick     return false;
21509467b48Spatrick 
21609467b48Spatrick   SDValue Lo = N->getOperand(0);
21709467b48Spatrick   SDValue Hi = N->getOperand(1);
21809467b48Spatrick 
21909467b48Spatrick   LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));
22009467b48Spatrick 
22109467b48Spatrick   // build_vector lo, (load ptr) -> load_d16_hi ptr, lo
22209467b48Spatrick   // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo
22309467b48Spatrick   // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo
22409467b48Spatrick 
22509467b48Spatrick   // Need to check for possible indirect dependencies on the other half of the
22609467b48Spatrick   // vector to avoid introducing a cycle.
22709467b48Spatrick   if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) {
22809467b48Spatrick     SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
22909467b48Spatrick 
23009467b48Spatrick     SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo);
23109467b48Spatrick     SDValue Ops[] = {
23209467b48Spatrick       LdHi->getChain(), LdHi->getBasePtr(), TiedIn
23309467b48Spatrick     };
23409467b48Spatrick 
23509467b48Spatrick     unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
23609467b48Spatrick     if (LdHi->getMemoryVT() == MVT::i8) {
23709467b48Spatrick       LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ?
23809467b48Spatrick         AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
23909467b48Spatrick     } else {
24009467b48Spatrick       assert(LdHi->getMemoryVT() == MVT::i16);
24109467b48Spatrick     }
24209467b48Spatrick 
24309467b48Spatrick     SDValue NewLoadHi =
24409467b48Spatrick       CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
24509467b48Spatrick                                   Ops, LdHi->getMemoryVT(),
24609467b48Spatrick                                   LdHi->getMemOperand());
24709467b48Spatrick 
24809467b48Spatrick     CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi);
24909467b48Spatrick     CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1));
25009467b48Spatrick     return true;
25109467b48Spatrick   }
25209467b48Spatrick 
25309467b48Spatrick   // build_vector (load ptr), hi -> load_d16_lo ptr, hi
25409467b48Spatrick   // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi
25509467b48Spatrick   // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi
25609467b48Spatrick   LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
25709467b48Spatrick   if (LdLo && Lo.hasOneUse()) {
25809467b48Spatrick     SDValue TiedIn = getHi16Elt(Hi);
25909467b48Spatrick     if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode()))
26009467b48Spatrick       return false;
26109467b48Spatrick 
26209467b48Spatrick     SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
26309467b48Spatrick     unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
26409467b48Spatrick     if (LdLo->getMemoryVT() == MVT::i8) {
26509467b48Spatrick       LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ?
26609467b48Spatrick         AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
26709467b48Spatrick     } else {
26809467b48Spatrick       assert(LdLo->getMemoryVT() == MVT::i16);
26909467b48Spatrick     }
27009467b48Spatrick 
27109467b48Spatrick     TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);
27209467b48Spatrick 
27309467b48Spatrick     SDValue Ops[] = {
27409467b48Spatrick       LdLo->getChain(), LdLo->getBasePtr(), TiedIn
27509467b48Spatrick     };
27609467b48Spatrick 
27709467b48Spatrick     SDValue NewLoadLo =
27809467b48Spatrick       CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
27909467b48Spatrick                                   Ops, LdLo->getMemoryVT(),
28009467b48Spatrick                                   LdLo->getMemOperand());
28109467b48Spatrick 
28209467b48Spatrick     CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo);
28309467b48Spatrick     CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1));
28409467b48Spatrick     return true;
28509467b48Spatrick   }
28609467b48Spatrick 
28709467b48Spatrick   return false;
28809467b48Spatrick }
28909467b48Spatrick 
PreprocessISelDAG()29009467b48Spatrick void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
29109467b48Spatrick   if (!Subtarget->d16PreservesUnusedBits())
29209467b48Spatrick     return;
29309467b48Spatrick 
29409467b48Spatrick   SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
29509467b48Spatrick 
29609467b48Spatrick   bool MadeChange = false;
29709467b48Spatrick   while (Position != CurDAG->allnodes_begin()) {
29809467b48Spatrick     SDNode *N = &*--Position;
29909467b48Spatrick     if (N->use_empty())
30009467b48Spatrick       continue;
30109467b48Spatrick 
30209467b48Spatrick     switch (N->getOpcode()) {
30309467b48Spatrick     case ISD::BUILD_VECTOR:
30409467b48Spatrick       MadeChange |= matchLoadD16FromBuildVector(N);
30509467b48Spatrick       break;
30609467b48Spatrick     default:
30709467b48Spatrick       break;
30809467b48Spatrick     }
30909467b48Spatrick   }
31009467b48Spatrick 
31109467b48Spatrick   if (MadeChange) {
31209467b48Spatrick     CurDAG->RemoveDeadNodes();
31309467b48Spatrick     LLVM_DEBUG(dbgs() << "After PreProcess:\n";
31409467b48Spatrick                CurDAG->dump(););
31509467b48Spatrick   }
31609467b48Spatrick }
31709467b48Spatrick 
isInlineImmediate(const SDNode * N,bool Negated) const31809467b48Spatrick bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N,
31909467b48Spatrick                                            bool Negated) const {
32009467b48Spatrick   if (N->isUndef())
32109467b48Spatrick     return true;
32209467b48Spatrick 
32309467b48Spatrick   const SIInstrInfo *TII = Subtarget->getInstrInfo();
32409467b48Spatrick   if (Negated) {
32509467b48Spatrick     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
32609467b48Spatrick       return TII->isInlineConstant(-C->getAPIntValue());
32709467b48Spatrick 
32809467b48Spatrick     if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
32909467b48Spatrick       return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt());
33009467b48Spatrick 
33109467b48Spatrick   } else {
33209467b48Spatrick     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
33309467b48Spatrick       return TII->isInlineConstant(C->getAPIntValue());
33409467b48Spatrick 
33509467b48Spatrick     if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
33609467b48Spatrick       return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
33709467b48Spatrick   }
33809467b48Spatrick 
33909467b48Spatrick   return false;
34009467b48Spatrick }
34109467b48Spatrick 
34209467b48Spatrick /// Determine the register class for \p OpNo
34309467b48Spatrick /// \returns The register class of the virtual register that will be used for
34409467b48Spatrick /// the given operand number \OpNo or NULL if the register class cannot be
34509467b48Spatrick /// determined.
getOperandRegClass(SDNode * N,unsigned OpNo) const34609467b48Spatrick const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
34709467b48Spatrick                                                           unsigned OpNo) const {
34809467b48Spatrick   if (!N->isMachineOpcode()) {
34909467b48Spatrick     if (N->getOpcode() == ISD::CopyToReg) {
35073471bf0Spatrick       Register Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
35173471bf0Spatrick       if (Reg.isVirtual()) {
35209467b48Spatrick         MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
35309467b48Spatrick         return MRI.getRegClass(Reg);
35409467b48Spatrick       }
35509467b48Spatrick 
35609467b48Spatrick       const SIRegisterInfo *TRI
35709467b48Spatrick         = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
358*d415bd75Srobert       return TRI->getPhysRegBaseClass(Reg);
35909467b48Spatrick     }
36009467b48Spatrick 
36109467b48Spatrick     return nullptr;
36209467b48Spatrick   }
36309467b48Spatrick 
36409467b48Spatrick   switch (N->getMachineOpcode()) {
36509467b48Spatrick   default: {
36609467b48Spatrick     const MCInstrDesc &Desc =
36709467b48Spatrick         Subtarget->getInstrInfo()->get(N->getMachineOpcode());
36809467b48Spatrick     unsigned OpIdx = Desc.getNumDefs() + OpNo;
36909467b48Spatrick     if (OpIdx >= Desc.getNumOperands())
37009467b48Spatrick       return nullptr;
371*d415bd75Srobert     int RegClass = Desc.operands()[OpIdx].RegClass;
37209467b48Spatrick     if (RegClass == -1)
37309467b48Spatrick       return nullptr;
37409467b48Spatrick 
37509467b48Spatrick     return Subtarget->getRegisterInfo()->getRegClass(RegClass);
37609467b48Spatrick   }
37709467b48Spatrick   case AMDGPU::REG_SEQUENCE: {
37809467b48Spatrick     unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
37909467b48Spatrick     const TargetRegisterClass *SuperRC =
38009467b48Spatrick         Subtarget->getRegisterInfo()->getRegClass(RCID);
38109467b48Spatrick 
38209467b48Spatrick     SDValue SubRegOp = N->getOperand(OpNo + 1);
38309467b48Spatrick     unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
38409467b48Spatrick     return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
38509467b48Spatrick                                                               SubRegIdx);
38609467b48Spatrick   }
38709467b48Spatrick   }
38809467b48Spatrick }
38909467b48Spatrick 
glueCopyToOp(SDNode * N,SDValue NewChain,SDValue Glue) const39009467b48Spatrick SDNode *AMDGPUDAGToDAGISel::glueCopyToOp(SDNode *N, SDValue NewChain,
39109467b48Spatrick                                          SDValue Glue) const {
39209467b48Spatrick   SmallVector <SDValue, 8> Ops;
39309467b48Spatrick   Ops.push_back(NewChain); // Replace the chain.
39409467b48Spatrick   for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
39509467b48Spatrick     Ops.push_back(N->getOperand(i));
39609467b48Spatrick 
39709467b48Spatrick   Ops.push_back(Glue);
39809467b48Spatrick   return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
39909467b48Spatrick }
40009467b48Spatrick 
glueCopyToM0(SDNode * N,SDValue Val) const40109467b48Spatrick SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
40209467b48Spatrick   const SITargetLowering& Lowering =
40309467b48Spatrick     *static_cast<const SITargetLowering*>(getTargetLowering());
40409467b48Spatrick 
40509467b48Spatrick   assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");
40609467b48Spatrick 
40709467b48Spatrick   SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N), Val);
40809467b48Spatrick   return glueCopyToOp(N, M0, M0.getValue(1));
40909467b48Spatrick }
41009467b48Spatrick 
glueCopyToM0LDSInit(SDNode * N) const41109467b48Spatrick SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
41209467b48Spatrick   unsigned AS = cast<MemSDNode>(N)->getAddressSpace();
41309467b48Spatrick   if (AS == AMDGPUAS::LOCAL_ADDRESS) {
41409467b48Spatrick     if (Subtarget->ldsRequiresM0Init())
41509467b48Spatrick       return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
41609467b48Spatrick   } else if (AS == AMDGPUAS::REGION_ADDRESS) {
41709467b48Spatrick     MachineFunction &MF = CurDAG->getMachineFunction();
41809467b48Spatrick     unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
41909467b48Spatrick     return
42009467b48Spatrick         glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32));
42109467b48Spatrick   }
42209467b48Spatrick   return N;
42309467b48Spatrick }
42409467b48Spatrick 
buildSMovImm64(SDLoc & DL,uint64_t Imm,EVT VT) const42509467b48Spatrick MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
42609467b48Spatrick                                                   EVT VT) const {
42709467b48Spatrick   SDNode *Lo = CurDAG->getMachineNode(
42809467b48Spatrick       AMDGPU::S_MOV_B32, DL, MVT::i32,
42909467b48Spatrick       CurDAG->getTargetConstant(Imm & 0xFFFFFFFF, DL, MVT::i32));
43009467b48Spatrick   SDNode *Hi =
43109467b48Spatrick       CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
43209467b48Spatrick                              CurDAG->getTargetConstant(Imm >> 32, DL, MVT::i32));
43309467b48Spatrick   const SDValue Ops[] = {
43409467b48Spatrick       CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
43509467b48Spatrick       SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
43609467b48Spatrick       SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};
43709467b48Spatrick 
43809467b48Spatrick   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
43909467b48Spatrick }
44009467b48Spatrick 
SelectBuildVector(SDNode * N,unsigned RegClassID)44109467b48Spatrick void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
44209467b48Spatrick   EVT VT = N->getValueType(0);
44309467b48Spatrick   unsigned NumVectorElts = VT.getVectorNumElements();
44409467b48Spatrick   EVT EltVT = VT.getVectorElementType();
44509467b48Spatrick   SDLoc DL(N);
44609467b48Spatrick   SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
44709467b48Spatrick 
44809467b48Spatrick   if (NumVectorElts == 1) {
44909467b48Spatrick     CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
45009467b48Spatrick                          RegClass);
45109467b48Spatrick     return;
45209467b48Spatrick   }
45309467b48Spatrick 
45409467b48Spatrick   assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
45509467b48Spatrick                                   "supported yet");
45609467b48Spatrick   // 32 = Max Num Vector Elements
45709467b48Spatrick   // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
45809467b48Spatrick   // 1 = Vector Register Class
45909467b48Spatrick   SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
46009467b48Spatrick 
461097a140dSpatrick   bool IsGCN = CurDAG->getSubtarget().getTargetTriple().getArch() ==
462097a140dSpatrick                Triple::amdgcn;
46309467b48Spatrick   RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
46409467b48Spatrick   bool IsRegSeq = true;
46509467b48Spatrick   unsigned NOps = N->getNumOperands();
46609467b48Spatrick   for (unsigned i = 0; i < NOps; i++) {
46709467b48Spatrick     // XXX: Why is this here?
46809467b48Spatrick     if (isa<RegisterSDNode>(N->getOperand(i))) {
46909467b48Spatrick       IsRegSeq = false;
47009467b48Spatrick       break;
47109467b48Spatrick     }
472097a140dSpatrick     unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
473097a140dSpatrick                          : R600RegisterInfo::getSubRegFromChannel(i);
47409467b48Spatrick     RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
47509467b48Spatrick     RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
47609467b48Spatrick   }
47709467b48Spatrick   if (NOps != NumVectorElts) {
47809467b48Spatrick     // Fill in the missing undef elements if this was a scalar_to_vector.
47909467b48Spatrick     assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
48009467b48Spatrick     MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
48109467b48Spatrick                                                    DL, EltVT);
48209467b48Spatrick     for (unsigned i = NOps; i < NumVectorElts; ++i) {
483097a140dSpatrick       unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
484097a140dSpatrick                            : R600RegisterInfo::getSubRegFromChannel(i);
48509467b48Spatrick       RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
48609467b48Spatrick       RegSeqArgs[1 + (2 * i) + 1] =
48709467b48Spatrick           CurDAG->getTargetConstant(Sub, DL, MVT::i32);
48809467b48Spatrick     }
48909467b48Spatrick   }
49009467b48Spatrick 
49109467b48Spatrick   if (!IsRegSeq)
49209467b48Spatrick     SelectCode(N);
49309467b48Spatrick   CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
49409467b48Spatrick }
49509467b48Spatrick 
Select(SDNode * N)49609467b48Spatrick void AMDGPUDAGToDAGISel::Select(SDNode *N) {
49709467b48Spatrick   unsigned int Opc = N->getOpcode();
49809467b48Spatrick   if (N->isMachineOpcode()) {
49909467b48Spatrick     N->setNodeId(-1);
50009467b48Spatrick     return;   // Already selected.
50109467b48Spatrick   }
50209467b48Spatrick 
50309467b48Spatrick   // isa<MemSDNode> almost works but is slightly too permissive for some DS
50409467b48Spatrick   // intrinsics.
50509467b48Spatrick   if (Opc == ISD::LOAD || Opc == ISD::STORE || isa<AtomicSDNode>(N) ||
50609467b48Spatrick       (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
50709467b48Spatrick        Opc == ISD::ATOMIC_LOAD_FADD ||
50809467b48Spatrick        Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
50973471bf0Spatrick        Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) {
51009467b48Spatrick     N = glueCopyToM0LDSInit(N);
51109467b48Spatrick     SelectCode(N);
51209467b48Spatrick     return;
51309467b48Spatrick   }
51409467b48Spatrick 
51509467b48Spatrick   switch (Opc) {
51609467b48Spatrick   default:
51709467b48Spatrick     break;
51809467b48Spatrick   // We are selecting i64 ADD here instead of custom lower it during
51909467b48Spatrick   // DAG legalization, so we can fold some i64 ADDs used for address
52009467b48Spatrick   // calculation into the LOAD and STORE instructions.
52109467b48Spatrick   case ISD::ADDC:
52209467b48Spatrick   case ISD::ADDE:
52309467b48Spatrick   case ISD::SUBC:
52409467b48Spatrick   case ISD::SUBE: {
52509467b48Spatrick     if (N->getValueType(0) != MVT::i64)
52609467b48Spatrick       break;
52709467b48Spatrick 
52809467b48Spatrick     SelectADD_SUB_I64(N);
52909467b48Spatrick     return;
53009467b48Spatrick   }
53109467b48Spatrick   case ISD::ADDCARRY:
53209467b48Spatrick   case ISD::SUBCARRY:
53309467b48Spatrick     if (N->getValueType(0) != MVT::i32)
53409467b48Spatrick       break;
53509467b48Spatrick 
53609467b48Spatrick     SelectAddcSubb(N);
53709467b48Spatrick     return;
53809467b48Spatrick   case ISD::UADDO:
53909467b48Spatrick   case ISD::USUBO: {
54009467b48Spatrick     SelectUADDO_USUBO(N);
54109467b48Spatrick     return;
54209467b48Spatrick   }
54309467b48Spatrick   case AMDGPUISD::FMUL_W_CHAIN: {
54409467b48Spatrick     SelectFMUL_W_CHAIN(N);
54509467b48Spatrick     return;
54609467b48Spatrick   }
54709467b48Spatrick   case AMDGPUISD::FMA_W_CHAIN: {
54809467b48Spatrick     SelectFMA_W_CHAIN(N);
54909467b48Spatrick     return;
55009467b48Spatrick   }
55109467b48Spatrick 
55209467b48Spatrick   case ISD::SCALAR_TO_VECTOR:
55309467b48Spatrick   case ISD::BUILD_VECTOR: {
55409467b48Spatrick     EVT VT = N->getValueType(0);
55509467b48Spatrick     unsigned NumVectorElts = VT.getVectorNumElements();
55609467b48Spatrick     if (VT.getScalarSizeInBits() == 16) {
55709467b48Spatrick       if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
55809467b48Spatrick         if (SDNode *Packed = packConstantV2I16(N, *CurDAG)) {
55909467b48Spatrick           ReplaceNode(N, Packed);
56009467b48Spatrick           return;
56109467b48Spatrick         }
56209467b48Spatrick       }
56309467b48Spatrick 
56409467b48Spatrick       break;
56509467b48Spatrick     }
56609467b48Spatrick 
56709467b48Spatrick     assert(VT.getVectorElementType().bitsEq(MVT::i32));
568097a140dSpatrick     unsigned RegClassID =
569097a140dSpatrick         SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32)->getID();
57009467b48Spatrick     SelectBuildVector(N, RegClassID);
57109467b48Spatrick     return;
57209467b48Spatrick   }
57309467b48Spatrick   case ISD::BUILD_PAIR: {
57409467b48Spatrick     SDValue RC, SubReg0, SubReg1;
57509467b48Spatrick     SDLoc DL(N);
57609467b48Spatrick     if (N->getValueType(0) == MVT::i128) {
57709467b48Spatrick       RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32);
57809467b48Spatrick       SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
57909467b48Spatrick       SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
58009467b48Spatrick     } else if (N->getValueType(0) == MVT::i64) {
58109467b48Spatrick       RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
58209467b48Spatrick       SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
58309467b48Spatrick       SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
58409467b48Spatrick     } else {
58509467b48Spatrick       llvm_unreachable("Unhandled value type for BUILD_PAIR");
58609467b48Spatrick     }
58709467b48Spatrick     const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
58809467b48Spatrick                             N->getOperand(1), SubReg1 };
58909467b48Spatrick     ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
59009467b48Spatrick                                           N->getValueType(0), Ops));
59109467b48Spatrick     return;
59209467b48Spatrick   }
59309467b48Spatrick 
59409467b48Spatrick   case ISD::Constant:
59509467b48Spatrick   case ISD::ConstantFP: {
59609467b48Spatrick     if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
59709467b48Spatrick       break;
59809467b48Spatrick 
59909467b48Spatrick     uint64_t Imm;
60009467b48Spatrick     if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
60109467b48Spatrick       Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
60209467b48Spatrick     else {
60309467b48Spatrick       ConstantSDNode *C = cast<ConstantSDNode>(N);
60409467b48Spatrick       Imm = C->getZExtValue();
60509467b48Spatrick     }
60609467b48Spatrick 
60709467b48Spatrick     SDLoc DL(N);
60809467b48Spatrick     ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
60909467b48Spatrick     return;
61009467b48Spatrick   }
61109467b48Spatrick   case AMDGPUISD::BFE_I32:
61209467b48Spatrick   case AMDGPUISD::BFE_U32: {
61309467b48Spatrick     // There is a scalar version available, but unlike the vector version which
61409467b48Spatrick     // has a separate operand for the offset and width, the scalar version packs
61509467b48Spatrick     // the width and offset into a single operand. Try to move to the scalar
61609467b48Spatrick     // version if the offsets are constant, so that we can try to keep extended
61709467b48Spatrick     // loads of kernel arguments in SGPRs.
61809467b48Spatrick 
61909467b48Spatrick     // TODO: Technically we could try to pattern match scalar bitshifts of
62009467b48Spatrick     // dynamic values, but it's probably not useful.
62109467b48Spatrick     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
62209467b48Spatrick     if (!Offset)
62309467b48Spatrick       break;
62409467b48Spatrick 
62509467b48Spatrick     ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
62609467b48Spatrick     if (!Width)
62709467b48Spatrick       break;
62809467b48Spatrick 
62909467b48Spatrick     bool Signed = Opc == AMDGPUISD::BFE_I32;
63009467b48Spatrick 
63109467b48Spatrick     uint32_t OffsetVal = Offset->getZExtValue();
63209467b48Spatrick     uint32_t WidthVal = Width->getZExtValue();
63309467b48Spatrick 
634*d415bd75Srobert     ReplaceNode(N, getBFE32(Signed, SDLoc(N), N->getOperand(0), OffsetVal,
635*d415bd75Srobert                             WidthVal));
63609467b48Spatrick     return;
63709467b48Spatrick   }
63809467b48Spatrick   case AMDGPUISD::DIV_SCALE: {
63909467b48Spatrick     SelectDIV_SCALE(N);
64009467b48Spatrick     return;
64109467b48Spatrick   }
64209467b48Spatrick   case AMDGPUISD::MAD_I64_I32:
64309467b48Spatrick   case AMDGPUISD::MAD_U64_U32: {
64409467b48Spatrick     SelectMAD_64_32(N);
64509467b48Spatrick     return;
64609467b48Spatrick   }
647*d415bd75Srobert   case ISD::SMUL_LOHI:
648*d415bd75Srobert   case ISD::UMUL_LOHI:
649*d415bd75Srobert     return SelectMUL_LOHI(N);
65009467b48Spatrick   case ISD::CopyToReg: {
65109467b48Spatrick     const SITargetLowering& Lowering =
65209467b48Spatrick       *static_cast<const SITargetLowering*>(getTargetLowering());
65309467b48Spatrick     N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
65409467b48Spatrick     break;
65509467b48Spatrick   }
65609467b48Spatrick   case ISD::AND:
65709467b48Spatrick   case ISD::SRL:
65809467b48Spatrick   case ISD::SRA:
65909467b48Spatrick   case ISD::SIGN_EXTEND_INREG:
66009467b48Spatrick     if (N->getValueType(0) != MVT::i32)
66109467b48Spatrick       break;
66209467b48Spatrick 
66309467b48Spatrick     SelectS_BFE(N);
66409467b48Spatrick     return;
66509467b48Spatrick   case ISD::BRCOND:
66609467b48Spatrick     SelectBRCOND(N);
66709467b48Spatrick     return;
66809467b48Spatrick   case ISD::FMAD:
66909467b48Spatrick   case ISD::FMA:
67009467b48Spatrick     SelectFMAD_FMA(N);
67109467b48Spatrick     return;
67209467b48Spatrick   case AMDGPUISD::CVT_PKRTZ_F16_F32:
67309467b48Spatrick   case AMDGPUISD::CVT_PKNORM_I16_F32:
67409467b48Spatrick   case AMDGPUISD::CVT_PKNORM_U16_F32:
67509467b48Spatrick   case AMDGPUISD::CVT_PK_U16_U32:
67609467b48Spatrick   case AMDGPUISD::CVT_PK_I16_I32: {
67709467b48Spatrick     // Hack around using a legal type if f16 is illegal.
67809467b48Spatrick     if (N->getValueType(0) == MVT::i32) {
67909467b48Spatrick       MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
68009467b48Spatrick       N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
68109467b48Spatrick                               { N->getOperand(0), N->getOperand(1) });
68209467b48Spatrick       SelectCode(N);
68309467b48Spatrick       return;
68409467b48Spatrick     }
68509467b48Spatrick 
68609467b48Spatrick     break;
68709467b48Spatrick   }
68809467b48Spatrick   case ISD::INTRINSIC_W_CHAIN: {
68909467b48Spatrick     SelectINTRINSIC_W_CHAIN(N);
69009467b48Spatrick     return;
69109467b48Spatrick   }
69209467b48Spatrick   case ISD::INTRINSIC_WO_CHAIN: {
69309467b48Spatrick     SelectINTRINSIC_WO_CHAIN(N);
69409467b48Spatrick     return;
69509467b48Spatrick   }
69609467b48Spatrick   case ISD::INTRINSIC_VOID: {
69709467b48Spatrick     SelectINTRINSIC_VOID(N);
69809467b48Spatrick     return;
69909467b48Spatrick   }
70009467b48Spatrick   }
70109467b48Spatrick 
70209467b48Spatrick   SelectCode(N);
70309467b48Spatrick }
70409467b48Spatrick 
isUniformBr(const SDNode * N) const70509467b48Spatrick bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
70609467b48Spatrick   const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
70709467b48Spatrick   const Instruction *Term = BB->getTerminator();
70809467b48Spatrick   return Term->getMetadata("amdgpu.uniform") ||
70909467b48Spatrick          Term->getMetadata("structurizecfg.uniform");
71009467b48Spatrick }
71109467b48Spatrick 
isUnneededShiftMask(const SDNode * N,unsigned ShAmtBits) const712*d415bd75Srobert bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N,
713*d415bd75Srobert                                              unsigned ShAmtBits) const {
714*d415bd75Srobert   assert(N->getOpcode() == ISD::AND);
715*d415bd75Srobert 
716*d415bd75Srobert   const APInt &RHS = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
717*d415bd75Srobert   if (RHS.countTrailingOnes() >= ShAmtBits)
718*d415bd75Srobert     return true;
719*d415bd75Srobert 
720*d415bd75Srobert   const APInt &LHSKnownZeros = CurDAG->computeKnownBits(N->getOperand(0)).Zero;
721*d415bd75Srobert   return (LHSKnownZeros | RHS).countTrailingOnes() >= ShAmtBits;
722*d415bd75Srobert }
723*d415bd75Srobert 
getBaseWithOffsetUsingSplitOR(SelectionDAG & DAG,SDValue Addr,SDValue & N0,SDValue & N1)72473471bf0Spatrick static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr,
72573471bf0Spatrick                                           SDValue &N0, SDValue &N1) {
72673471bf0Spatrick   if (Addr.getValueType() == MVT::i64 && Addr.getOpcode() == ISD::BITCAST &&
72773471bf0Spatrick       Addr.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
72873471bf0Spatrick     // As we split 64-bit `or` earlier, it's complicated pattern to match, i.e.
72973471bf0Spatrick     // (i64 (bitcast (v2i32 (build_vector
73073471bf0Spatrick     //                        (or (extract_vector_elt V, 0), OFFSET),
73173471bf0Spatrick     //                        (extract_vector_elt V, 1)))))
73273471bf0Spatrick     SDValue Lo = Addr.getOperand(0).getOperand(0);
73373471bf0Spatrick     if (Lo.getOpcode() == ISD::OR && DAG.isBaseWithConstantOffset(Lo)) {
73473471bf0Spatrick       SDValue BaseLo = Lo.getOperand(0);
73573471bf0Spatrick       SDValue BaseHi = Addr.getOperand(0).getOperand(1);
73673471bf0Spatrick       // Check that split base (Lo and Hi) are extracted from the same one.
73773471bf0Spatrick       if (BaseLo.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
73873471bf0Spatrick           BaseHi.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
73973471bf0Spatrick           BaseLo.getOperand(0) == BaseHi.getOperand(0) &&
74073471bf0Spatrick           // Lo is statically extracted from index 0.
74173471bf0Spatrick           isa<ConstantSDNode>(BaseLo.getOperand(1)) &&
74273471bf0Spatrick           BaseLo.getConstantOperandVal(1) == 0 &&
74373471bf0Spatrick           // Hi is statically extracted from index 0.
74473471bf0Spatrick           isa<ConstantSDNode>(BaseHi.getOperand(1)) &&
74573471bf0Spatrick           BaseHi.getConstantOperandVal(1) == 1) {
74673471bf0Spatrick         N0 = BaseLo.getOperand(0).getOperand(0);
74773471bf0Spatrick         N1 = Lo.getOperand(1);
74873471bf0Spatrick         return true;
74973471bf0Spatrick       }
75073471bf0Spatrick     }
75173471bf0Spatrick   }
75273471bf0Spatrick   return false;
75373471bf0Spatrick }
75473471bf0Spatrick 
isBaseWithConstantOffset64(SDValue Addr,SDValue & LHS,SDValue & RHS) const75573471bf0Spatrick bool AMDGPUDAGToDAGISel::isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
75673471bf0Spatrick                                                     SDValue &RHS) const {
75773471bf0Spatrick   if (CurDAG->isBaseWithConstantOffset(Addr)) {
75873471bf0Spatrick     LHS = Addr.getOperand(0);
75973471bf0Spatrick     RHS = Addr.getOperand(1);
76073471bf0Spatrick     return true;
76173471bf0Spatrick   }
76273471bf0Spatrick 
76373471bf0Spatrick   if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, LHS, RHS)) {
76473471bf0Spatrick     assert(LHS && RHS && isa<ConstantSDNode>(RHS));
76573471bf0Spatrick     return true;
76673471bf0Spatrick   }
76773471bf0Spatrick 
76873471bf0Spatrick   return false;
76973471bf0Spatrick }
77073471bf0Spatrick 
getPassName() const77109467b48Spatrick StringRef AMDGPUDAGToDAGISel::getPassName() const {
77209467b48Spatrick   return "AMDGPU DAG->DAG Pattern Instruction Selection";
77309467b48Spatrick }
77409467b48Spatrick 
77509467b48Spatrick //===----------------------------------------------------------------------===//
77609467b48Spatrick // Complex Patterns
77709467b48Spatrick //===----------------------------------------------------------------------===//
77809467b48Spatrick 
SelectADDRVTX_READ(SDValue Addr,SDValue & Base,SDValue & Offset)77909467b48Spatrick bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
78009467b48Spatrick                                             SDValue &Offset) {
78109467b48Spatrick   return false;
78209467b48Spatrick }
78309467b48Spatrick 
SelectADDRIndirect(SDValue Addr,SDValue & Base,SDValue & Offset)78409467b48Spatrick bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
78509467b48Spatrick                                             SDValue &Offset) {
78609467b48Spatrick   ConstantSDNode *C;
78709467b48Spatrick   SDLoc DL(Addr);
78809467b48Spatrick 
78909467b48Spatrick   if ((C = dyn_cast<ConstantSDNode>(Addr))) {
79009467b48Spatrick     Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
79109467b48Spatrick     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
79209467b48Spatrick   } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
79309467b48Spatrick              (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
79409467b48Spatrick     Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
79509467b48Spatrick     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
79609467b48Spatrick   } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
79709467b48Spatrick             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
79809467b48Spatrick     Base = Addr.getOperand(0);
79909467b48Spatrick     Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
80009467b48Spatrick   } else {
80109467b48Spatrick     Base = Addr;
80209467b48Spatrick     Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
80309467b48Spatrick   }
80409467b48Spatrick 
80509467b48Spatrick   return true;
80609467b48Spatrick }
80709467b48Spatrick 
getMaterializedScalarImm32(int64_t Val,const SDLoc & DL) const80809467b48Spatrick SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
80909467b48Spatrick                                                        const SDLoc &DL) const {
81009467b48Spatrick   SDNode *Mov = CurDAG->getMachineNode(
81109467b48Spatrick     AMDGPU::S_MOV_B32, DL, MVT::i32,
81209467b48Spatrick     CurDAG->getTargetConstant(Val, DL, MVT::i32));
81309467b48Spatrick   return SDValue(Mov, 0);
81409467b48Spatrick }
81509467b48Spatrick 
81609467b48Spatrick // FIXME: Should only handle addcarry/subcarry
SelectADD_SUB_I64(SDNode * N)81709467b48Spatrick void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
81809467b48Spatrick   SDLoc DL(N);
81909467b48Spatrick   SDValue LHS = N->getOperand(0);
82009467b48Spatrick   SDValue RHS = N->getOperand(1);
82109467b48Spatrick 
82209467b48Spatrick   unsigned Opcode = N->getOpcode();
82309467b48Spatrick   bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
82409467b48Spatrick   bool ProduceCarry =
82509467b48Spatrick       ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
82609467b48Spatrick   bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;
82709467b48Spatrick 
82809467b48Spatrick   SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
82909467b48Spatrick   SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
83009467b48Spatrick 
83109467b48Spatrick   SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
83209467b48Spatrick                                        DL, MVT::i32, LHS, Sub0);
83309467b48Spatrick   SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
83409467b48Spatrick                                        DL, MVT::i32, LHS, Sub1);
83509467b48Spatrick 
83609467b48Spatrick   SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
83709467b48Spatrick                                        DL, MVT::i32, RHS, Sub0);
83809467b48Spatrick   SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
83909467b48Spatrick                                        DL, MVT::i32, RHS, Sub1);
84009467b48Spatrick 
84109467b48Spatrick   SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
84209467b48Spatrick 
843097a140dSpatrick   static const unsigned OpcMap[2][2][2] = {
844097a140dSpatrick       {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
84573471bf0Spatrick        {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
846097a140dSpatrick       {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
847097a140dSpatrick        {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};
848097a140dSpatrick 
849097a140dSpatrick   unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
850097a140dSpatrick   unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];
85109467b48Spatrick 
85209467b48Spatrick   SDNode *AddLo;
85309467b48Spatrick   if (!ConsumeCarry) {
85409467b48Spatrick     SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
85509467b48Spatrick     AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
85609467b48Spatrick   } else {
85709467b48Spatrick     SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
85809467b48Spatrick     AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
85909467b48Spatrick   }
86009467b48Spatrick   SDValue AddHiArgs[] = {
86109467b48Spatrick     SDValue(Hi0, 0),
86209467b48Spatrick     SDValue(Hi1, 0),
86309467b48Spatrick     SDValue(AddLo, 1)
86409467b48Spatrick   };
86509467b48Spatrick   SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);
86609467b48Spatrick 
86709467b48Spatrick   SDValue RegSequenceArgs[] = {
86809467b48Spatrick     CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
86909467b48Spatrick     SDValue(AddLo,0),
87009467b48Spatrick     Sub0,
87109467b48Spatrick     SDValue(AddHi,0),
87209467b48Spatrick     Sub1,
87309467b48Spatrick   };
87409467b48Spatrick   SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
87509467b48Spatrick                                                MVT::i64, RegSequenceArgs);
87609467b48Spatrick 
87709467b48Spatrick   if (ProduceCarry) {
87809467b48Spatrick     // Replace the carry-use
87909467b48Spatrick     ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
88009467b48Spatrick   }
88109467b48Spatrick 
88209467b48Spatrick   // Replace the remaining uses.
88309467b48Spatrick   ReplaceNode(N, RegSequence);
88409467b48Spatrick }
88509467b48Spatrick 
SelectAddcSubb(SDNode * N)88609467b48Spatrick void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
88709467b48Spatrick   SDLoc DL(N);
88809467b48Spatrick   SDValue LHS = N->getOperand(0);
88909467b48Spatrick   SDValue RHS = N->getOperand(1);
89009467b48Spatrick   SDValue CI = N->getOperand(2);
89109467b48Spatrick 
892097a140dSpatrick   if (N->isDivergent()) {
89309467b48Spatrick     unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64
89409467b48Spatrick                                                    : AMDGPU::V_SUBB_U32_e64;
89509467b48Spatrick     CurDAG->SelectNodeTo(
89609467b48Spatrick         N, Opc, N->getVTList(),
897097a140dSpatrick         {LHS, RHS, CI,
898097a140dSpatrick          CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
899097a140dSpatrick   } else {
900097a140dSpatrick     unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::S_ADD_CO_PSEUDO
901097a140dSpatrick                                                    : AMDGPU::S_SUB_CO_PSEUDO;
902097a140dSpatrick     CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});
903097a140dSpatrick   }
90409467b48Spatrick }
90509467b48Spatrick 
SelectUADDO_USUBO(SDNode * N)90609467b48Spatrick void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
90709467b48Spatrick   // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
90809467b48Spatrick   // carry out despite the _i32 name. These were renamed in VI to _U32.
90909467b48Spatrick   // FIXME: We should probably rename the opcodes here.
910097a140dSpatrick   bool IsAdd = N->getOpcode() == ISD::UADDO;
911097a140dSpatrick   bool IsVALU = N->isDivergent();
912097a140dSpatrick 
913097a140dSpatrick   for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E;
914097a140dSpatrick        ++UI)
915097a140dSpatrick     if (UI.getUse().getResNo() == 1) {
916097a140dSpatrick       if ((IsAdd && (UI->getOpcode() != ISD::ADDCARRY)) ||
917097a140dSpatrick           (!IsAdd && (UI->getOpcode() != ISD::SUBCARRY))) {
918097a140dSpatrick         IsVALU = true;
919097a140dSpatrick         break;
920097a140dSpatrick       }
921097a140dSpatrick     }
922097a140dSpatrick 
923097a140dSpatrick   if (IsVALU) {
92473471bf0Spatrick     unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
92509467b48Spatrick 
92609467b48Spatrick     CurDAG->SelectNodeTo(
92709467b48Spatrick         N, Opc, N->getVTList(),
92809467b48Spatrick         {N->getOperand(0), N->getOperand(1),
92909467b48Spatrick          CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
930097a140dSpatrick   } else {
931097a140dSpatrick     unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
932097a140dSpatrick                                                 : AMDGPU::S_USUBO_PSEUDO;
933097a140dSpatrick 
934097a140dSpatrick     CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
935097a140dSpatrick                          {N->getOperand(0), N->getOperand(1)});
936097a140dSpatrick   }
93709467b48Spatrick }
93809467b48Spatrick 
SelectFMA_W_CHAIN(SDNode * N)93909467b48Spatrick void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
94009467b48Spatrick   SDLoc SL(N);
94109467b48Spatrick   //  src0_modifiers, src0,  src1_modifiers, src1, src2_modifiers, src2, clamp, omod
94209467b48Spatrick   SDValue Ops[10];
94309467b48Spatrick 
94409467b48Spatrick   SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
94509467b48Spatrick   SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
94609467b48Spatrick   SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
94709467b48Spatrick   Ops[8] = N->getOperand(0);
94809467b48Spatrick   Ops[9] = N->getOperand(4);
94909467b48Spatrick 
950*d415bd75Srobert   // If there are no source modifiers, prefer fmac over fma because it can use
951*d415bd75Srobert   // the smaller VOP2 encoding.
952*d415bd75Srobert   bool UseFMAC = Subtarget->hasDLInsts() &&
953*d415bd75Srobert                  cast<ConstantSDNode>(Ops[0])->isZero() &&
954*d415bd75Srobert                  cast<ConstantSDNode>(Ops[2])->isZero() &&
955*d415bd75Srobert                  cast<ConstantSDNode>(Ops[4])->isZero();
956*d415bd75Srobert   unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
957*d415bd75Srobert   CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), Ops);
95809467b48Spatrick }
95909467b48Spatrick 
SelectFMUL_W_CHAIN(SDNode * N)96009467b48Spatrick void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
96109467b48Spatrick   SDLoc SL(N);
96209467b48Spatrick   //    src0_modifiers, src0,  src1_modifiers, src1, clamp, omod
96309467b48Spatrick   SDValue Ops[8];
96409467b48Spatrick 
96509467b48Spatrick   SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
96609467b48Spatrick   SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
96709467b48Spatrick   Ops[6] = N->getOperand(0);
96809467b48Spatrick   Ops[7] = N->getOperand(3);
96909467b48Spatrick 
97009467b48Spatrick   CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
97109467b48Spatrick }
97209467b48Spatrick 
97309467b48Spatrick // We need to handle this here because tablegen doesn't support matching
97409467b48Spatrick // instructions with multiple outputs.
SelectDIV_SCALE(SDNode * N)97509467b48Spatrick void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
97609467b48Spatrick   SDLoc SL(N);
97709467b48Spatrick   EVT VT = N->getValueType(0);
97809467b48Spatrick 
97909467b48Spatrick   assert(VT == MVT::f32 || VT == MVT::f64);
98009467b48Spatrick 
98109467b48Spatrick   unsigned Opc
98273471bf0Spatrick     = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;
98309467b48Spatrick 
98473471bf0Spatrick   // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
98573471bf0Spatrick   // omod
98673471bf0Spatrick   SDValue Ops[8];
98773471bf0Spatrick   SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
98873471bf0Spatrick   SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
98973471bf0Spatrick   SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);
99009467b48Spatrick   CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
99109467b48Spatrick }
99209467b48Spatrick 
99309467b48Spatrick // We need to handle this here because tablegen doesn't support matching
99409467b48Spatrick // instructions with multiple outputs.
SelectMAD_64_32(SDNode * N)99509467b48Spatrick void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
99609467b48Spatrick   SDLoc SL(N);
99709467b48Spatrick   bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
998*d415bd75Srobert   unsigned Opc;
999*d415bd75Srobert   if (Subtarget->hasMADIntraFwdBug())
1000*d415bd75Srobert     Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1001*d415bd75Srobert                  : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1002*d415bd75Srobert   else
1003*d415bd75Srobert     Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
100409467b48Spatrick 
100509467b48Spatrick   SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
100609467b48Spatrick   SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
100709467b48Spatrick                     Clamp };
100809467b48Spatrick   CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
100909467b48Spatrick }
101009467b48Spatrick 
1011*d415bd75Srobert // We need to handle this here because tablegen doesn't support matching
1012*d415bd75Srobert // instructions with multiple outputs.
SelectMUL_LOHI(SDNode * N)1013*d415bd75Srobert void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) {
1014*d415bd75Srobert   SDLoc SL(N);
1015*d415bd75Srobert   bool Signed = N->getOpcode() == ISD::SMUL_LOHI;
1016*d415bd75Srobert   unsigned Opc;
1017*d415bd75Srobert   if (Subtarget->hasMADIntraFwdBug())
1018*d415bd75Srobert     Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1019*d415bd75Srobert                  : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1020*d415bd75Srobert   else
1021*d415bd75Srobert     Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
1022*d415bd75Srobert 
1023*d415bd75Srobert   SDValue Zero = CurDAG->getTargetConstant(0, SL, MVT::i64);
1024*d415bd75Srobert   SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
1025*d415bd75Srobert   SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Zero, Clamp};
1026*d415bd75Srobert   SDNode *Mad = CurDAG->getMachineNode(Opc, SL, N->getVTList(), Ops);
1027*d415bd75Srobert   if (!SDValue(N, 0).use_empty()) {
1028*d415bd75Srobert     SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32);
1029*d415bd75Srobert     SDNode *Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
1030*d415bd75Srobert                                         MVT::i32, SDValue(Mad, 0), Sub0);
1031*d415bd75Srobert     ReplaceUses(SDValue(N, 0), SDValue(Lo, 0));
1032*d415bd75Srobert   }
1033*d415bd75Srobert   if (!SDValue(N, 1).use_empty()) {
1034*d415bd75Srobert     SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32);
1035*d415bd75Srobert     SDNode *Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
1036*d415bd75Srobert                                         MVT::i32, SDValue(Mad, 0), Sub1);
1037*d415bd75Srobert     ReplaceUses(SDValue(N, 1), SDValue(Hi, 0));
1038*d415bd75Srobert   }
1039*d415bd75Srobert   CurDAG->RemoveDeadNode(N);
1040*d415bd75Srobert }
1041*d415bd75Srobert 
isDSOffsetLegal(SDValue Base,unsigned Offset) const104273471bf0Spatrick bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset) const {
104373471bf0Spatrick   if (!isUInt<16>(Offset))
104409467b48Spatrick     return false;
104509467b48Spatrick 
104673471bf0Spatrick   if (!Base || Subtarget->hasUsableDSOffset() ||
104709467b48Spatrick       Subtarget->unsafeDSOffsetFoldingEnabled())
104809467b48Spatrick     return true;
104909467b48Spatrick 
105009467b48Spatrick   // On Southern Islands instruction with a negative base value and an offset
105109467b48Spatrick   // don't seem to work.
105209467b48Spatrick   return CurDAG->SignBitIsZero(Base);
105309467b48Spatrick }
105409467b48Spatrick 
SelectDS1Addr1Offset(SDValue Addr,SDValue & Base,SDValue & Offset) const105509467b48Spatrick bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
105609467b48Spatrick                                               SDValue &Offset) const {
105709467b48Spatrick   SDLoc DL(Addr);
105809467b48Spatrick   if (CurDAG->isBaseWithConstantOffset(Addr)) {
105909467b48Spatrick     SDValue N0 = Addr.getOperand(0);
106009467b48Spatrick     SDValue N1 = Addr.getOperand(1);
106109467b48Spatrick     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
106273471bf0Spatrick     if (isDSOffsetLegal(N0, C1->getSExtValue())) {
106309467b48Spatrick       // (add n0, c0)
106409467b48Spatrick       Base = N0;
106509467b48Spatrick       Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
106609467b48Spatrick       return true;
106709467b48Spatrick     }
106809467b48Spatrick   } else if (Addr.getOpcode() == ISD::SUB) {
106909467b48Spatrick     // sub C, x -> add (sub 0, x), C
107009467b48Spatrick     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
107109467b48Spatrick       int64_t ByteOffset = C->getSExtValue();
107273471bf0Spatrick       if (isDSOffsetLegal(SDValue(), ByteOffset)) {
107309467b48Spatrick         SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
107409467b48Spatrick 
107509467b48Spatrick         // XXX - This is kind of hacky. Create a dummy sub node so we can check
107609467b48Spatrick         // the known bits in isDSOffsetLegal. We need to emit the selected node
107709467b48Spatrick         // here, so this is thrown away.
107809467b48Spatrick         SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
107909467b48Spatrick                                       Zero, Addr.getOperand(1));
108009467b48Spatrick 
108173471bf0Spatrick         if (isDSOffsetLegal(Sub, ByteOffset)) {
108209467b48Spatrick           SmallVector<SDValue, 3> Opnds;
108309467b48Spatrick           Opnds.push_back(Zero);
108409467b48Spatrick           Opnds.push_back(Addr.getOperand(1));
108509467b48Spatrick 
108609467b48Spatrick           // FIXME: Select to VOP3 version for with-carry.
108773471bf0Spatrick           unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
108809467b48Spatrick           if (Subtarget->hasAddNoCarry()) {
108909467b48Spatrick             SubOp = AMDGPU::V_SUB_U32_e64;
109009467b48Spatrick             Opnds.push_back(
109109467b48Spatrick                 CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
109209467b48Spatrick           }
109309467b48Spatrick 
109409467b48Spatrick           MachineSDNode *MachineSub =
109509467b48Spatrick               CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);
109609467b48Spatrick 
109709467b48Spatrick           Base = SDValue(MachineSub, 0);
109809467b48Spatrick           Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
109909467b48Spatrick           return true;
110009467b48Spatrick         }
110109467b48Spatrick       }
110209467b48Spatrick     }
110309467b48Spatrick   } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
110409467b48Spatrick     // If we have a constant address, prefer to put the constant into the
110509467b48Spatrick     // offset. This can save moves to load the constant address since multiple
110609467b48Spatrick     // operations can share the zero base address register, and enables merging
110709467b48Spatrick     // into read2 / write2 instructions.
110809467b48Spatrick 
110909467b48Spatrick     SDLoc DL(Addr);
111009467b48Spatrick 
111173471bf0Spatrick     if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
111209467b48Spatrick       SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
111309467b48Spatrick       MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
111409467b48Spatrick                                  DL, MVT::i32, Zero);
111509467b48Spatrick       Base = SDValue(MovZero, 0);
111609467b48Spatrick       Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
111709467b48Spatrick       return true;
111809467b48Spatrick     }
111909467b48Spatrick   }
112009467b48Spatrick 
112109467b48Spatrick   // default case
112209467b48Spatrick   Base = Addr;
112309467b48Spatrick   Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
112409467b48Spatrick   return true;
112509467b48Spatrick }
112609467b48Spatrick 
isDSOffset2Legal(SDValue Base,unsigned Offset0,unsigned Offset1,unsigned Size) const112773471bf0Spatrick bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
112873471bf0Spatrick                                           unsigned Offset1,
112973471bf0Spatrick                                           unsigned Size) const {
113073471bf0Spatrick   if (Offset0 % Size != 0 || Offset1 % Size != 0)
113173471bf0Spatrick     return false;
113273471bf0Spatrick   if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
113373471bf0Spatrick     return false;
113473471bf0Spatrick 
113573471bf0Spatrick   if (!Base || Subtarget->hasUsableDSOffset() ||
113673471bf0Spatrick       Subtarget->unsafeDSOffsetFoldingEnabled())
113773471bf0Spatrick     return true;
113873471bf0Spatrick 
113973471bf0Spatrick   // On Southern Islands instruction with a negative base value and an offset
114073471bf0Spatrick   // don't seem to work.
114173471bf0Spatrick   return CurDAG->SignBitIsZero(Base);
114273471bf0Spatrick }
114373471bf0Spatrick 
114409467b48Spatrick // TODO: If offset is too big, put low 16-bit into offset.
SelectDS64Bit4ByteAligned(SDValue Addr,SDValue & Base,SDValue & Offset0,SDValue & Offset1) const114509467b48Spatrick bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
114609467b48Spatrick                                                    SDValue &Offset0,
114709467b48Spatrick                                                    SDValue &Offset1) const {
114873471bf0Spatrick   return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);
114973471bf0Spatrick }
115073471bf0Spatrick 
SelectDS128Bit8ByteAligned(SDValue Addr,SDValue & Base,SDValue & Offset0,SDValue & Offset1) const115173471bf0Spatrick bool AMDGPUDAGToDAGISel::SelectDS128Bit8ByteAligned(SDValue Addr, SDValue &Base,
115273471bf0Spatrick                                                     SDValue &Offset0,
115373471bf0Spatrick                                                     SDValue &Offset1) const {
115473471bf0Spatrick   return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);
115573471bf0Spatrick }
115673471bf0Spatrick 
SelectDSReadWrite2(SDValue Addr,SDValue & Base,SDValue & Offset0,SDValue & Offset1,unsigned Size) const115773471bf0Spatrick bool AMDGPUDAGToDAGISel::SelectDSReadWrite2(SDValue Addr, SDValue &Base,
115873471bf0Spatrick                                             SDValue &Offset0, SDValue &Offset1,
115973471bf0Spatrick                                             unsigned Size) const {
116009467b48Spatrick   SDLoc DL(Addr);
116109467b48Spatrick 
116209467b48Spatrick   if (CurDAG->isBaseWithConstantOffset(Addr)) {
116309467b48Spatrick     SDValue N0 = Addr.getOperand(0);
116409467b48Spatrick     SDValue N1 = Addr.getOperand(1);
116509467b48Spatrick     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
116673471bf0Spatrick     unsigned OffsetValue0 = C1->getZExtValue();
116773471bf0Spatrick     unsigned OffsetValue1 = OffsetValue0 + Size;
116873471bf0Spatrick 
116909467b48Spatrick     // (add n0, c0)
117073471bf0Spatrick     if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {
117109467b48Spatrick       Base = N0;
117273471bf0Spatrick       Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
117373471bf0Spatrick       Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
117409467b48Spatrick       return true;
117509467b48Spatrick     }
117609467b48Spatrick   } else if (Addr.getOpcode() == ISD::SUB) {
117709467b48Spatrick     // sub C, x -> add (sub 0, x), C
117873471bf0Spatrick     if (const ConstantSDNode *C =
117973471bf0Spatrick             dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
118073471bf0Spatrick       unsigned OffsetValue0 = C->getZExtValue();
118173471bf0Spatrick       unsigned OffsetValue1 = OffsetValue0 + Size;
118209467b48Spatrick 
118373471bf0Spatrick       if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
118409467b48Spatrick         SDLoc DL(Addr);
118509467b48Spatrick         SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
118609467b48Spatrick 
118709467b48Spatrick         // XXX - This is kind of hacky. Create a dummy sub node so we can check
118809467b48Spatrick         // the known bits in isDSOffsetLegal. We need to emit the selected node
118909467b48Spatrick         // here, so this is thrown away.
119073471bf0Spatrick         SDValue Sub =
119173471bf0Spatrick             CurDAG->getNode(ISD::SUB, DL, MVT::i32, Zero, Addr.getOperand(1));
119209467b48Spatrick 
119373471bf0Spatrick         if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {
119409467b48Spatrick           SmallVector<SDValue, 3> Opnds;
119509467b48Spatrick           Opnds.push_back(Zero);
119609467b48Spatrick           Opnds.push_back(Addr.getOperand(1));
119773471bf0Spatrick           unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
119809467b48Spatrick           if (Subtarget->hasAddNoCarry()) {
119909467b48Spatrick             SubOp = AMDGPU::V_SUB_U32_e64;
120009467b48Spatrick             Opnds.push_back(
120109467b48Spatrick                 CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
120209467b48Spatrick           }
120309467b48Spatrick 
120473471bf0Spatrick           MachineSDNode *MachineSub = CurDAG->getMachineNode(
120573471bf0Spatrick               SubOp, DL, MVT::getIntegerVT(Size * 8), Opnds);
120609467b48Spatrick 
120709467b48Spatrick           Base = SDValue(MachineSub, 0);
120873471bf0Spatrick           Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
120973471bf0Spatrick           Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
121009467b48Spatrick           return true;
121109467b48Spatrick         }
121209467b48Spatrick       }
121309467b48Spatrick     }
121409467b48Spatrick   } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
121573471bf0Spatrick     unsigned OffsetValue0 = CAddr->getZExtValue();
121673471bf0Spatrick     unsigned OffsetValue1 = OffsetValue0 + Size;
121709467b48Spatrick 
121873471bf0Spatrick     if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
121909467b48Spatrick       SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
122073471bf0Spatrick       MachineSDNode *MovZero =
122173471bf0Spatrick           CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, Zero);
122209467b48Spatrick       Base = SDValue(MovZero, 0);
122373471bf0Spatrick       Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
122473471bf0Spatrick       Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
122509467b48Spatrick       return true;
122609467b48Spatrick     }
122709467b48Spatrick   }
122809467b48Spatrick 
122909467b48Spatrick   // default case
123009467b48Spatrick 
123109467b48Spatrick   Base = Addr;
123209467b48Spatrick   Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
123309467b48Spatrick   Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
123409467b48Spatrick   return true;
123509467b48Spatrick }
123609467b48Spatrick 
SelectMUBUF(SDValue Addr,SDValue & Ptr,SDValue & VAddr,SDValue & SOffset,SDValue & Offset,SDValue & Offen,SDValue & Idxen,SDValue & Addr64) const123773471bf0Spatrick bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
123873471bf0Spatrick                                      SDValue &SOffset, SDValue &Offset,
123973471bf0Spatrick                                      SDValue &Offen, SDValue &Idxen,
124073471bf0Spatrick                                      SDValue &Addr64) const {
124109467b48Spatrick   // Subtarget prefers to use flat instruction
1242097a140dSpatrick   // FIXME: This should be a pattern predicate and not reach here
124309467b48Spatrick   if (Subtarget->useFlatForGlobal())
124409467b48Spatrick     return false;
124509467b48Spatrick 
124609467b48Spatrick   SDLoc DL(Addr);
124709467b48Spatrick 
124809467b48Spatrick   Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
124909467b48Spatrick   Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
125009467b48Spatrick   Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
125109467b48Spatrick   SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
125209467b48Spatrick 
125309467b48Spatrick   ConstantSDNode *C1 = nullptr;
125409467b48Spatrick   SDValue N0 = Addr;
125509467b48Spatrick   if (CurDAG->isBaseWithConstantOffset(Addr)) {
125609467b48Spatrick     C1 = cast<ConstantSDNode>(Addr.getOperand(1));
125709467b48Spatrick     if (isUInt<32>(C1->getZExtValue()))
125809467b48Spatrick       N0 = Addr.getOperand(0);
125909467b48Spatrick     else
126009467b48Spatrick       C1 = nullptr;
126109467b48Spatrick   }
126209467b48Spatrick 
126309467b48Spatrick   if (N0.getOpcode() == ISD::ADD) {
126409467b48Spatrick     // (add N2, N3) -> addr64, or
126509467b48Spatrick     // (add (add N2, N3), C1) -> addr64
126609467b48Spatrick     SDValue N2 = N0.getOperand(0);
126709467b48Spatrick     SDValue N3 = N0.getOperand(1);
126809467b48Spatrick     Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
126909467b48Spatrick 
127009467b48Spatrick     if (N2->isDivergent()) {
127109467b48Spatrick       if (N3->isDivergent()) {
127209467b48Spatrick         // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
127309467b48Spatrick         // addr64, and construct the resource from a 0 address.
127409467b48Spatrick         Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
127509467b48Spatrick         VAddr = N0;
127609467b48Spatrick       } else {
127709467b48Spatrick         // N2 is divergent, N3 is not.
127809467b48Spatrick         Ptr = N3;
127909467b48Spatrick         VAddr = N2;
128009467b48Spatrick       }
128109467b48Spatrick     } else {
128209467b48Spatrick       // N2 is not divergent.
128309467b48Spatrick       Ptr = N2;
128409467b48Spatrick       VAddr = N3;
128509467b48Spatrick     }
128609467b48Spatrick     Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
128709467b48Spatrick   } else if (N0->isDivergent()) {
128809467b48Spatrick     // N0 is divergent. Use it as the addr64, and construct the resource from a
128909467b48Spatrick     // 0 address.
129009467b48Spatrick     Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
129109467b48Spatrick     VAddr = N0;
129209467b48Spatrick     Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
129309467b48Spatrick   } else {
129409467b48Spatrick     // N0 -> offset, or
129509467b48Spatrick     // (N0 + C1) -> offset
129609467b48Spatrick     VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
129709467b48Spatrick     Ptr = N0;
129809467b48Spatrick   }
129909467b48Spatrick 
130009467b48Spatrick   if (!C1) {
130109467b48Spatrick     // No offset.
130209467b48Spatrick     Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
130309467b48Spatrick     return true;
130409467b48Spatrick   }
130509467b48Spatrick 
130609467b48Spatrick   if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
130709467b48Spatrick     // Legal offset for instruction.
130809467b48Spatrick     Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
130909467b48Spatrick     return true;
131009467b48Spatrick   }
131109467b48Spatrick 
131209467b48Spatrick   // Illegal offset, store it in soffset.
131309467b48Spatrick   Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
131409467b48Spatrick   SOffset =
131509467b48Spatrick       SDValue(CurDAG->getMachineNode(
131609467b48Spatrick                   AMDGPU::S_MOV_B32, DL, MVT::i32,
131709467b48Spatrick                   CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
131809467b48Spatrick               0);
131909467b48Spatrick   return true;
132009467b48Spatrick }
132109467b48Spatrick 
SelectMUBUFAddr64(SDValue Addr,SDValue & SRsrc,SDValue & VAddr,SDValue & SOffset,SDValue & Offset) const132209467b48Spatrick bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
132309467b48Spatrick                                            SDValue &VAddr, SDValue &SOffset,
132473471bf0Spatrick                                            SDValue &Offset) const {
132509467b48Spatrick   SDValue Ptr, Offen, Idxen, Addr64;
132609467b48Spatrick 
132709467b48Spatrick   // addr64 bit was removed for volcanic islands.
1328097a140dSpatrick   // FIXME: This should be a pattern predicate and not reach here
132909467b48Spatrick   if (!Subtarget->hasAddr64())
133009467b48Spatrick     return false;
133109467b48Spatrick 
133273471bf0Spatrick   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
133309467b48Spatrick     return false;
133409467b48Spatrick 
133509467b48Spatrick   ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
133609467b48Spatrick   if (C->getSExtValue()) {
133709467b48Spatrick     SDLoc DL(Addr);
133809467b48Spatrick 
133909467b48Spatrick     const SITargetLowering& Lowering =
134009467b48Spatrick       *static_cast<const SITargetLowering*>(getTargetLowering());
134109467b48Spatrick 
134209467b48Spatrick     SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
134309467b48Spatrick     return true;
134409467b48Spatrick   }
134509467b48Spatrick 
134609467b48Spatrick   return false;
134709467b48Spatrick }
134809467b48Spatrick 
foldFrameIndex(SDValue N) const134909467b48Spatrick std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
1350097a140dSpatrick   SDLoc DL(N);
135109467b48Spatrick 
135273471bf0Spatrick   auto *FI = dyn_cast<FrameIndexSDNode>(N);
135373471bf0Spatrick   SDValue TFI =
135473471bf0Spatrick       FI ? CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) : N;
135509467b48Spatrick 
135673471bf0Spatrick   // We rebase the base address into an absolute stack address and hence
135773471bf0Spatrick   // use constant 0 for soffset. This value must be retained until
135873471bf0Spatrick   // frame elimination and eliminateFrameIndex will choose the appropriate
135973471bf0Spatrick   // frame register if need be.
1360*d415bd75Srobert   return std::pair(TFI, CurDAG->getTargetConstant(0, DL, MVT::i32));
136109467b48Spatrick }
136209467b48Spatrick 
SelectMUBUFScratchOffen(SDNode * Parent,SDValue Addr,SDValue & Rsrc,SDValue & VAddr,SDValue & SOffset,SDValue & ImmOffset) const136309467b48Spatrick bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
136409467b48Spatrick                                                  SDValue Addr, SDValue &Rsrc,
136509467b48Spatrick                                                  SDValue &VAddr, SDValue &SOffset,
136609467b48Spatrick                                                  SDValue &ImmOffset) const {
136709467b48Spatrick 
136809467b48Spatrick   SDLoc DL(Addr);
136909467b48Spatrick   MachineFunction &MF = CurDAG->getMachineFunction();
137009467b48Spatrick   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
137109467b48Spatrick 
137209467b48Spatrick   Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
137309467b48Spatrick 
137409467b48Spatrick   if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
1375097a140dSpatrick     int64_t Imm = CAddr->getSExtValue();
1376097a140dSpatrick     const int64_t NullPtr =
1377097a140dSpatrick         AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS);
1378097a140dSpatrick     // Don't fold null pointer.
1379097a140dSpatrick     if (Imm != NullPtr) {
138009467b48Spatrick       SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
1381097a140dSpatrick       MachineSDNode *MovHighBits = CurDAG->getMachineNode(
1382097a140dSpatrick         AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
138309467b48Spatrick       VAddr = SDValue(MovHighBits, 0);
138409467b48Spatrick 
138573471bf0Spatrick       SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
138609467b48Spatrick       ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
138709467b48Spatrick       return true;
138809467b48Spatrick     }
1389097a140dSpatrick   }
139009467b48Spatrick 
139109467b48Spatrick   if (CurDAG->isBaseWithConstantOffset(Addr)) {
139209467b48Spatrick     // (add n0, c1)
139309467b48Spatrick 
139409467b48Spatrick     SDValue N0 = Addr.getOperand(0);
139509467b48Spatrick     SDValue N1 = Addr.getOperand(1);
139609467b48Spatrick 
139709467b48Spatrick     // Offsets in vaddr must be positive if range checking is enabled.
139809467b48Spatrick     //
139909467b48Spatrick     // The total computation of vaddr + soffset + offset must not overflow.  If
140009467b48Spatrick     // vaddr is negative, even if offset is 0 the sgpr offset add will end up
140109467b48Spatrick     // overflowing.
140209467b48Spatrick     //
140309467b48Spatrick     // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
140409467b48Spatrick     // always perform a range check. If a negative vaddr base index was used,
140509467b48Spatrick     // this would fail the range check. The overall address computation would
140609467b48Spatrick     // compute a valid address, but this doesn't happen due to the range
140709467b48Spatrick     // check. For out-of-bounds MUBUF loads, a 0 is returned.
140809467b48Spatrick     //
140909467b48Spatrick     // Therefore it should be safe to fold any VGPR offset on gfx9 into the
141009467b48Spatrick     // MUBUF vaddr, but not on older subtargets which can only do this if the
141109467b48Spatrick     // sign bit is known 0.
141209467b48Spatrick     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
141309467b48Spatrick     if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
141409467b48Spatrick         (!Subtarget->privateMemoryResourceIsRangeChecked() ||
141509467b48Spatrick          CurDAG->SignBitIsZero(N0))) {
141609467b48Spatrick       std::tie(VAddr, SOffset) = foldFrameIndex(N0);
141709467b48Spatrick       ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
141809467b48Spatrick       return true;
141909467b48Spatrick     }
142009467b48Spatrick   }
142109467b48Spatrick 
142209467b48Spatrick   // (node)
142309467b48Spatrick   std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
142409467b48Spatrick   ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
142509467b48Spatrick   return true;
142609467b48Spatrick }
142709467b48Spatrick 
IsCopyFromSGPR(const SIRegisterInfo & TRI,SDValue Val)142873471bf0Spatrick static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val) {
142973471bf0Spatrick   if (Val.getOpcode() != ISD::CopyFromReg)
143073471bf0Spatrick     return false;
1431*d415bd75Srobert   auto Reg = cast<RegisterSDNode>(Val.getOperand(1))->getReg();
1432*d415bd75Srobert   if (!Reg.isPhysical())
1433*d415bd75Srobert     return false;
1434*d415bd75Srobert   auto RC = TRI.getPhysRegBaseClass(Reg);
143573471bf0Spatrick   return RC && TRI.isSGPRClass(RC);
143673471bf0Spatrick }
143773471bf0Spatrick 
SelectMUBUFScratchOffset(SDNode * Parent,SDValue Addr,SDValue & SRsrc,SDValue & SOffset,SDValue & Offset) const143809467b48Spatrick bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
143909467b48Spatrick                                                   SDValue Addr,
144009467b48Spatrick                                                   SDValue &SRsrc,
144109467b48Spatrick                                                   SDValue &SOffset,
144209467b48Spatrick                                                   SDValue &Offset) const {
144373471bf0Spatrick   const SIRegisterInfo *TRI =
144473471bf0Spatrick       static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
144509467b48Spatrick   MachineFunction &MF = CurDAG->getMachineFunction();
144609467b48Spatrick   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
144773471bf0Spatrick   SDLoc DL(Addr);
144873471bf0Spatrick 
144973471bf0Spatrick   // CopyFromReg <sgpr>
145073471bf0Spatrick   if (IsCopyFromSGPR(*TRI, Addr)) {
145173471bf0Spatrick     SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
145273471bf0Spatrick     SOffset = Addr;
145373471bf0Spatrick     Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
145473471bf0Spatrick     return true;
145573471bf0Spatrick   }
145673471bf0Spatrick 
145773471bf0Spatrick   ConstantSDNode *CAddr;
145873471bf0Spatrick   if (Addr.getOpcode() == ISD::ADD) {
145973471bf0Spatrick     // Add (CopyFromReg <sgpr>) <constant>
146073471bf0Spatrick     CAddr = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
146173471bf0Spatrick     if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
146273471bf0Spatrick       return false;
146373471bf0Spatrick     if (!IsCopyFromSGPR(*TRI, Addr.getOperand(0)))
146473471bf0Spatrick       return false;
146573471bf0Spatrick 
146673471bf0Spatrick     SOffset = Addr.getOperand(0);
146773471bf0Spatrick   } else if ((CAddr = dyn_cast<ConstantSDNode>(Addr)) &&
146873471bf0Spatrick              SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue())) {
146973471bf0Spatrick     // <constant>
147073471bf0Spatrick     SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
147173471bf0Spatrick   } else {
147273471bf0Spatrick     return false;
147373471bf0Spatrick   }
147409467b48Spatrick 
147509467b48Spatrick   SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
147609467b48Spatrick 
147709467b48Spatrick   Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
147809467b48Spatrick   return true;
147909467b48Spatrick }
148009467b48Spatrick 
SelectMUBUFOffset(SDValue Addr,SDValue & SRsrc,SDValue & SOffset,SDValue & Offset) const148109467b48Spatrick bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
148273471bf0Spatrick                                            SDValue &SOffset, SDValue &Offset
148373471bf0Spatrick                                            ) const {
148409467b48Spatrick   SDValue Ptr, VAddr, Offen, Idxen, Addr64;
148509467b48Spatrick   const SIInstrInfo *TII =
148609467b48Spatrick     static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
148709467b48Spatrick 
148873471bf0Spatrick   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
148909467b48Spatrick     return false;
149009467b48Spatrick 
149109467b48Spatrick   if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
149209467b48Spatrick       !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
149309467b48Spatrick       !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
149409467b48Spatrick     uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
1495*d415bd75Srobert                     APInt::getAllOnes(32).getZExtValue(); // Size
149609467b48Spatrick     SDLoc DL(Addr);
149709467b48Spatrick 
149809467b48Spatrick     const SITargetLowering& Lowering =
149909467b48Spatrick       *static_cast<const SITargetLowering*>(getTargetLowering());
150009467b48Spatrick 
150109467b48Spatrick     SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
150209467b48Spatrick     return true;
150309467b48Spatrick   }
150409467b48Spatrick   return false;
150509467b48Spatrick }
150609467b48Spatrick 
150709467b48Spatrick // Find a load or store from corresponding pattern root.
150809467b48Spatrick // Roots may be build_vector, bitconvert or their combinations.
findMemSDNode(SDNode * N)150909467b48Spatrick static MemSDNode* findMemSDNode(SDNode *N) {
151009467b48Spatrick   N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode();
151109467b48Spatrick   if (MemSDNode *MN = dyn_cast<MemSDNode>(N))
151209467b48Spatrick     return MN;
151309467b48Spatrick   assert(isa<BuildVectorSDNode>(N));
151409467b48Spatrick   for (SDValue V : N->op_values())
151509467b48Spatrick     if (MemSDNode *MN =
151609467b48Spatrick           dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V)))
151709467b48Spatrick       return MN;
151809467b48Spatrick   llvm_unreachable("cannot find MemSDNode in the pattern!");
151909467b48Spatrick }
152009467b48Spatrick 
SelectFlatOffsetImpl(SDNode * N,SDValue Addr,SDValue & VAddr,SDValue & Offset,uint64_t FlatVariant) const152173471bf0Spatrick bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(SDNode *N, SDValue Addr,
152273471bf0Spatrick                                               SDValue &VAddr, SDValue &Offset,
152373471bf0Spatrick                                               uint64_t FlatVariant) const {
152409467b48Spatrick   int64_t OffsetVal = 0;
152509467b48Spatrick 
152673471bf0Spatrick   unsigned AS = findMemSDNode(N)->getAddressSpace();
152773471bf0Spatrick 
152873471bf0Spatrick   bool CanHaveFlatSegmentOffsetBug =
152973471bf0Spatrick       Subtarget->hasFlatSegmentOffsetBug() &&
153073471bf0Spatrick       FlatVariant == SIInstrFlags::FLAT &&
153173471bf0Spatrick       (AS == AMDGPUAS::FLAT_ADDRESS || AS == AMDGPUAS::GLOBAL_ADDRESS);
153273471bf0Spatrick 
153373471bf0Spatrick   if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {
1534097a140dSpatrick     SDValue N0, N1;
153573471bf0Spatrick     if (isBaseWithConstantOffset64(Addr, N0, N1)) {
153673471bf0Spatrick       int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
153709467b48Spatrick 
153809467b48Spatrick       const SIInstrInfo *TII = Subtarget->getInstrInfo();
153973471bf0Spatrick       if (TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
154009467b48Spatrick         Addr = N0;
154109467b48Spatrick         OffsetVal = COffsetVal;
154209467b48Spatrick       } else {
154309467b48Spatrick         // If the offset doesn't fit, put the low bits into the offset field and
154409467b48Spatrick         // add the rest.
154573471bf0Spatrick         //
154673471bf0Spatrick         // For a FLAT instruction the hardware decides whether to access
154773471bf0Spatrick         // global/scratch/shared memory based on the high bits of vaddr,
154873471bf0Spatrick         // ignoring the offset field, so we have to ensure that when we add
154973471bf0Spatrick         // remainder to vaddr it still points into the same underlying object.
155073471bf0Spatrick         // The easiest way to do that is to make sure that we split the offset
155173471bf0Spatrick         // into two pieces that are both >= 0 or both <= 0.
155209467b48Spatrick 
155309467b48Spatrick         SDLoc DL(N);
155473471bf0Spatrick         uint64_t RemainderOffset;
155509467b48Spatrick 
155673471bf0Spatrick         std::tie(OffsetVal, RemainderOffset) =
155773471bf0Spatrick             TII->splitFlatOffset(COffsetVal, AS, FlatVariant);
155809467b48Spatrick 
155973471bf0Spatrick         SDValue AddOffsetLo =
156073471bf0Spatrick             getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
156173471bf0Spatrick         SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
156273471bf0Spatrick 
156373471bf0Spatrick         if (Addr.getValueType().getSizeInBits() == 32) {
156473471bf0Spatrick           SmallVector<SDValue, 3> Opnds;
156573471bf0Spatrick           Opnds.push_back(N0);
156673471bf0Spatrick           Opnds.push_back(AddOffsetLo);
156773471bf0Spatrick           unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
156873471bf0Spatrick           if (Subtarget->hasAddNoCarry()) {
156973471bf0Spatrick             AddOp = AMDGPU::V_ADD_U32_e64;
157073471bf0Spatrick             Opnds.push_back(Clamp);
157109467b48Spatrick           }
157273471bf0Spatrick           Addr = SDValue(CurDAG->getMachineNode(AddOp, DL, MVT::i32, Opnds), 0);
157309467b48Spatrick         } else {
1574097a140dSpatrick           // TODO: Should this try to use a scalar add pseudo if the base address
1575097a140dSpatrick           // is uniform and saddr is usable?
157609467b48Spatrick           SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
157709467b48Spatrick           SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
157809467b48Spatrick 
157973471bf0Spatrick           SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
158073471bf0Spatrick                                                 DL, MVT::i32, N0, Sub0);
158173471bf0Spatrick           SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
158273471bf0Spatrick                                                 DL, MVT::i32, N0, Sub1);
158309467b48Spatrick 
1584097a140dSpatrick           SDValue AddOffsetHi =
1585097a140dSpatrick               getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
158609467b48Spatrick 
158709467b48Spatrick           SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);
158809467b48Spatrick 
1589097a140dSpatrick           SDNode *Add =
159073471bf0Spatrick               CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64, DL, VTs,
159109467b48Spatrick                                      {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
159209467b48Spatrick 
159309467b48Spatrick           SDNode *Addc = CurDAG->getMachineNode(
159409467b48Spatrick               AMDGPU::V_ADDC_U32_e64, DL, VTs,
159509467b48Spatrick               {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp});
159609467b48Spatrick 
159709467b48Spatrick           SDValue RegSequenceArgs[] = {
159809467b48Spatrick               CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
1599097a140dSpatrick               SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1};
160009467b48Spatrick 
160109467b48Spatrick           Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
1602097a140dSpatrick                                                 MVT::i64, RegSequenceArgs),
1603097a140dSpatrick                          0);
1604097a140dSpatrick         }
160509467b48Spatrick       }
160609467b48Spatrick     }
160773471bf0Spatrick   }
160809467b48Spatrick 
160909467b48Spatrick   VAddr = Addr;
161009467b48Spatrick   Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
161109467b48Spatrick   return true;
161209467b48Spatrick }
161309467b48Spatrick 
SelectFlatOffset(SDNode * N,SDValue Addr,SDValue & VAddr,SDValue & Offset) const161473471bf0Spatrick bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N, SDValue Addr,
161509467b48Spatrick                                           SDValue &VAddr,
161673471bf0Spatrick                                           SDValue &Offset) const {
161773471bf0Spatrick   return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, SIInstrFlags::FLAT);
161809467b48Spatrick }
161909467b48Spatrick 
SelectGlobalOffset(SDNode * N,SDValue Addr,SDValue & VAddr,SDValue & Offset) const162073471bf0Spatrick bool AMDGPUDAGToDAGISel::SelectGlobalOffset(SDNode *N, SDValue Addr,
162109467b48Spatrick                                             SDValue &VAddr,
162273471bf0Spatrick                                             SDValue &Offset) const {
162373471bf0Spatrick   return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, SIInstrFlags::FlatGlobal);
162473471bf0Spatrick }
162573471bf0Spatrick 
SelectScratchOffset(SDNode * N,SDValue Addr,SDValue & VAddr,SDValue & Offset) const162673471bf0Spatrick bool AMDGPUDAGToDAGISel::SelectScratchOffset(SDNode *N, SDValue Addr,
162773471bf0Spatrick                                              SDValue &VAddr,
162873471bf0Spatrick                                              SDValue &Offset) const {
162973471bf0Spatrick   return SelectFlatOffsetImpl(N, Addr, VAddr, Offset,
163073471bf0Spatrick                               SIInstrFlags::FlatScratch);
163173471bf0Spatrick }
163273471bf0Spatrick 
163373471bf0Spatrick // If this matches zero_extend i32:x, return x
matchZExtFromI32(SDValue Op)163473471bf0Spatrick static SDValue matchZExtFromI32(SDValue Op) {
163573471bf0Spatrick   if (Op.getOpcode() != ISD::ZERO_EXTEND)
163673471bf0Spatrick     return SDValue();
163773471bf0Spatrick 
163873471bf0Spatrick   SDValue ExtSrc = Op.getOperand(0);
163973471bf0Spatrick   return (ExtSrc.getValueType() == MVT::i32) ? ExtSrc : SDValue();
164073471bf0Spatrick }
164173471bf0Spatrick 
164273471bf0Spatrick // Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
SelectGlobalSAddr(SDNode * N,SDValue Addr,SDValue & SAddr,SDValue & VOffset,SDValue & Offset) const164373471bf0Spatrick bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
164473471bf0Spatrick                                            SDValue Addr,
164573471bf0Spatrick                                            SDValue &SAddr,
164673471bf0Spatrick                                            SDValue &VOffset,
164773471bf0Spatrick                                            SDValue &Offset) const {
164873471bf0Spatrick   int64_t ImmOffset = 0;
164973471bf0Spatrick 
165073471bf0Spatrick   // Match the immediate offset first, which canonically is moved as low as
165173471bf0Spatrick   // possible.
165273471bf0Spatrick 
165373471bf0Spatrick   SDValue LHS, RHS;
165473471bf0Spatrick   if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
165573471bf0Spatrick     int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
165673471bf0Spatrick     const SIInstrInfo *TII = Subtarget->getInstrInfo();
165773471bf0Spatrick 
165873471bf0Spatrick     if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
165973471bf0Spatrick                                SIInstrFlags::FlatGlobal)) {
166073471bf0Spatrick       Addr = LHS;
166173471bf0Spatrick       ImmOffset = COffsetVal;
166273471bf0Spatrick     } else if (!LHS->isDivergent()) {
166373471bf0Spatrick       if (COffsetVal > 0) {
166473471bf0Spatrick         SDLoc SL(N);
166573471bf0Spatrick         // saddr + large_offset -> saddr +
166673471bf0Spatrick         //                         (voffset = large_offset & ~MaxOffset) +
166773471bf0Spatrick         //                         (large_offset & MaxOffset);
166873471bf0Spatrick         int64_t SplitImmOffset, RemainderOffset;
166973471bf0Spatrick         std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
167073471bf0Spatrick             COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal);
167173471bf0Spatrick 
167273471bf0Spatrick         if (isUInt<32>(RemainderOffset)) {
167373471bf0Spatrick           SDNode *VMov = CurDAG->getMachineNode(
167473471bf0Spatrick               AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
167573471bf0Spatrick               CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
167673471bf0Spatrick           VOffset = SDValue(VMov, 0);
167773471bf0Spatrick           SAddr = LHS;
167873471bf0Spatrick           Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
167973471bf0Spatrick           return true;
168073471bf0Spatrick         }
168173471bf0Spatrick       }
168273471bf0Spatrick 
168373471bf0Spatrick       // We are adding a 64 bit SGPR and a constant. If constant bus limit
168473471bf0Spatrick       // is 1 we would need to perform 1 or 2 extra moves for each half of
168573471bf0Spatrick       // the constant and it is better to do a scalar add and then issue a
168673471bf0Spatrick       // single VALU instruction to materialize zero. Otherwise it is less
168773471bf0Spatrick       // instructions to perform VALU adds with immediates or inline literals.
168873471bf0Spatrick       unsigned NumLiterals =
168973471bf0Spatrick           !TII->isInlineConstant(APInt(32, COffsetVal & 0xffffffff)) +
169073471bf0Spatrick           !TII->isInlineConstant(APInt(32, COffsetVal >> 32));
169173471bf0Spatrick       if (Subtarget->getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
169273471bf0Spatrick         return false;
169373471bf0Spatrick     }
169473471bf0Spatrick   }
169573471bf0Spatrick 
169673471bf0Spatrick   // Match the variable offset.
169773471bf0Spatrick   if (Addr.getOpcode() == ISD::ADD) {
169873471bf0Spatrick     LHS = Addr.getOperand(0);
169973471bf0Spatrick     RHS = Addr.getOperand(1);
170073471bf0Spatrick 
170173471bf0Spatrick     if (!LHS->isDivergent()) {
170273471bf0Spatrick       // add (i64 sgpr), (zero_extend (i32 vgpr))
170373471bf0Spatrick       if (SDValue ZextRHS = matchZExtFromI32(RHS)) {
170473471bf0Spatrick         SAddr = LHS;
170573471bf0Spatrick         VOffset = ZextRHS;
170673471bf0Spatrick       }
170773471bf0Spatrick     }
170873471bf0Spatrick 
170973471bf0Spatrick     if (!SAddr && !RHS->isDivergent()) {
171073471bf0Spatrick       // add (zero_extend (i32 vgpr)), (i64 sgpr)
171173471bf0Spatrick       if (SDValue ZextLHS = matchZExtFromI32(LHS)) {
171273471bf0Spatrick         SAddr = RHS;
171373471bf0Spatrick         VOffset = ZextLHS;
171473471bf0Spatrick       }
171573471bf0Spatrick     }
171673471bf0Spatrick 
171773471bf0Spatrick     if (SAddr) {
171873471bf0Spatrick       Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
171973471bf0Spatrick       return true;
172073471bf0Spatrick     }
172173471bf0Spatrick   }
172273471bf0Spatrick 
172373471bf0Spatrick   if (Addr->isDivergent() || Addr.getOpcode() == ISD::UNDEF ||
172473471bf0Spatrick       isa<ConstantSDNode>(Addr))
172573471bf0Spatrick     return false;
172673471bf0Spatrick 
172773471bf0Spatrick   // It's cheaper to materialize a single 32-bit zero for vaddr than the two
172873471bf0Spatrick   // moves required to copy a 64-bit SGPR to VGPR.
172973471bf0Spatrick   SAddr = Addr;
173073471bf0Spatrick   SDNode *VMov =
173173471bf0Spatrick       CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
173273471bf0Spatrick                              CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
173373471bf0Spatrick   VOffset = SDValue(VMov, 0);
173473471bf0Spatrick   Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
173573471bf0Spatrick   return true;
173673471bf0Spatrick }
173773471bf0Spatrick 
SelectSAddrFI(SelectionDAG * CurDAG,SDValue SAddr)173873471bf0Spatrick static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr) {
173973471bf0Spatrick   if (auto FI = dyn_cast<FrameIndexSDNode>(SAddr)) {
174073471bf0Spatrick     SAddr = CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));
174173471bf0Spatrick   } else if (SAddr.getOpcode() == ISD::ADD &&
174273471bf0Spatrick              isa<FrameIndexSDNode>(SAddr.getOperand(0))) {
174373471bf0Spatrick     // Materialize this into a scalar move for scalar address to avoid
174473471bf0Spatrick     // readfirstlane.
174573471bf0Spatrick     auto FI = cast<FrameIndexSDNode>(SAddr.getOperand(0));
174673471bf0Spatrick     SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
174773471bf0Spatrick                                               FI->getValueType(0));
174873471bf0Spatrick     SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, SDLoc(SAddr),
174973471bf0Spatrick                                            MVT::i32, TFI, SAddr.getOperand(1)),
175073471bf0Spatrick                     0);
175173471bf0Spatrick   }
175273471bf0Spatrick 
175373471bf0Spatrick   return SAddr;
175473471bf0Spatrick }
175573471bf0Spatrick 
175673471bf0Spatrick // Match (32-bit SGPR base) + sext(imm offset)
SelectScratchSAddr(SDNode * Parent,SDValue Addr,SDValue & SAddr,SDValue & Offset) const175773471bf0Spatrick bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
175873471bf0Spatrick                                             SDValue &SAddr,
175973471bf0Spatrick                                             SDValue &Offset) const {
176073471bf0Spatrick   if (Addr->isDivergent())
176173471bf0Spatrick     return false;
176273471bf0Spatrick 
176373471bf0Spatrick   SDLoc DL(Addr);
176473471bf0Spatrick 
176573471bf0Spatrick   int64_t COffsetVal = 0;
176673471bf0Spatrick 
176773471bf0Spatrick   if (CurDAG->isBaseWithConstantOffset(Addr)) {
176873471bf0Spatrick     COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
176973471bf0Spatrick     SAddr = Addr.getOperand(0);
177073471bf0Spatrick   } else {
177173471bf0Spatrick     SAddr = Addr;
177273471bf0Spatrick   }
177373471bf0Spatrick 
177473471bf0Spatrick   SAddr = SelectSAddrFI(CurDAG, SAddr);
177573471bf0Spatrick 
177673471bf0Spatrick   const SIInstrInfo *TII = Subtarget->getInstrInfo();
177773471bf0Spatrick 
177873471bf0Spatrick   if (!TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS,
177973471bf0Spatrick                               SIInstrFlags::FlatScratch)) {
178073471bf0Spatrick     int64_t SplitImmOffset, RemainderOffset;
178173471bf0Spatrick     std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
178273471bf0Spatrick         COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, SIInstrFlags::FlatScratch);
178373471bf0Spatrick 
178473471bf0Spatrick     COffsetVal = SplitImmOffset;
178573471bf0Spatrick 
178673471bf0Spatrick     SDValue AddOffset =
178773471bf0Spatrick         SAddr.getOpcode() == ISD::TargetFrameIndex
178873471bf0Spatrick             ? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)
178973471bf0Spatrick             : CurDAG->getTargetConstant(RemainderOffset, DL, MVT::i32);
179073471bf0Spatrick     SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, DL, MVT::i32,
179173471bf0Spatrick                                            SAddr, AddOffset),
179273471bf0Spatrick                     0);
179373471bf0Spatrick   }
179473471bf0Spatrick 
179573471bf0Spatrick   Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i16);
179673471bf0Spatrick 
179773471bf0Spatrick   return true;
179809467b48Spatrick }
179909467b48Spatrick 
1800*d415bd75Srobert // Check whether the flat scratch SVS swizzle bug affects this access.
checkFlatScratchSVSSwizzleBug(SDValue VAddr,SDValue SAddr,uint64_t ImmOffset) const1801*d415bd75Srobert bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
1802*d415bd75Srobert     SDValue VAddr, SDValue SAddr, uint64_t ImmOffset) const {
1803*d415bd75Srobert   if (!Subtarget->hasFlatScratchSVSSwizzleBug())
1804*d415bd75Srobert     return false;
1805*d415bd75Srobert 
1806*d415bd75Srobert   // The bug affects the swizzling of SVS accesses if there is any carry out
1807*d415bd75Srobert   // from the two low order bits (i.e. from bit 1 into bit 2) when adding
1808*d415bd75Srobert   // voffset to (soffset + inst_offset).
1809*d415bd75Srobert   KnownBits VKnown = CurDAG->computeKnownBits(VAddr);
1810*d415bd75Srobert   KnownBits SKnown = KnownBits::computeForAddSub(
1811*d415bd75Srobert       true, false, CurDAG->computeKnownBits(SAddr),
1812*d415bd75Srobert       KnownBits::makeConstant(APInt(32, ImmOffset)));
1813*d415bd75Srobert   uint64_t VMax = VKnown.getMaxValue().getZExtValue();
1814*d415bd75Srobert   uint64_t SMax = SKnown.getMaxValue().getZExtValue();
1815*d415bd75Srobert   return (VMax & 3) + (SMax & 3) >= 4;
1816*d415bd75Srobert }
1817*d415bd75Srobert 
SelectScratchSVAddr(SDNode * N,SDValue Addr,SDValue & VAddr,SDValue & SAddr,SDValue & Offset) const1818*d415bd75Srobert bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
1819*d415bd75Srobert                                              SDValue &VAddr, SDValue &SAddr,
1820*d415bd75Srobert                                              SDValue &Offset) const  {
1821*d415bd75Srobert   int64_t ImmOffset = 0;
1822*d415bd75Srobert 
1823*d415bd75Srobert   SDValue LHS, RHS;
1824*d415bd75Srobert   if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
1825*d415bd75Srobert     int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
1826*d415bd75Srobert     const SIInstrInfo *TII = Subtarget->getInstrInfo();
1827*d415bd75Srobert 
1828*d415bd75Srobert     if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true)) {
1829*d415bd75Srobert       Addr = LHS;
1830*d415bd75Srobert       ImmOffset = COffsetVal;
1831*d415bd75Srobert     } else if (!LHS->isDivergent() && COffsetVal > 0) {
1832*d415bd75Srobert       SDLoc SL(N);
1833*d415bd75Srobert       // saddr + large_offset -> saddr + (vaddr = large_offset & ~MaxOffset) +
1834*d415bd75Srobert       //                         (large_offset & MaxOffset);
1835*d415bd75Srobert       int64_t SplitImmOffset, RemainderOffset;
1836*d415bd75Srobert       std::tie(SplitImmOffset, RemainderOffset)
1837*d415bd75Srobert         = TII->splitFlatOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true);
1838*d415bd75Srobert 
1839*d415bd75Srobert       if (isUInt<32>(RemainderOffset)) {
1840*d415bd75Srobert         SDNode *VMov = CurDAG->getMachineNode(
1841*d415bd75Srobert           AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
1842*d415bd75Srobert           CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
1843*d415bd75Srobert         VAddr = SDValue(VMov, 0);
1844*d415bd75Srobert         SAddr = LHS;
1845*d415bd75Srobert         if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
1846*d415bd75Srobert           return false;
1847*d415bd75Srobert         Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
1848*d415bd75Srobert         return true;
1849*d415bd75Srobert       }
1850*d415bd75Srobert     }
1851*d415bd75Srobert   }
1852*d415bd75Srobert 
1853*d415bd75Srobert   if (Addr.getOpcode() != ISD::ADD)
1854*d415bd75Srobert     return false;
1855*d415bd75Srobert 
1856*d415bd75Srobert   LHS = Addr.getOperand(0);
1857*d415bd75Srobert   RHS = Addr.getOperand(1);
1858*d415bd75Srobert 
1859*d415bd75Srobert   if (!LHS->isDivergent() && RHS->isDivergent()) {
1860*d415bd75Srobert     SAddr = LHS;
1861*d415bd75Srobert     VAddr = RHS;
1862*d415bd75Srobert   } else if (!RHS->isDivergent() && LHS->isDivergent()) {
1863*d415bd75Srobert     SAddr = RHS;
1864*d415bd75Srobert     VAddr = LHS;
1865*d415bd75Srobert   } else {
1866*d415bd75Srobert     return false;
1867*d415bd75Srobert   }
1868*d415bd75Srobert 
1869*d415bd75Srobert   if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
1870*d415bd75Srobert     return false;
1871*d415bd75Srobert   SAddr = SelectSAddrFI(CurDAG, SAddr);
1872*d415bd75Srobert   Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
1873*d415bd75Srobert   return true;
1874*d415bd75Srobert }
1875*d415bd75Srobert 
1876*d415bd75Srobert // Match an immediate (if Offset is not null) or an SGPR (if SOffset is
1877*d415bd75Srobert // not null) offset. If Imm32Only is true, match only 32-bit immediate
1878*d415bd75Srobert // offsets available on CI.
SelectSMRDOffset(SDValue ByteOffsetNode,SDValue * SOffset,SDValue * Offset,bool Imm32Only,bool IsBuffer) const187909467b48Spatrick bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
1880*d415bd75Srobert                                           SDValue *SOffset, SDValue *Offset,
1881*d415bd75Srobert                                           bool Imm32Only, bool IsBuffer) const {
1882*d415bd75Srobert   assert((!SOffset || !Offset) &&
1883*d415bd75Srobert          "Cannot match both soffset and offset at the same time!");
1884*d415bd75Srobert 
188509467b48Spatrick   ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1886097a140dSpatrick   if (!C) {
1887*d415bd75Srobert     if (!SOffset)
1888*d415bd75Srobert       return false;
1889097a140dSpatrick     if (ByteOffsetNode.getValueType().isScalarInteger() &&
1890097a140dSpatrick         ByteOffsetNode.getValueType().getSizeInBits() == 32) {
1891*d415bd75Srobert       *SOffset = ByteOffsetNode;
1892097a140dSpatrick       return true;
1893097a140dSpatrick     }
1894097a140dSpatrick     if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) {
1895097a140dSpatrick       if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) {
1896*d415bd75Srobert         *SOffset = ByteOffsetNode.getOperand(0);
1897097a140dSpatrick         return true;
1898097a140dSpatrick       }
1899097a140dSpatrick     }
190009467b48Spatrick     return false;
1901097a140dSpatrick   }
190209467b48Spatrick 
190309467b48Spatrick   SDLoc SL(ByteOffsetNode);
1904*d415bd75Srobert 
1905*d415bd75Srobert   // GFX9 and GFX10 have signed byte immediate offsets. The immediate
1906*d415bd75Srobert   // offset for S_BUFFER instructions is unsigned.
1907*d415bd75Srobert   int64_t ByteOffset = IsBuffer ? C->getZExtValue() : C->getSExtValue();
1908*d415bd75Srobert   std::optional<int64_t> EncodedOffset =
1909*d415bd75Srobert       AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, IsBuffer);
1910*d415bd75Srobert   if (EncodedOffset && Offset && !Imm32Only) {
1911*d415bd75Srobert     *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
191209467b48Spatrick     return true;
191309467b48Spatrick   }
191409467b48Spatrick 
1915097a140dSpatrick   // SGPR and literal offsets are unsigned.
1916097a140dSpatrick   if (ByteOffset < 0)
191709467b48Spatrick     return false;
191809467b48Spatrick 
1919097a140dSpatrick   EncodedOffset = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset);
1920*d415bd75Srobert   if (EncodedOffset && Offset && Imm32Only) {
1921*d415bd75Srobert     *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
1922097a140dSpatrick     return true;
192309467b48Spatrick   }
1924097a140dSpatrick 
1925097a140dSpatrick   if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))
1926097a140dSpatrick     return false;
1927097a140dSpatrick 
1928*d415bd75Srobert   if (SOffset) {
1929097a140dSpatrick     SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
1930*d415bd75Srobert     *SOffset = SDValue(
1931097a140dSpatrick         CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
193209467b48Spatrick     return true;
193309467b48Spatrick   }
193409467b48Spatrick 
1935*d415bd75Srobert   return false;
1936*d415bd75Srobert }
1937*d415bd75Srobert 
Expand32BitAddress(SDValue Addr) const193809467b48Spatrick SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
193909467b48Spatrick   if (Addr.getValueType() != MVT::i32)
194009467b48Spatrick     return Addr;
194109467b48Spatrick 
194209467b48Spatrick   // Zero-extend a 32-bit address.
194309467b48Spatrick   SDLoc SL(Addr);
194409467b48Spatrick 
194509467b48Spatrick   const MachineFunction &MF = CurDAG->getMachineFunction();
194609467b48Spatrick   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
194709467b48Spatrick   unsigned AddrHiVal = Info->get32BitAddressHighBits();
194809467b48Spatrick   SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
194909467b48Spatrick 
195009467b48Spatrick   const SDValue Ops[] = {
195109467b48Spatrick     CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
195209467b48Spatrick     Addr,
195309467b48Spatrick     CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
195409467b48Spatrick     SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
195509467b48Spatrick             0),
195609467b48Spatrick     CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
195709467b48Spatrick   };
195809467b48Spatrick 
195909467b48Spatrick   return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
196009467b48Spatrick                                         Ops), 0);
196109467b48Spatrick }
196209467b48Spatrick 
1963*d415bd75Srobert // Match a base and an immediate (if Offset is not null) or an SGPR (if
1964*d415bd75Srobert // SOffset is not null) or an immediate+SGPR offset. If Imm32Only is
1965*d415bd75Srobert // true, match only 32-bit immediate offsets available on CI.
SelectSMRDBaseOffset(SDValue Addr,SDValue & SBase,SDValue * SOffset,SDValue * Offset,bool Imm32Only,bool IsBuffer) const1966*d415bd75Srobert bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
1967*d415bd75Srobert                                               SDValue *SOffset, SDValue *Offset,
1968*d415bd75Srobert                                               bool Imm32Only,
1969*d415bd75Srobert                                               bool IsBuffer) const {
1970*d415bd75Srobert   if (SOffset && Offset) {
1971*d415bd75Srobert     assert(!Imm32Only && !IsBuffer);
1972*d415bd75Srobert     SDValue B;
1973*d415bd75Srobert     return SelectSMRDBaseOffset(Addr, B, nullptr, Offset) &&
1974*d415bd75Srobert            SelectSMRDBaseOffset(B, SBase, SOffset, nullptr);
1975*d415bd75Srobert   }
197609467b48Spatrick 
197709467b48Spatrick   // A 32-bit (address + offset) should not cause unsigned 32-bit integer
197809467b48Spatrick   // wraparound, because s_load instructions perform the addition in 64 bits.
1979*d415bd75Srobert   if (Addr.getValueType() == MVT::i32 && Addr.getOpcode() == ISD::ADD &&
1980*d415bd75Srobert       !Addr->getFlags().hasNoUnsignedWrap())
1981*d415bd75Srobert     return false;
1982*d415bd75Srobert 
1983097a140dSpatrick   SDValue N0, N1;
1984097a140dSpatrick   // Extract the base and offset if possible.
1985*d415bd75Srobert   if (CurDAG->isBaseWithConstantOffset(Addr) || Addr.getOpcode() == ISD::ADD) {
1986097a140dSpatrick     N0 = Addr.getOperand(0);
1987097a140dSpatrick     N1 = Addr.getOperand(1);
1988097a140dSpatrick   } else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) {
1989097a140dSpatrick     assert(N0 && N1 && isa<ConstantSDNode>(N1));
1990097a140dSpatrick   }
1991*d415bd75Srobert   if (!N0 || !N1)
1992*d415bd75Srobert     return false;
1993*d415bd75Srobert   if (SelectSMRDOffset(N1, SOffset, Offset, Imm32Only, IsBuffer)) {
1994*d415bd75Srobert     SBase = N0;
199509467b48Spatrick     return true;
199609467b48Spatrick   }
1997*d415bd75Srobert   if (SelectSMRDOffset(N0, SOffset, Offset, Imm32Only, IsBuffer)) {
1998*d415bd75Srobert     SBase = N1;
1999*d415bd75Srobert     return true;
200009467b48Spatrick   }
2001*d415bd75Srobert   return false;
2002097a140dSpatrick }
2003*d415bd75Srobert 
SelectSMRD(SDValue Addr,SDValue & SBase,SDValue * SOffset,SDValue * Offset,bool Imm32Only) const2004*d415bd75Srobert bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
2005*d415bd75Srobert                                     SDValue *SOffset, SDValue *Offset,
2006*d415bd75Srobert                                     bool Imm32Only) const {
2007*d415bd75Srobert   if (SelectSMRDBaseOffset(Addr, SBase, SOffset, Offset, Imm32Only)) {
2008*d415bd75Srobert     SBase = Expand32BitAddress(SBase);
2009*d415bd75Srobert     return true;
2010*d415bd75Srobert   }
2011*d415bd75Srobert 
2012*d415bd75Srobert   if (Addr.getValueType() == MVT::i32 && Offset && !SOffset) {
201309467b48Spatrick     SBase = Expand32BitAddress(Addr);
2014*d415bd75Srobert     *Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
201509467b48Spatrick     return true;
201609467b48Spatrick   }
201709467b48Spatrick 
2018*d415bd75Srobert   return false;
2019*d415bd75Srobert }
2020*d415bd75Srobert 
SelectSMRDImm(SDValue Addr,SDValue & SBase,SDValue & Offset) const202109467b48Spatrick bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
202209467b48Spatrick                                        SDValue &Offset) const {
2023*d415bd75Srobert   return SelectSMRD(Addr, SBase, /* SOffset */ nullptr, &Offset);
202409467b48Spatrick }
202509467b48Spatrick 
SelectSMRDImm32(SDValue Addr,SDValue & SBase,SDValue & Offset) const202609467b48Spatrick bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
202709467b48Spatrick                                          SDValue &Offset) const {
2028097a140dSpatrick   assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
2029*d415bd75Srobert   return SelectSMRD(Addr, SBase, /* SOffset */ nullptr, &Offset,
2030*d415bd75Srobert                     /* Imm32Only */ true);
203109467b48Spatrick }
203209467b48Spatrick 
SelectSMRDSgpr(SDValue Addr,SDValue & SBase,SDValue & SOffset) const203309467b48Spatrick bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
2034*d415bd75Srobert                                         SDValue &SOffset) const {
2035*d415bd75Srobert   return SelectSMRD(Addr, SBase, &SOffset, /* Offset */ nullptr);
2036*d415bd75Srobert }
2037*d415bd75Srobert 
SelectSMRDSgprImm(SDValue Addr,SDValue & SBase,SDValue & SOffset,SDValue & Offset) const2038*d415bd75Srobert bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm(SDValue Addr, SDValue &SBase,
2039*d415bd75Srobert                                            SDValue &SOffset,
204009467b48Spatrick                                            SDValue &Offset) const {
2041*d415bd75Srobert   return SelectSMRD(Addr, SBase, &SOffset, &Offset);
204209467b48Spatrick }
204309467b48Spatrick 
SelectSMRDBufferImm(SDValue N,SDValue & Offset) const2044*d415bd75Srobert bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue N, SDValue &Offset) const {
2045*d415bd75Srobert   return SelectSMRDOffset(N, /* SOffset */ nullptr, &Offset,
2046*d415bd75Srobert                           /* Imm32Only */ false, /* IsBuffer */ true);
2047097a140dSpatrick }
2048097a140dSpatrick 
SelectSMRDBufferImm32(SDValue N,SDValue & Offset) const2049*d415bd75Srobert bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue N,
205009467b48Spatrick                                                SDValue &Offset) const {
2051097a140dSpatrick   assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
2052*d415bd75Srobert   return SelectSMRDOffset(N, /* SOffset */ nullptr, &Offset,
2053*d415bd75Srobert                           /* Imm32Only */ true, /* IsBuffer */ true);
2054097a140dSpatrick }
205509467b48Spatrick 
SelectSMRDBufferSgprImm(SDValue N,SDValue & SOffset,SDValue & Offset) const2056*d415bd75Srobert bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
2057*d415bd75Srobert                                                  SDValue &Offset) const {
2058*d415bd75Srobert   // Match the (soffset + offset) pair as a 32-bit register base and
2059*d415bd75Srobert   // an immediate offset.
2060*d415bd75Srobert   return N.getValueType() == MVT::i32 &&
2061*d415bd75Srobert          SelectSMRDBaseOffset(N, /* SBase */ SOffset, /* SOffset*/ nullptr,
2062*d415bd75Srobert                               &Offset, /* Imm32Only */ false,
2063*d415bd75Srobert                               /* IsBuffer */ true);
206409467b48Spatrick }
206509467b48Spatrick 
SelectMOVRELOffset(SDValue Index,SDValue & Base,SDValue & Offset) const206609467b48Spatrick bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
206709467b48Spatrick                                             SDValue &Base,
206809467b48Spatrick                                             SDValue &Offset) const {
206909467b48Spatrick   SDLoc DL(Index);
207009467b48Spatrick 
207109467b48Spatrick   if (CurDAG->isBaseWithConstantOffset(Index)) {
207209467b48Spatrick     SDValue N0 = Index.getOperand(0);
207309467b48Spatrick     SDValue N1 = Index.getOperand(1);
207409467b48Spatrick     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
207509467b48Spatrick 
207609467b48Spatrick     // (add n0, c0)
207709467b48Spatrick     // Don't peel off the offset (c0) if doing so could possibly lead
207809467b48Spatrick     // the base (n0) to be negative.
2079097a140dSpatrick     // (or n0, |c0|) can never change a sign given isBaseWithConstantOffset.
2080097a140dSpatrick     if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0) ||
2081097a140dSpatrick         (Index->getOpcode() == ISD::OR && C1->getSExtValue() >= 0)) {
208209467b48Spatrick       Base = N0;
208309467b48Spatrick       Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
208409467b48Spatrick       return true;
208509467b48Spatrick     }
208609467b48Spatrick   }
208709467b48Spatrick 
208809467b48Spatrick   if (isa<ConstantSDNode>(Index))
208909467b48Spatrick     return false;
209009467b48Spatrick 
209109467b48Spatrick   Base = Index;
209209467b48Spatrick   Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
209309467b48Spatrick   return true;
209409467b48Spatrick }
209509467b48Spatrick 
getBFE32(bool IsSigned,const SDLoc & DL,SDValue Val,uint32_t Offset,uint32_t Width)2096*d415bd75Srobert SDNode *AMDGPUDAGToDAGISel::getBFE32(bool IsSigned, const SDLoc &DL,
209709467b48Spatrick                                      SDValue Val, uint32_t Offset,
209809467b48Spatrick                                      uint32_t Width) {
2099*d415bd75Srobert   if (Val->isDivergent()) {
2100*d415bd75Srobert     unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
2101*d415bd75Srobert     SDValue Off = CurDAG->getTargetConstant(Offset, DL, MVT::i32);
2102*d415bd75Srobert     SDValue W = CurDAG->getTargetConstant(Width, DL, MVT::i32);
2103*d415bd75Srobert 
2104*d415bd75Srobert     return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, Off, W);
2105*d415bd75Srobert   }
2106*d415bd75Srobert   unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
210709467b48Spatrick   // Transformation function, pack the offset and width of a BFE into
210809467b48Spatrick   // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
210909467b48Spatrick   // source, bits [5:0] contain the offset and bits [22:16] the width.
211009467b48Spatrick   uint32_t PackedVal = Offset | (Width << 16);
211109467b48Spatrick   SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
211209467b48Spatrick 
211309467b48Spatrick   return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
211409467b48Spatrick }
211509467b48Spatrick 
SelectS_BFEFromShifts(SDNode * N)211609467b48Spatrick void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
211709467b48Spatrick   // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)
211809467b48Spatrick   // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)
211909467b48Spatrick   // Predicate: 0 < b <= c < 32
212009467b48Spatrick 
212109467b48Spatrick   const SDValue &Shl = N->getOperand(0);
212209467b48Spatrick   ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
212309467b48Spatrick   ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
212409467b48Spatrick 
212509467b48Spatrick   if (B && C) {
212609467b48Spatrick     uint32_t BVal = B->getZExtValue();
212709467b48Spatrick     uint32_t CVal = C->getZExtValue();
212809467b48Spatrick 
212909467b48Spatrick     if (0 < BVal && BVal <= CVal && CVal < 32) {
213009467b48Spatrick       bool Signed = N->getOpcode() == ISD::SRA;
2131*d415bd75Srobert       ReplaceNode(N, getBFE32(Signed, SDLoc(N), Shl.getOperand(0), CVal - BVal,
213209467b48Spatrick                   32 - CVal));
213309467b48Spatrick       return;
213409467b48Spatrick     }
213509467b48Spatrick   }
213609467b48Spatrick   SelectCode(N);
213709467b48Spatrick }
213809467b48Spatrick 
SelectS_BFE(SDNode * N)213909467b48Spatrick void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
214009467b48Spatrick   switch (N->getOpcode()) {
214109467b48Spatrick   case ISD::AND:
214209467b48Spatrick     if (N->getOperand(0).getOpcode() == ISD::SRL) {
214309467b48Spatrick       // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
214409467b48Spatrick       // Predicate: isMask(mask)
214509467b48Spatrick       const SDValue &Srl = N->getOperand(0);
214609467b48Spatrick       ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
214709467b48Spatrick       ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
214809467b48Spatrick 
214909467b48Spatrick       if (Shift && Mask) {
215009467b48Spatrick         uint32_t ShiftVal = Shift->getZExtValue();
215109467b48Spatrick         uint32_t MaskVal = Mask->getZExtValue();
215209467b48Spatrick 
215309467b48Spatrick         if (isMask_32(MaskVal)) {
2154*d415bd75Srobert           uint32_t WidthVal = llvm::popcount(MaskVal);
2155*d415bd75Srobert           ReplaceNode(N, getBFE32(false, SDLoc(N), Srl.getOperand(0), ShiftVal,
2156*d415bd75Srobert                                   WidthVal));
215709467b48Spatrick           return;
215809467b48Spatrick         }
215909467b48Spatrick       }
216009467b48Spatrick     }
216109467b48Spatrick     break;
216209467b48Spatrick   case ISD::SRL:
216309467b48Spatrick     if (N->getOperand(0).getOpcode() == ISD::AND) {
216409467b48Spatrick       // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
216509467b48Spatrick       // Predicate: isMask(mask >> b)
216609467b48Spatrick       const SDValue &And = N->getOperand(0);
216709467b48Spatrick       ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
216809467b48Spatrick       ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
216909467b48Spatrick 
217009467b48Spatrick       if (Shift && Mask) {
217109467b48Spatrick         uint32_t ShiftVal = Shift->getZExtValue();
217209467b48Spatrick         uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
217309467b48Spatrick 
217409467b48Spatrick         if (isMask_32(MaskVal)) {
2175*d415bd75Srobert           uint32_t WidthVal = llvm::popcount(MaskVal);
2176*d415bd75Srobert           ReplaceNode(N, getBFE32(false, SDLoc(N), And.getOperand(0), ShiftVal,
2177*d415bd75Srobert                       WidthVal));
217809467b48Spatrick           return;
217909467b48Spatrick         }
218009467b48Spatrick       }
218109467b48Spatrick     } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
218209467b48Spatrick       SelectS_BFEFromShifts(N);
218309467b48Spatrick       return;
218409467b48Spatrick     }
218509467b48Spatrick     break;
218609467b48Spatrick   case ISD::SRA:
218709467b48Spatrick     if (N->getOperand(0).getOpcode() == ISD::SHL) {
218809467b48Spatrick       SelectS_BFEFromShifts(N);
218909467b48Spatrick       return;
219009467b48Spatrick     }
219109467b48Spatrick     break;
219209467b48Spatrick 
219309467b48Spatrick   case ISD::SIGN_EXTEND_INREG: {
219409467b48Spatrick     // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
219509467b48Spatrick     SDValue Src = N->getOperand(0);
219609467b48Spatrick     if (Src.getOpcode() != ISD::SRL)
219709467b48Spatrick       break;
219809467b48Spatrick 
219909467b48Spatrick     const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
220009467b48Spatrick     if (!Amt)
220109467b48Spatrick       break;
220209467b48Spatrick 
220309467b48Spatrick     unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2204*d415bd75Srobert     ReplaceNode(N, getBFE32(true, SDLoc(N), Src.getOperand(0),
220509467b48Spatrick                             Amt->getZExtValue(), Width));
220609467b48Spatrick     return;
220709467b48Spatrick   }
220809467b48Spatrick   }
220909467b48Spatrick 
221009467b48Spatrick   SelectCode(N);
221109467b48Spatrick }
221209467b48Spatrick 
isCBranchSCC(const SDNode * N) const221309467b48Spatrick bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
221409467b48Spatrick   assert(N->getOpcode() == ISD::BRCOND);
221509467b48Spatrick   if (!N->hasOneUse())
221609467b48Spatrick     return false;
221709467b48Spatrick 
221809467b48Spatrick   SDValue Cond = N->getOperand(1);
221909467b48Spatrick   if (Cond.getOpcode() == ISD::CopyToReg)
222009467b48Spatrick     Cond = Cond.getOperand(2);
222109467b48Spatrick 
222209467b48Spatrick   if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
222309467b48Spatrick     return false;
222409467b48Spatrick 
222509467b48Spatrick   MVT VT = Cond.getOperand(0).getSimpleValueType();
222609467b48Spatrick   if (VT == MVT::i32)
222709467b48Spatrick     return true;
222809467b48Spatrick 
222909467b48Spatrick   if (VT == MVT::i64) {
223009467b48Spatrick     auto ST = static_cast<const GCNSubtarget *>(Subtarget);
223109467b48Spatrick 
223209467b48Spatrick     ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
223309467b48Spatrick     return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
223409467b48Spatrick   }
223509467b48Spatrick 
223609467b48Spatrick   return false;
223709467b48Spatrick }
223809467b48Spatrick 
SelectBRCOND(SDNode * N)223909467b48Spatrick void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
224009467b48Spatrick   SDValue Cond = N->getOperand(1);
224109467b48Spatrick 
224209467b48Spatrick   if (Cond.isUndef()) {
224309467b48Spatrick     CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
224409467b48Spatrick                          N->getOperand(2), N->getOperand(0));
224509467b48Spatrick     return;
224609467b48Spatrick   }
224709467b48Spatrick 
224809467b48Spatrick   const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
224909467b48Spatrick   const SIRegisterInfo *TRI = ST->getRegisterInfo();
225009467b48Spatrick 
225109467b48Spatrick   bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
225209467b48Spatrick   unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
2253097a140dSpatrick   Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();
225409467b48Spatrick   SDLoc SL(N);
225509467b48Spatrick 
225609467b48Spatrick   if (!UseSCCBr) {
225709467b48Spatrick     // This is the case that we are selecting to S_CBRANCH_VCCNZ.  We have not
225809467b48Spatrick     // analyzed what generates the vcc value, so we do not know whether vcc
225909467b48Spatrick     // bits for disabled lanes are 0.  Thus we need to mask out bits for
226009467b48Spatrick     // disabled lanes.
226109467b48Spatrick     //
226209467b48Spatrick     // For the case that we select S_CBRANCH_SCC1 and it gets
226309467b48Spatrick     // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
226409467b48Spatrick     // SIInstrInfo::moveToVALU which inserts the S_AND).
226509467b48Spatrick     //
226609467b48Spatrick     // We could add an analysis of what generates the vcc value here and omit
226709467b48Spatrick     // the S_AND when is unnecessary. But it would be better to add a separate
226809467b48Spatrick     // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it
226909467b48Spatrick     // catches both cases.
227009467b48Spatrick     Cond = SDValue(CurDAG->getMachineNode(ST->isWave32() ? AMDGPU::S_AND_B32
227109467b48Spatrick                                                          : AMDGPU::S_AND_B64,
227209467b48Spatrick                      SL, MVT::i1,
227309467b48Spatrick                      CurDAG->getRegister(ST->isWave32() ? AMDGPU::EXEC_LO
227409467b48Spatrick                                                         : AMDGPU::EXEC,
227509467b48Spatrick                                          MVT::i1),
227609467b48Spatrick                     Cond),
227709467b48Spatrick                    0);
227809467b48Spatrick   }
227909467b48Spatrick 
228009467b48Spatrick   SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
228109467b48Spatrick   CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
228209467b48Spatrick                        N->getOperand(2), // Basic Block
228309467b48Spatrick                        VCC.getValue(0));
228409467b48Spatrick }
228509467b48Spatrick 
SelectFMAD_FMA(SDNode * N)228609467b48Spatrick void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
228709467b48Spatrick   MVT VT = N->getSimpleValueType(0);
228809467b48Spatrick   bool IsFMA = N->getOpcode() == ISD::FMA;
228909467b48Spatrick   if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
229009467b48Spatrick                          !Subtarget->hasFmaMixInsts()) ||
229109467b48Spatrick       ((IsFMA && Subtarget->hasMadMixInsts()) ||
229209467b48Spatrick        (!IsFMA && Subtarget->hasFmaMixInsts()))) {
229309467b48Spatrick     SelectCode(N);
229409467b48Spatrick     return;
229509467b48Spatrick   }
229609467b48Spatrick 
229709467b48Spatrick   SDValue Src0 = N->getOperand(0);
229809467b48Spatrick   SDValue Src1 = N->getOperand(1);
229909467b48Spatrick   SDValue Src2 = N->getOperand(2);
230009467b48Spatrick   unsigned Src0Mods, Src1Mods, Src2Mods;
230109467b48Spatrick 
230209467b48Spatrick   // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
230309467b48Spatrick   // using the conversion from f16.
230409467b48Spatrick   bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
230509467b48Spatrick   bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
230609467b48Spatrick   bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
230709467b48Spatrick 
2308097a140dSpatrick   assert((IsFMA || !Mode.allFP32Denormals()) &&
230909467b48Spatrick          "fmad selected with denormals enabled");
231009467b48Spatrick   // TODO: We can select this with f32 denormals enabled if all the sources are
231109467b48Spatrick   // converted from f16 (in which case fmad isn't legal).
231209467b48Spatrick 
231309467b48Spatrick   if (Sel0 || Sel1 || Sel2) {
231409467b48Spatrick     // For dummy operands.
231509467b48Spatrick     SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
231609467b48Spatrick     SDValue Ops[] = {
231709467b48Spatrick       CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
231809467b48Spatrick       CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
231909467b48Spatrick       CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
232009467b48Spatrick       CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
232109467b48Spatrick       Zero, Zero
232209467b48Spatrick     };
232309467b48Spatrick 
232409467b48Spatrick     CurDAG->SelectNodeTo(N,
232509467b48Spatrick                          IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
232609467b48Spatrick                          MVT::f32, Ops);
232709467b48Spatrick   } else {
232809467b48Spatrick     SelectCode(N);
232909467b48Spatrick   }
233009467b48Spatrick }
233109467b48Spatrick 
SelectDSAppendConsume(SDNode * N,unsigned IntrID)233209467b48Spatrick void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
233309467b48Spatrick   // The address is assumed to be uniform, so if it ends up in a VGPR, it will
233409467b48Spatrick   // be copied to an SGPR with readfirstlane.
233509467b48Spatrick   unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
233609467b48Spatrick     AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
233709467b48Spatrick 
233809467b48Spatrick   SDValue Chain = N->getOperand(0);
233909467b48Spatrick   SDValue Ptr = N->getOperand(2);
234009467b48Spatrick   MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
234109467b48Spatrick   MachineMemOperand *MMO = M->getMemOperand();
234209467b48Spatrick   bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
234309467b48Spatrick 
234409467b48Spatrick   SDValue Offset;
234509467b48Spatrick   if (CurDAG->isBaseWithConstantOffset(Ptr)) {
234609467b48Spatrick     SDValue PtrBase = Ptr.getOperand(0);
234709467b48Spatrick     SDValue PtrOffset = Ptr.getOperand(1);
234809467b48Spatrick 
234909467b48Spatrick     const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
235073471bf0Spatrick     if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
235109467b48Spatrick       N = glueCopyToM0(N, PtrBase);
235209467b48Spatrick       Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
235309467b48Spatrick     }
235409467b48Spatrick   }
235509467b48Spatrick 
235609467b48Spatrick   if (!Offset) {
235709467b48Spatrick     N = glueCopyToM0(N, Ptr);
235809467b48Spatrick     Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
235909467b48Spatrick   }
236009467b48Spatrick 
236109467b48Spatrick   SDValue Ops[] = {
236209467b48Spatrick     Offset,
236309467b48Spatrick     CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
236409467b48Spatrick     Chain,
236509467b48Spatrick     N->getOperand(N->getNumOperands() - 1) // New glue
236609467b48Spatrick   };
236709467b48Spatrick 
236809467b48Spatrick   SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
236909467b48Spatrick   CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
237009467b48Spatrick }
237109467b48Spatrick 
2372*d415bd75Srobert // We need to handle this here because tablegen doesn't support matching
2373*d415bd75Srobert // instructions with multiple outputs.
SelectDSBvhStackIntrinsic(SDNode * N)2374*d415bd75Srobert void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N) {
2375*d415bd75Srobert   unsigned Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
2376*d415bd75Srobert   SDValue Ops[] = {N->getOperand(2), N->getOperand(3), N->getOperand(4),
2377*d415bd75Srobert                    N->getOperand(5), N->getOperand(0)};
2378*d415bd75Srobert 
2379*d415bd75Srobert   MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
2380*d415bd75Srobert   MachineMemOperand *MMO = M->getMemOperand();
2381*d415bd75Srobert   SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2382*d415bd75Srobert   CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
2383*d415bd75Srobert }
2384*d415bd75Srobert 
gwsIntrinToOpcode(unsigned IntrID)238509467b48Spatrick static unsigned gwsIntrinToOpcode(unsigned IntrID) {
238609467b48Spatrick   switch (IntrID) {
238709467b48Spatrick   case Intrinsic::amdgcn_ds_gws_init:
238809467b48Spatrick     return AMDGPU::DS_GWS_INIT;
238909467b48Spatrick   case Intrinsic::amdgcn_ds_gws_barrier:
239009467b48Spatrick     return AMDGPU::DS_GWS_BARRIER;
239109467b48Spatrick   case Intrinsic::amdgcn_ds_gws_sema_v:
239209467b48Spatrick     return AMDGPU::DS_GWS_SEMA_V;
239309467b48Spatrick   case Intrinsic::amdgcn_ds_gws_sema_br:
239409467b48Spatrick     return AMDGPU::DS_GWS_SEMA_BR;
239509467b48Spatrick   case Intrinsic::amdgcn_ds_gws_sema_p:
239609467b48Spatrick     return AMDGPU::DS_GWS_SEMA_P;
239709467b48Spatrick   case Intrinsic::amdgcn_ds_gws_sema_release_all:
239809467b48Spatrick     return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
239909467b48Spatrick   default:
240009467b48Spatrick     llvm_unreachable("not a gws intrinsic");
240109467b48Spatrick   }
240209467b48Spatrick }
240309467b48Spatrick 
SelectDS_GWS(SDNode * N,unsigned IntrID)240409467b48Spatrick void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
240509467b48Spatrick   if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
240609467b48Spatrick       !Subtarget->hasGWSSemaReleaseAll()) {
240709467b48Spatrick     // Let this error.
240809467b48Spatrick     SelectCode(N);
240909467b48Spatrick     return;
241009467b48Spatrick   }
241109467b48Spatrick 
241209467b48Spatrick   // Chain, intrinsic ID, vsrc, offset
241309467b48Spatrick   const bool HasVSrc = N->getNumOperands() == 4;
241409467b48Spatrick   assert(HasVSrc || N->getNumOperands() == 3);
241509467b48Spatrick 
241609467b48Spatrick   SDLoc SL(N);
241709467b48Spatrick   SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
241809467b48Spatrick   int ImmOffset = 0;
241909467b48Spatrick   MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
242009467b48Spatrick   MachineMemOperand *MMO = M->getMemOperand();
242109467b48Spatrick 
242209467b48Spatrick   // Don't worry if the offset ends up in a VGPR. Only one lane will have
242309467b48Spatrick   // effect, so SIFixSGPRCopies will validly insert readfirstlane.
242409467b48Spatrick 
242509467b48Spatrick   // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
242609467b48Spatrick   // offset field) % 64. Some versions of the programming guide omit the m0
242709467b48Spatrick   // part, or claim it's from offset 0.
242809467b48Spatrick   if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
242909467b48Spatrick     // If we have a constant offset, try to use the 0 in m0 as the base.
243009467b48Spatrick     // TODO: Look into changing the default m0 initialization value. If the
243109467b48Spatrick     // default -1 only set the low 16-bits, we could leave it as-is and add 1 to
243209467b48Spatrick     // the immediate offset.
243309467b48Spatrick     glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32));
243409467b48Spatrick     ImmOffset = ConstOffset->getZExtValue();
243509467b48Spatrick   } else {
243609467b48Spatrick     if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
243709467b48Spatrick       ImmOffset = BaseOffset.getConstantOperandVal(1);
243809467b48Spatrick       BaseOffset = BaseOffset.getOperand(0);
243909467b48Spatrick     }
244009467b48Spatrick 
244109467b48Spatrick     // Prefer to do the shift in an SGPR since it should be possible to use m0
244209467b48Spatrick     // as the result directly. If it's already an SGPR, it will be eliminated
244309467b48Spatrick     // later.
244409467b48Spatrick     SDNode *SGPROffset
244509467b48Spatrick       = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
244609467b48Spatrick                                BaseOffset);
244709467b48Spatrick     // Shift to offset in m0
244809467b48Spatrick     SDNode *M0Base
244909467b48Spatrick       = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
245009467b48Spatrick                                SDValue(SGPROffset, 0),
245109467b48Spatrick                                CurDAG->getTargetConstant(16, SL, MVT::i32));
245209467b48Spatrick     glueCopyToM0(N, SDValue(M0Base, 0));
245309467b48Spatrick   }
245409467b48Spatrick 
245509467b48Spatrick   SDValue Chain = N->getOperand(0);
245609467b48Spatrick   SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
245709467b48Spatrick 
245809467b48Spatrick   const unsigned Opc = gwsIntrinToOpcode(IntrID);
245909467b48Spatrick   SmallVector<SDValue, 5> Ops;
246009467b48Spatrick   if (HasVSrc)
246109467b48Spatrick     Ops.push_back(N->getOperand(2));
246209467b48Spatrick   Ops.push_back(OffsetField);
246309467b48Spatrick   Ops.push_back(Chain);
246409467b48Spatrick 
246509467b48Spatrick   SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
246609467b48Spatrick   CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
246709467b48Spatrick }
246809467b48Spatrick 
SelectInterpP1F16(SDNode * N)2469097a140dSpatrick void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {
2470097a140dSpatrick   if (Subtarget->getLDSBankCount() != 16) {
2471097a140dSpatrick     // This is a single instruction with a pattern.
2472097a140dSpatrick     SelectCode(N);
2473097a140dSpatrick     return;
2474097a140dSpatrick   }
2475097a140dSpatrick 
2476097a140dSpatrick   SDLoc DL(N);
2477097a140dSpatrick 
2478097a140dSpatrick   // This requires 2 instructions. It is possible to write a pattern to support
2479097a140dSpatrick   // this, but the generated isel emitter doesn't correctly deal with multiple
2480097a140dSpatrick   // output instructions using the same physical register input. The copy to m0
2481097a140dSpatrick   // is incorrectly placed before the second instruction.
2482097a140dSpatrick   //
2483097a140dSpatrick   // TODO: Match source modifiers.
2484097a140dSpatrick   //
2485097a140dSpatrick   // def : Pat <
2486097a140dSpatrick   //   (int_amdgcn_interp_p1_f16
2487097a140dSpatrick   //    (VOP3Mods f32:$src0, i32:$src0_modifiers),
2488097a140dSpatrick   //                             (i32 timm:$attrchan), (i32 timm:$attr),
2489097a140dSpatrick   //                             (i1 timm:$high), M0),
2490097a140dSpatrick   //   (V_INTERP_P1LV_F16 $src0_modifiers, VGPR_32:$src0, timm:$attr,
2491097a140dSpatrick   //       timm:$attrchan, 0,
2492097a140dSpatrick   //       (V_INTERP_MOV_F32 2, timm:$attr, timm:$attrchan), timm:$high)> {
2493097a140dSpatrick   //   let Predicates = [has16BankLDS];
2494097a140dSpatrick   // }
2495097a140dSpatrick 
2496097a140dSpatrick   // 16 bank LDS
2497097a140dSpatrick   SDValue ToM0 = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, AMDGPU::M0,
2498097a140dSpatrick                                       N->getOperand(5), SDValue());
2499097a140dSpatrick 
2500097a140dSpatrick   SDVTList VTs = CurDAG->getVTList(MVT::f32, MVT::Other);
2501097a140dSpatrick 
2502097a140dSpatrick   SDNode *InterpMov =
2503097a140dSpatrick     CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32, DL, VTs, {
2504097a140dSpatrick         CurDAG->getTargetConstant(2, DL, MVT::i32), // P0
2505097a140dSpatrick         N->getOperand(3),  // Attr
2506097a140dSpatrick         N->getOperand(2),  // Attrchan
2507097a140dSpatrick         ToM0.getValue(1) // In glue
2508097a140dSpatrick   });
2509097a140dSpatrick 
2510097a140dSpatrick   SDNode *InterpP1LV =
2511097a140dSpatrick     CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16, DL, MVT::f32, {
2512097a140dSpatrick         CurDAG->getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
2513097a140dSpatrick         N->getOperand(1), // Src0
2514097a140dSpatrick         N->getOperand(3), // Attr
2515097a140dSpatrick         N->getOperand(2), // Attrchan
2516097a140dSpatrick         CurDAG->getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
2517097a140dSpatrick         SDValue(InterpMov, 0), // Src2 - holds two f16 values selected by high
2518097a140dSpatrick         N->getOperand(4), // high
2519097a140dSpatrick         CurDAG->getTargetConstant(0, DL, MVT::i1), // $clamp
2520097a140dSpatrick         CurDAG->getTargetConstant(0, DL, MVT::i32), // $omod
2521097a140dSpatrick         SDValue(InterpMov, 1)
2522097a140dSpatrick   });
2523097a140dSpatrick 
2524097a140dSpatrick   CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), SDValue(InterpP1LV, 0));
2525097a140dSpatrick }
2526097a140dSpatrick 
SelectINTRINSIC_W_CHAIN(SDNode * N)252709467b48Spatrick void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
252809467b48Spatrick   unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
252909467b48Spatrick   switch (IntrID) {
253009467b48Spatrick   case Intrinsic::amdgcn_ds_append:
253109467b48Spatrick   case Intrinsic::amdgcn_ds_consume: {
253209467b48Spatrick     if (N->getValueType(0) != MVT::i32)
253309467b48Spatrick       break;
253409467b48Spatrick     SelectDSAppendConsume(N, IntrID);
253509467b48Spatrick     return;
253609467b48Spatrick   }
2537*d415bd75Srobert   case Intrinsic::amdgcn_ds_bvh_stack_rtn:
2538*d415bd75Srobert     SelectDSBvhStackIntrinsic(N);
2539*d415bd75Srobert     return;
254009467b48Spatrick   }
254109467b48Spatrick 
254209467b48Spatrick   SelectCode(N);
254309467b48Spatrick }
254409467b48Spatrick 
SelectINTRINSIC_WO_CHAIN(SDNode * N)254509467b48Spatrick void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
254609467b48Spatrick   unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
254709467b48Spatrick   unsigned Opcode;
254809467b48Spatrick   switch (IntrID) {
254909467b48Spatrick   case Intrinsic::amdgcn_wqm:
255009467b48Spatrick     Opcode = AMDGPU::WQM;
255109467b48Spatrick     break;
255209467b48Spatrick   case Intrinsic::amdgcn_softwqm:
255309467b48Spatrick     Opcode = AMDGPU::SOFT_WQM;
255409467b48Spatrick     break;
255509467b48Spatrick   case Intrinsic::amdgcn_wwm:
255673471bf0Spatrick   case Intrinsic::amdgcn_strict_wwm:
255773471bf0Spatrick     Opcode = AMDGPU::STRICT_WWM;
255873471bf0Spatrick     break;
255973471bf0Spatrick   case Intrinsic::amdgcn_strict_wqm:
256073471bf0Spatrick     Opcode = AMDGPU::STRICT_WQM;
256109467b48Spatrick     break;
2562097a140dSpatrick   case Intrinsic::amdgcn_interp_p1_f16:
2563097a140dSpatrick     SelectInterpP1F16(N);
2564097a140dSpatrick     return;
256509467b48Spatrick   default:
256609467b48Spatrick     SelectCode(N);
256709467b48Spatrick     return;
256809467b48Spatrick   }
256909467b48Spatrick 
257009467b48Spatrick   SDValue Src = N->getOperand(1);
257109467b48Spatrick   CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src});
257209467b48Spatrick }
257309467b48Spatrick 
SelectINTRINSIC_VOID(SDNode * N)257409467b48Spatrick void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
257509467b48Spatrick   unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
257609467b48Spatrick   switch (IntrID) {
257709467b48Spatrick   case Intrinsic::amdgcn_ds_gws_init:
257809467b48Spatrick   case Intrinsic::amdgcn_ds_gws_barrier:
257909467b48Spatrick   case Intrinsic::amdgcn_ds_gws_sema_v:
258009467b48Spatrick   case Intrinsic::amdgcn_ds_gws_sema_br:
258109467b48Spatrick   case Intrinsic::amdgcn_ds_gws_sema_p:
258209467b48Spatrick   case Intrinsic::amdgcn_ds_gws_sema_release_all:
258309467b48Spatrick     SelectDS_GWS(N, IntrID);
258409467b48Spatrick     return;
258509467b48Spatrick   default:
258609467b48Spatrick     break;
258709467b48Spatrick   }
258809467b48Spatrick 
258909467b48Spatrick   SelectCode(N);
259009467b48Spatrick }
259109467b48Spatrick 
SelectVOP3ModsImpl(SDValue In,SDValue & Src,unsigned & Mods,bool AllowAbs) const259209467b48Spatrick bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
259373471bf0Spatrick                                             unsigned &Mods,
259473471bf0Spatrick                                             bool AllowAbs) const {
259509467b48Spatrick   Mods = 0;
259609467b48Spatrick   Src = In;
259709467b48Spatrick 
259809467b48Spatrick   if (Src.getOpcode() == ISD::FNEG) {
259909467b48Spatrick     Mods |= SISrcMods::NEG;
260009467b48Spatrick     Src = Src.getOperand(0);
260109467b48Spatrick   }
260209467b48Spatrick 
260373471bf0Spatrick   if (AllowAbs && Src.getOpcode() == ISD::FABS) {
260409467b48Spatrick     Mods |= SISrcMods::ABS;
260509467b48Spatrick     Src = Src.getOperand(0);
260609467b48Spatrick   }
260709467b48Spatrick 
260809467b48Spatrick   return true;
260909467b48Spatrick }
261009467b48Spatrick 
SelectVOP3Mods(SDValue In,SDValue & Src,SDValue & SrcMods) const261109467b48Spatrick bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
261209467b48Spatrick                                         SDValue &SrcMods) const {
261309467b48Spatrick   unsigned Mods;
261409467b48Spatrick   if (SelectVOP3ModsImpl(In, Src, Mods)) {
261509467b48Spatrick     SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
261609467b48Spatrick     return true;
261709467b48Spatrick   }
261809467b48Spatrick 
261909467b48Spatrick   return false;
262009467b48Spatrick }
262109467b48Spatrick 
SelectVOP3BMods(SDValue In,SDValue & Src,SDValue & SrcMods) const262273471bf0Spatrick bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
262373471bf0Spatrick                                          SDValue &SrcMods) const {
262473471bf0Spatrick   unsigned Mods;
262573471bf0Spatrick   if (SelectVOP3ModsImpl(In, Src, Mods, /* AllowAbs */ false)) {
262673471bf0Spatrick     SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
262773471bf0Spatrick     return true;
262873471bf0Spatrick   }
262973471bf0Spatrick 
263073471bf0Spatrick   return false;
263173471bf0Spatrick }
263273471bf0Spatrick 
SelectVOP3NoMods(SDValue In,SDValue & Src) const263309467b48Spatrick bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
263409467b48Spatrick   if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
263509467b48Spatrick     return false;
263609467b48Spatrick 
263709467b48Spatrick   Src = In;
263809467b48Spatrick   return true;
263909467b48Spatrick }
264009467b48Spatrick 
SelectVINTERPModsImpl(SDValue In,SDValue & Src,SDValue & SrcMods,bool OpSel) const2641*d415bd75Srobert bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(SDValue In, SDValue &Src,
2642*d415bd75Srobert                                                SDValue &SrcMods,
2643*d415bd75Srobert                                                bool OpSel) const {
2644*d415bd75Srobert   unsigned Mods;
2645*d415bd75Srobert   if (SelectVOP3ModsImpl(In, Src, Mods, /* AllowAbs */ false)) {
2646*d415bd75Srobert     if (OpSel)
2647*d415bd75Srobert       Mods |= SISrcMods::OP_SEL_0;
2648*d415bd75Srobert     SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2649*d415bd75Srobert     return true;
2650*d415bd75Srobert   }
2651*d415bd75Srobert 
2652*d415bd75Srobert   return false;
2653*d415bd75Srobert }
2654*d415bd75Srobert 
SelectVINTERPMods(SDValue In,SDValue & Src,SDValue & SrcMods) const2655*d415bd75Srobert bool AMDGPUDAGToDAGISel::SelectVINTERPMods(SDValue In, SDValue &Src,
2656*d415bd75Srobert                                            SDValue &SrcMods) const {
2657*d415bd75Srobert   return SelectVINTERPModsImpl(In, Src, SrcMods, /* OpSel */ false);
2658*d415bd75Srobert }
2659*d415bd75Srobert 
SelectVINTERPModsHi(SDValue In,SDValue & Src,SDValue & SrcMods) const2660*d415bd75Srobert bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(SDValue In, SDValue &Src,
2661*d415bd75Srobert                                              SDValue &SrcMods) const {
2662*d415bd75Srobert   return SelectVINTERPModsImpl(In, Src, SrcMods, /* OpSel */ true);
2663*d415bd75Srobert }
2664*d415bd75Srobert 
SelectVOP3Mods0(SDValue In,SDValue & Src,SDValue & SrcMods,SDValue & Clamp,SDValue & Omod) const266509467b48Spatrick bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
266609467b48Spatrick                                          SDValue &SrcMods, SDValue &Clamp,
266709467b48Spatrick                                          SDValue &Omod) const {
266809467b48Spatrick   SDLoc DL(In);
266909467b48Spatrick   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
267009467b48Spatrick   Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
267109467b48Spatrick 
267209467b48Spatrick   return SelectVOP3Mods(In, Src, SrcMods);
267309467b48Spatrick }
267409467b48Spatrick 
SelectVOP3BMods0(SDValue In,SDValue & Src,SDValue & SrcMods,SDValue & Clamp,SDValue & Omod) const267573471bf0Spatrick bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,
267673471bf0Spatrick                                           SDValue &SrcMods, SDValue &Clamp,
267773471bf0Spatrick                                           SDValue &Omod) const {
267873471bf0Spatrick   SDLoc DL(In);
267973471bf0Spatrick   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
268073471bf0Spatrick   Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
268173471bf0Spatrick 
268273471bf0Spatrick   return SelectVOP3BMods(In, Src, SrcMods);
268373471bf0Spatrick }
268473471bf0Spatrick 
SelectVOP3OMods(SDValue In,SDValue & Src,SDValue & Clamp,SDValue & Omod) const268509467b48Spatrick bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
268609467b48Spatrick                                          SDValue &Clamp, SDValue &Omod) const {
268709467b48Spatrick   Src = In;
268809467b48Spatrick 
268909467b48Spatrick   SDLoc DL(In);
269009467b48Spatrick   Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
269109467b48Spatrick   Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
269209467b48Spatrick 
269309467b48Spatrick   return true;
269409467b48Spatrick }
269509467b48Spatrick 
SelectVOP3PMods(SDValue In,SDValue & Src,SDValue & SrcMods,bool IsDOT) const269609467b48Spatrick bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
2697*d415bd75Srobert                                          SDValue &SrcMods, bool IsDOT) const {
269809467b48Spatrick   unsigned Mods = 0;
269909467b48Spatrick   Src = In;
270009467b48Spatrick 
270109467b48Spatrick   if (Src.getOpcode() == ISD::FNEG) {
270209467b48Spatrick     Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
270309467b48Spatrick     Src = Src.getOperand(0);
270409467b48Spatrick   }
270509467b48Spatrick 
2706*d415bd75Srobert   if (Src.getOpcode() == ISD::BUILD_VECTOR && Src.getNumOperands() == 2 &&
2707*d415bd75Srobert       (!IsDOT || !Subtarget->hasDOTOpSelHazard())) {
270809467b48Spatrick     unsigned VecMods = Mods;
270909467b48Spatrick 
271009467b48Spatrick     SDValue Lo = stripBitcast(Src.getOperand(0));
271109467b48Spatrick     SDValue Hi = stripBitcast(Src.getOperand(1));
271209467b48Spatrick 
271309467b48Spatrick     if (Lo.getOpcode() == ISD::FNEG) {
271409467b48Spatrick       Lo = stripBitcast(Lo.getOperand(0));
271509467b48Spatrick       Mods ^= SISrcMods::NEG;
271609467b48Spatrick     }
271709467b48Spatrick 
271809467b48Spatrick     if (Hi.getOpcode() == ISD::FNEG) {
271909467b48Spatrick       Hi = stripBitcast(Hi.getOperand(0));
272009467b48Spatrick       Mods ^= SISrcMods::NEG_HI;
272109467b48Spatrick     }
272209467b48Spatrick 
272309467b48Spatrick     if (isExtractHiElt(Lo, Lo))
272409467b48Spatrick       Mods |= SISrcMods::OP_SEL_0;
272509467b48Spatrick 
272609467b48Spatrick     if (isExtractHiElt(Hi, Hi))
272709467b48Spatrick       Mods |= SISrcMods::OP_SEL_1;
272809467b48Spatrick 
272973471bf0Spatrick     unsigned VecSize = Src.getValueSizeInBits();
273009467b48Spatrick     Lo = stripExtractLoElt(Lo);
273109467b48Spatrick     Hi = stripExtractLoElt(Hi);
273209467b48Spatrick 
273373471bf0Spatrick     if (Lo.getValueSizeInBits() > VecSize) {
273473471bf0Spatrick       Lo = CurDAG->getTargetExtractSubreg(
273573471bf0Spatrick         (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
273673471bf0Spatrick         MVT::getIntegerVT(VecSize), Lo);
273773471bf0Spatrick     }
273873471bf0Spatrick 
273973471bf0Spatrick     if (Hi.getValueSizeInBits() > VecSize) {
274073471bf0Spatrick       Hi = CurDAG->getTargetExtractSubreg(
274173471bf0Spatrick         (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
274273471bf0Spatrick         MVT::getIntegerVT(VecSize), Hi);
274373471bf0Spatrick     }
274473471bf0Spatrick 
274573471bf0Spatrick     assert(Lo.getValueSizeInBits() <= VecSize &&
274673471bf0Spatrick            Hi.getValueSizeInBits() <= VecSize);
274773471bf0Spatrick 
274809467b48Spatrick     if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
274909467b48Spatrick       // Really a scalar input. Just select from the low half of the register to
275009467b48Spatrick       // avoid packing.
275109467b48Spatrick 
275273471bf0Spatrick       if (VecSize == 32 || VecSize == Lo.getValueSizeInBits()) {
275309467b48Spatrick         Src = Lo;
275473471bf0Spatrick       } else {
275573471bf0Spatrick         assert(Lo.getValueSizeInBits() == 32 && VecSize == 64);
275673471bf0Spatrick 
275773471bf0Spatrick         SDLoc SL(In);
275873471bf0Spatrick         SDValue Undef = SDValue(
275973471bf0Spatrick           CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL,
276073471bf0Spatrick                                  Lo.getValueType()), 0);
276173471bf0Spatrick         auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
276273471bf0Spatrick                                     : AMDGPU::SReg_64RegClassID;
276373471bf0Spatrick         const SDValue Ops[] = {
276473471bf0Spatrick           CurDAG->getTargetConstant(RC, SL, MVT::i32),
276573471bf0Spatrick           Lo, CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
276673471bf0Spatrick           Undef, CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32) };
276773471bf0Spatrick 
276873471bf0Spatrick         Src = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SL,
276973471bf0Spatrick                                              Src.getValueType(), Ops), 0);
277073471bf0Spatrick       }
277109467b48Spatrick       SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
277209467b48Spatrick       return true;
277309467b48Spatrick     }
277409467b48Spatrick 
277573471bf0Spatrick     if (VecSize == 64 && Lo == Hi && isa<ConstantFPSDNode>(Lo)) {
277673471bf0Spatrick       uint64_t Lit = cast<ConstantFPSDNode>(Lo)->getValueAPF()
277773471bf0Spatrick                       .bitcastToAPInt().getZExtValue();
277873471bf0Spatrick       if (AMDGPU::isInlinableLiteral32(Lit, Subtarget->hasInv2PiInlineImm())) {
277973471bf0Spatrick         Src = CurDAG->getTargetConstant(Lit, SDLoc(In), MVT::i64);;
278073471bf0Spatrick         SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
278173471bf0Spatrick         return true;
278273471bf0Spatrick       }
278373471bf0Spatrick     }
278473471bf0Spatrick 
278509467b48Spatrick     Mods = VecMods;
278609467b48Spatrick   }
278709467b48Spatrick 
278809467b48Spatrick   // Packed instructions do not have abs modifiers.
278909467b48Spatrick   Mods |= SISrcMods::OP_SEL_1;
279009467b48Spatrick 
279109467b48Spatrick   SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
279209467b48Spatrick   return true;
279309467b48Spatrick }
279409467b48Spatrick 
SelectVOP3PModsDOT(SDValue In,SDValue & Src,SDValue & SrcMods) const2795*d415bd75Srobert bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src,
2796*d415bd75Srobert                                             SDValue &SrcMods) const {
2797*d415bd75Srobert   return SelectVOP3PMods(In, Src, SrcMods, true);
2798*d415bd75Srobert }
2799*d415bd75Srobert 
SelectDotIUVOP3PMods(SDValue In,SDValue & Src) const2800*d415bd75Srobert bool AMDGPUDAGToDAGISel::SelectDotIUVOP3PMods(SDValue In, SDValue &Src) const {
2801*d415bd75Srobert   const ConstantSDNode *C = cast<ConstantSDNode>(In);
2802*d415bd75Srobert   // Literal i1 value set in intrinsic, represents SrcMods for the next operand.
2803*d415bd75Srobert   // 1 promotes packed values to signed, 0 treats them as unsigned.
2804*d415bd75Srobert   assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
2805*d415bd75Srobert 
2806*d415bd75Srobert   unsigned Mods = SISrcMods::OP_SEL_1;
2807*d415bd75Srobert   unsigned SrcSign = C->getAPIntValue().getZExtValue();
2808*d415bd75Srobert   if (SrcSign == 1)
2809*d415bd75Srobert     Mods ^= SISrcMods::NEG;
2810*d415bd75Srobert 
2811*d415bd75Srobert   Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2812*d415bd75Srobert   return true;
2813*d415bd75Srobert }
2814*d415bd75Srobert 
SelectWMMAOpSelVOP3PMods(SDValue In,SDValue & Src) const2815*d415bd75Srobert bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
2816*d415bd75Srobert                                                   SDValue &Src) const {
2817*d415bd75Srobert   const ConstantSDNode *C = cast<ConstantSDNode>(In);
2818*d415bd75Srobert   assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
2819*d415bd75Srobert 
2820*d415bd75Srobert   unsigned Mods = SISrcMods::OP_SEL_1;
2821*d415bd75Srobert   unsigned SrcVal = C->getAPIntValue().getZExtValue();
2822*d415bd75Srobert   if (SrcVal == 1)
2823*d415bd75Srobert     Mods |= SISrcMods::OP_SEL_0;
2824*d415bd75Srobert 
2825*d415bd75Srobert   Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
2826*d415bd75Srobert   return true;
2827*d415bd75Srobert }
2828*d415bd75Srobert 
SelectVOP3OpSel(SDValue In,SDValue & Src,SDValue & SrcMods) const282909467b48Spatrick bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
283009467b48Spatrick                                          SDValue &SrcMods) const {
283109467b48Spatrick   Src = In;
283209467b48Spatrick   // FIXME: Handle op_sel
283309467b48Spatrick   SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
283409467b48Spatrick   return true;
283509467b48Spatrick }
283609467b48Spatrick 
SelectVOP3OpSelMods(SDValue In,SDValue & Src,SDValue & SrcMods) const283709467b48Spatrick bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
283809467b48Spatrick                                              SDValue &SrcMods) const {
283909467b48Spatrick   // FIXME: Handle op_sel
284009467b48Spatrick   return SelectVOP3Mods(In, Src, SrcMods);
284109467b48Spatrick }
284209467b48Spatrick 
284309467b48Spatrick // The return value is not whether the match is possible (which it always is),
284409467b48Spatrick // but whether or not it a conversion is really used.
SelectVOP3PMadMixModsImpl(SDValue In,SDValue & Src,unsigned & Mods) const284509467b48Spatrick bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
284609467b48Spatrick                                                    unsigned &Mods) const {
284709467b48Spatrick   Mods = 0;
284809467b48Spatrick   SelectVOP3ModsImpl(In, Src, Mods);
284909467b48Spatrick 
285009467b48Spatrick   if (Src.getOpcode() == ISD::FP_EXTEND) {
285109467b48Spatrick     Src = Src.getOperand(0);
285209467b48Spatrick     assert(Src.getValueType() == MVT::f16);
285309467b48Spatrick     Src = stripBitcast(Src);
285409467b48Spatrick 
285509467b48Spatrick     // Be careful about folding modifiers if we already have an abs. fneg is
285609467b48Spatrick     // applied last, so we don't want to apply an earlier fneg.
285709467b48Spatrick     if ((Mods & SISrcMods::ABS) == 0) {
285809467b48Spatrick       unsigned ModsTmp;
285909467b48Spatrick       SelectVOP3ModsImpl(Src, Src, ModsTmp);
286009467b48Spatrick 
286109467b48Spatrick       if ((ModsTmp & SISrcMods::NEG) != 0)
286209467b48Spatrick         Mods ^= SISrcMods::NEG;
286309467b48Spatrick 
286409467b48Spatrick       if ((ModsTmp & SISrcMods::ABS) != 0)
286509467b48Spatrick         Mods |= SISrcMods::ABS;
286609467b48Spatrick     }
286709467b48Spatrick 
286809467b48Spatrick     // op_sel/op_sel_hi decide the source type and source.
286909467b48Spatrick     // If the source's op_sel_hi is set, it indicates to do a conversion from fp16.
287009467b48Spatrick     // If the sources's op_sel is set, it picks the high half of the source
287109467b48Spatrick     // register.
287209467b48Spatrick 
287309467b48Spatrick     Mods |= SISrcMods::OP_SEL_1;
287409467b48Spatrick     if (isExtractHiElt(Src, Src)) {
287509467b48Spatrick       Mods |= SISrcMods::OP_SEL_0;
287609467b48Spatrick 
287709467b48Spatrick       // TODO: Should we try to look for neg/abs here?
287809467b48Spatrick     }
287909467b48Spatrick 
288009467b48Spatrick     return true;
288109467b48Spatrick   }
288209467b48Spatrick 
288309467b48Spatrick   return false;
288409467b48Spatrick }
288509467b48Spatrick 
SelectVOP3PMadMixMods(SDValue In,SDValue & Src,SDValue & SrcMods) const288609467b48Spatrick bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
288709467b48Spatrick                                                SDValue &SrcMods) const {
288809467b48Spatrick   unsigned Mods = 0;
288909467b48Spatrick   SelectVOP3PMadMixModsImpl(In, Src, Mods);
289009467b48Spatrick   SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
289109467b48Spatrick   return true;
289209467b48Spatrick }
289309467b48Spatrick 
getHi16Elt(SDValue In) const289409467b48Spatrick SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
289509467b48Spatrick   if (In.isUndef())
289609467b48Spatrick     return CurDAG->getUNDEF(MVT::i32);
289709467b48Spatrick 
289809467b48Spatrick   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
289909467b48Spatrick     SDLoc SL(In);
290009467b48Spatrick     return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
290109467b48Spatrick   }
290209467b48Spatrick 
290309467b48Spatrick   if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
290409467b48Spatrick     SDLoc SL(In);
290509467b48Spatrick     return CurDAG->getConstant(
290609467b48Spatrick       C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
290709467b48Spatrick   }
290809467b48Spatrick 
290909467b48Spatrick   SDValue Src;
291009467b48Spatrick   if (isExtractHiElt(In, Src))
291109467b48Spatrick     return Src;
291209467b48Spatrick 
291309467b48Spatrick   return SDValue();
291409467b48Spatrick }
291509467b48Spatrick 
isVGPRImm(const SDNode * N) const291609467b48Spatrick bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
291709467b48Spatrick   assert(CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn);
291809467b48Spatrick 
291909467b48Spatrick   const SIRegisterInfo *SIRI =
292009467b48Spatrick     static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
292109467b48Spatrick   const SIInstrInfo * SII =
292209467b48Spatrick     static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
292309467b48Spatrick 
292409467b48Spatrick   unsigned Limit = 0;
292509467b48Spatrick   bool AllUsesAcceptSReg = true;
292609467b48Spatrick   for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
292709467b48Spatrick     Limit < 10 && U != E; ++U, ++Limit) {
292809467b48Spatrick     const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
292909467b48Spatrick 
293009467b48Spatrick     // If the register class is unknown, it could be an unknown
293109467b48Spatrick     // register class that needs to be an SGPR, e.g. an inline asm
293209467b48Spatrick     // constraint
293309467b48Spatrick     if (!RC || SIRI->isSGPRClass(RC))
293409467b48Spatrick       return false;
293509467b48Spatrick 
293609467b48Spatrick     if (RC != &AMDGPU::VS_32RegClass) {
293709467b48Spatrick       AllUsesAcceptSReg = false;
293809467b48Spatrick       SDNode * User = *U;
293909467b48Spatrick       if (User->isMachineOpcode()) {
294009467b48Spatrick         unsigned Opc = User->getMachineOpcode();
2941*d415bd75Srobert         const MCInstrDesc &Desc = SII->get(Opc);
294209467b48Spatrick         if (Desc.isCommutable()) {
294309467b48Spatrick           unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
294409467b48Spatrick           unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
294509467b48Spatrick           if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
294609467b48Spatrick             unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
294709467b48Spatrick             const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
294809467b48Spatrick             if (CommutedRC == &AMDGPU::VS_32RegClass)
294909467b48Spatrick               AllUsesAcceptSReg = true;
295009467b48Spatrick           }
295109467b48Spatrick         }
295209467b48Spatrick       }
2953*d415bd75Srobert       // If "AllUsesAcceptSReg == false" so far we haven't succeeded
295409467b48Spatrick       // commuting current user. This means have at least one use
295509467b48Spatrick       // that strictly require VGPR. Thus, we will not attempt to commute
295609467b48Spatrick       // other user instructions.
295709467b48Spatrick       if (!AllUsesAcceptSReg)
295809467b48Spatrick         break;
295909467b48Spatrick     }
296009467b48Spatrick   }
296109467b48Spatrick   return !AllUsesAcceptSReg && (Limit < 10);
296209467b48Spatrick }
296309467b48Spatrick 
isUniformLoad(const SDNode * N) const296409467b48Spatrick bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const {
296509467b48Spatrick   auto Ld = cast<LoadSDNode>(N);
296609467b48Spatrick 
2967*d415bd75Srobert   if (N->isDivergent() && !AMDGPUInstrInfo::isUniformMMO(Ld->getMemOperand()))
2968*d415bd75Srobert     return false;
2969*d415bd75Srobert 
2970*d415bd75Srobert   return Ld->getAlign() >= Align(4) &&
2971*d415bd75Srobert          ((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
2972*d415bd75Srobert            Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) ||
2973*d415bd75Srobert           (Subtarget->getScalarizeGlobalBehavior() &&
297409467b48Spatrick            Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
2975097a140dSpatrick            Ld->isSimple() &&
2976*d415bd75Srobert            static_cast<const SITargetLowering *>(getTargetLowering())
2977*d415bd75Srobert                ->isMemOpHasNoClobberedMemOperand(N)));
297809467b48Spatrick }
297909467b48Spatrick 
PostprocessISelDAG()298009467b48Spatrick void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
298109467b48Spatrick   const AMDGPUTargetLowering& Lowering =
298209467b48Spatrick     *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
298309467b48Spatrick   bool IsModified = false;
298409467b48Spatrick   do {
298509467b48Spatrick     IsModified = false;
298609467b48Spatrick 
298709467b48Spatrick     // Go over all selected nodes and try to fold them a bit more
298809467b48Spatrick     SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
298909467b48Spatrick     while (Position != CurDAG->allnodes_end()) {
299009467b48Spatrick       SDNode *Node = &*Position++;
299109467b48Spatrick       MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
299209467b48Spatrick       if (!MachineNode)
299309467b48Spatrick         continue;
299409467b48Spatrick 
299509467b48Spatrick       SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
299609467b48Spatrick       if (ResNode != Node) {
299709467b48Spatrick         if (ResNode)
299809467b48Spatrick           ReplaceUses(Node, ResNode);
299909467b48Spatrick         IsModified = true;
300009467b48Spatrick       }
300109467b48Spatrick     }
300209467b48Spatrick     CurDAG->RemoveDeadNodes();
300309467b48Spatrick   } while (IsModified);
300409467b48Spatrick }
300509467b48Spatrick 
// Out-of-line definition of the static `ID` member, initialised to 0.
// NOTE(review): presumably the LLVM pass-identification token, where the
// address rather than the value is what matters — confirm against the pass
// registration code.
3006*d415bd75Srobert char AMDGPUDAGToDAGISel::ID = 0;
3007