1 //===-- AMDGPUISelDAGToDAG.h - A dag to dag inst selector for AMDGPU ----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //==-----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Defines an instruction selector for the AMDGPU target. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H 15 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H 16 17 #include "GCNSubtarget.h" 18 #include "SIMachineFunctionInfo.h" 19 #include "SIModeRegisterDefaults.h" 20 #include "llvm/Analysis/ValueTracking.h" 21 #include "llvm/CodeGen/SelectionDAGISel.h" 22 #include "llvm/Target/TargetMachine.h" 23 24 namespace llvm { 25 26 static inline bool getConstantValue(SDValue N, uint32_t &Out) { 27 // This is only used for packed vectors, where using 0 for undef should 28 // always be good. 29 if (N.isUndef()) { 30 Out = 0; 31 return true; 32 } 33 34 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) { 35 Out = C->getAPIntValue().getSExtValue(); 36 return true; 37 } 38 39 if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) { 40 Out = C->getValueAPF().bitcastToAPInt().getSExtValue(); 41 return true; 42 } 43 44 return false; 45 } 46 47 // TODO: Handle undef as zero 48 static inline SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG) { 49 assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2); 50 uint32_t LHSVal, RHSVal; 51 if (getConstantValue(N->getOperand(0), LHSVal) && 52 getConstantValue(N->getOperand(1), RHSVal)) { 53 SDLoc SL(N); 54 uint32_t K = (LHSVal & 0xffff) | (RHSVal << 16); 55 return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0), 56 DAG.getTargetConstant(K, SL, MVT::i32)); 57 } 58 59 return nullptr; 60 } 61 62 /// AMDGPU specific code to select AMDGPU machine instructions for 63 /// SelectionDAG operations. 64 class AMDGPUDAGToDAGISel : public SelectionDAGISel { 65 // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can 66 // make the right decision when generating code for different targets. 67 const GCNSubtarget *Subtarget; 68 69 // Default FP mode for the current function. 70 SIModeRegisterDefaults Mode; 71 72 // Instructions that will be lowered with a final instruction that zeros the 73 // high result bits. 74 bool fp16SrcZerosHighBits(unsigned Opc) const; 75 76 public: 77 AMDGPUDAGToDAGISel() = delete; 78 79 explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOptLevel OptLevel); 80 81 bool runOnMachineFunction(MachineFunction &MF) override; 82 bool matchLoadD16FromBuildVector(SDNode *N) const; 83 void PreprocessISelDAG() override; 84 void Select(SDNode *N) override; 85 void PostprocessISelDAG() override; 86 87 protected: 88 void SelectBuildVector(SDNode *N, unsigned RegClassID); 89 void SelectVectorShuffle(SDNode *N); 90 91 private: 92 std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const; 93 94 bool isInlineImmediate(const SDNode *N) const; 95 96 bool isInlineImmediate(const APInt &Imm) const { 97 return Subtarget->getInstrInfo()->isInlineConstant(Imm); 98 } 99 100 bool isInlineImmediate(const APFloat &Imm) const { 101 return Subtarget->getInstrInfo()->isInlineConstant(Imm); 102 } 103 104 bool isVGPRImm(const SDNode *N) const; 105 bool isUniformLoad(const SDNode *N) const; 106 bool isUniformBr(const SDNode *N) const; 107 108 // Returns true if ISD::AND SDNode `N`'s masking of the shift amount operand's 109 // `ShAmtBits` bits is unneeded. 110 bool isUnneededShiftMask(const SDNode *N, unsigned ShAmtBits) const; 111 112 bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS, 113 SDValue &RHS) const; 114 115 MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const; 116 117 SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const; 118 SDNode *glueCopyToM0(SDNode *N, SDValue Val) const; 119 SDNode *glueCopyToM0LDSInit(SDNode *N) const; 120 121 const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const; 122 virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); 123 virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset); 124 bool isDSOffsetLegal(SDValue Base, unsigned Offset) const; 125 bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1, 126 unsigned Size) const; 127 128 bool isFlatScratchBaseLegal(SDValue Addr) const; 129 bool isFlatScratchBaseLegalSV(SDValue Addr) const; 130 bool isFlatScratchBaseLegalSVImm(SDValue Addr) const; 131 bool isSOffsetLegalWithImmOffset(SDValue *SOffset, bool Imm32Only, 132 bool IsBuffer, int64_t ImmOffset = 0) const; 133 134 bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const; 135 bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0, 136 SDValue &Offset1) const; 137 bool SelectDS128Bit8ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0, 138 SDValue &Offset1) const; 139 bool SelectDSReadWrite2(SDValue Ptr, SDValue &Base, SDValue &Offset0, 140 SDValue &Offset1, unsigned Size) const; 141 bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, 142 SDValue &SOffset, SDValue &Offset, SDValue &Offen, 143 SDValue &Idxen, SDValue &Addr64) const; 144 bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, 145 SDValue &SOffset, SDValue &Offset) const; 146 bool SelectMUBUFScratchOffen(SDNode *Parent, SDValue Addr, SDValue &RSrc, 147 SDValue &VAddr, SDValue &SOffset, 148 SDValue &ImmOffset) const; 149 bool SelectMUBUFScratchOffset(SDNode *Parent, SDValue Addr, SDValue &SRsrc, 150 SDValue &Soffset, SDValue &Offset) const; 151 152 bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, 153 SDValue &Offset) const; 154 bool SelectBUFSOffset(SDValue Addr, SDValue &SOffset) const; 155 156 bool SelectFlatOffsetImpl(SDNode *N, SDValue Addr, SDValue &VAddr, 157 SDValue &Offset, uint64_t FlatVariant) const; 158 bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr, 159 SDValue &Offset) const; 160 bool SelectGlobalOffset(SDNode *N, SDValue Addr, SDValue &VAddr, 161 SDValue &Offset) const; 162 bool SelectScratchOffset(SDNode *N, SDValue Addr, SDValue &VAddr, 163 SDValue &Offset) const; 164 bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr, 165 SDValue &VOffset, SDValue &Offset) const; 166 bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr, 167 SDValue &Offset) const; 168 bool checkFlatScratchSVSSwizzleBug(SDValue VAddr, SDValue SAddr, 169 uint64_t ImmOffset) const; 170 bool SelectScratchSVAddr(SDNode *N, SDValue Addr, SDValue &VAddr, 171 SDValue &SAddr, SDValue &Offset) const; 172 173 bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue *SOffset, 174 SDValue *Offset, bool Imm32Only = false, 175 bool IsBuffer = false, bool HasSOffset = false, 176 int64_t ImmOffset = 0) const; 177 SDValue Expand32BitAddress(SDValue Addr) const; 178 bool SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase, SDValue *SOffset, 179 SDValue *Offset, bool Imm32Only = false, 180 bool IsBuffer = false, bool HasSOffset = false, 181 int64_t ImmOffset = 0) const; 182 bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue *SOffset, 183 SDValue *Offset, bool Imm32Only = false) const; 184 bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const; 185 bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const; 186 bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &SOffset) const; 187 bool SelectSMRDSgprImm(SDValue Addr, SDValue &SBase, SDValue &SOffset, 188 SDValue &Offset) const; 189 bool SelectSMRDBufferImm(SDValue N, SDValue &Offset) const; 190 bool SelectSMRDBufferImm32(SDValue N, SDValue &Offset) const; 191 bool SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset, 192 SDValue &Offset) const; 193 bool SelectSMRDPrefetchImm(SDValue Addr, SDValue &SBase, 194 SDValue &Offset) const; 195 bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const; 196 197 bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods, 198 bool IsCanonicalizing = true, 199 bool AllowAbs = true) const; 200 bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const; 201 bool SelectVOP3ModsNonCanonicalizing(SDValue In, SDValue &Src, 202 SDValue &SrcMods) const; 203 bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; 204 bool SelectVOP3NoMods(SDValue In, SDValue &Src) const; 205 bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods, 206 SDValue &Clamp, SDValue &Omod) const; 207 bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods, 208 SDValue &Clamp, SDValue &Omod) const; 209 bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods, 210 SDValue &Clamp, SDValue &Omod) const; 211 212 bool SelectVINTERPModsImpl(SDValue In, SDValue &Src, SDValue &SrcMods, 213 bool OpSel) const; 214 bool SelectVINTERPMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; 215 bool SelectVINTERPModsHi(SDValue In, SDValue &Src, SDValue &SrcMods) const; 216 217 bool SelectVOP3OMods(SDValue In, SDValue &Src, SDValue &Clamp, 218 SDValue &Omod) const; 219 220 bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods, 221 bool IsDOT = false) const; 222 bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const; 223 224 bool SelectVOP3PModsNeg(SDValue In, SDValue &Src) const; 225 bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const; 226 227 bool SelectWMMAModsF32NegAbs(SDValue In, SDValue &Src, 228 SDValue &SrcMods) const; 229 bool SelectWMMAModsF16Neg(SDValue In, SDValue &Src, SDValue &SrcMods) const; 230 bool SelectWMMAModsF16NegAbs(SDValue In, SDValue &Src, 231 SDValue &SrcMods) const; 232 bool SelectWMMAVISrc(SDValue In, SDValue &Src) const; 233 234 bool SelectSWMMACIndex8(SDValue In, SDValue &Src, SDValue &IndexKey) const; 235 bool SelectSWMMACIndex16(SDValue In, SDValue &Src, SDValue &IndexKey) const; 236 237 bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const; 238 239 bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; 240 bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, 241 unsigned &Mods) const; 242 bool SelectVOP3PMadMixModsExt(SDValue In, SDValue &Src, 243 SDValue &SrcMods) const; 244 bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; 245 246 bool SelectBITOP3(SDValue In, SDValue &Src0, SDValue &Src1, SDValue &Src2, 247 SDValue &Tbl) const; 248 249 SDValue getHi16Elt(SDValue In) const; 250 251 SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const; 252 253 void SelectADD_SUB_I64(SDNode *N); 254 void SelectAddcSubb(SDNode *N); 255 void SelectUADDO_USUBO(SDNode *N); 256 void SelectDIV_SCALE(SDNode *N); 257 void SelectMAD_64_32(SDNode *N); 258 void SelectMUL_LOHI(SDNode *N); 259 void SelectFMA_W_CHAIN(SDNode *N); 260 void SelectFMUL_W_CHAIN(SDNode *N); 261 SDNode *getBFE32(bool IsSigned, const SDLoc &DL, SDValue Val, uint32_t Offset, 262 uint32_t Width); 263 void SelectS_BFEFromShifts(SDNode *N); 264 void SelectS_BFE(SDNode *N); 265 bool isCBranchSCC(const SDNode *N) const; 266 void SelectBRCOND(SDNode *N); 267 void SelectFMAD_FMA(SDNode *N); 268 void SelectFP_EXTEND(SDNode *N); 269 void SelectDSAppendConsume(SDNode *N, unsigned IntrID); 270 void SelectDSBvhStackIntrinsic(SDNode *N); 271 void SelectDS_GWS(SDNode *N, unsigned IntrID); 272 void SelectInterpP1F16(SDNode *N); 273 void SelectINTRINSIC_W_CHAIN(SDNode *N); 274 void SelectINTRINSIC_WO_CHAIN(SDNode *N); 275 void SelectINTRINSIC_VOID(SDNode *N); 276 void SelectWAVE_ADDRESS(SDNode *N); 277 void SelectSTACKRESTORE(SDNode *N); 278 279 protected: 280 // Include the pieces autogenerated from the target description. 281 #include "AMDGPUGenDAGISel.inc" 282 }; 283 284 class AMDGPUISelDAGToDAGPass : public SelectionDAGISelPass { 285 public: 286 AMDGPUISelDAGToDAGPass(TargetMachine &TM); 287 288 PreservedAnalyses run(MachineFunction &MF, 289 MachineFunctionAnalysisManager &MFAM); 290 }; 291 292 class AMDGPUDAGToDAGISelLegacy : public SelectionDAGISelLegacy { 293 public: 294 static char ID; 295 296 AMDGPUDAGToDAGISelLegacy(TargetMachine &TM, CodeGenOptLevel OptLevel); 297 298 bool runOnMachineFunction(MachineFunction &MF) override; 299 void getAnalysisUsage(AnalysisUsage &AU) const override; 300 StringRef getPassName() const override; 301 }; 302 303 } // namespace llvm 304 305 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H 306