1 //===- AMDGPUInstructionSelector --------------------------------*- C++ -*-==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// This file declares the targeting of the InstructionSelector class for 10 /// AMDGPU. 11 //===----------------------------------------------------------------------===// 12 13 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H 14 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H 15 16 #include "SIDefines.h" 17 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" 18 #include "llvm/IR/InstrTypes.h" 19 20 namespace { 21 #define GET_GLOBALISEL_PREDICATE_BITSET 22 #define AMDGPUSubtarget GCNSubtarget 23 #include "AMDGPUGenGlobalISel.inc" 24 #undef GET_GLOBALISEL_PREDICATE_BITSET 25 #undef AMDGPUSubtarget 26 } 27 28 namespace llvm { 29 30 namespace AMDGPU { 31 struct ImageDimIntrinsicInfo; 32 } 33 34 class AMDGPURegisterBankInfo; 35 class AMDGPUTargetMachine; 36 class BlockFrequencyInfo; 37 class ProfileSummaryInfo; 38 class GCNSubtarget; 39 class MachineInstr; 40 class MachineIRBuilder; 41 class MachineOperand; 42 class MachineRegisterInfo; 43 class RegisterBank; 44 class SIInstrInfo; 45 class SIRegisterInfo; 46 class TargetRegisterClass; 47 48 class AMDGPUInstructionSelector final : public InstructionSelector { 49 private: 50 MachineRegisterInfo *MRI; 51 const GCNSubtarget *Subtarget; 52 53 public: 54 AMDGPUInstructionSelector(const GCNSubtarget &STI, 55 const AMDGPURegisterBankInfo &RBI, 56 const AMDGPUTargetMachine &TM); 57 58 bool select(MachineInstr &I) override; 59 static const char *getName(); 60 61 void setupMF(MachineFunction &MF, GISelKnownBits *KB, 62 CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI, 63 BlockFrequencyInfo *BFI) override; 64 65 private: 66 struct GEPInfo { 67 SmallVector<unsigned, 2> SgprParts; 68 SmallVector<unsigned, 2> VgprParts; 69 int64_t Imm = 0; 70 }; 71 72 bool isSGPR(Register Reg) const; 73 74 bool isInstrUniform(const MachineInstr &MI) const; 75 bool isVCC(Register Reg, const MachineRegisterInfo &MRI) const; 76 77 const RegisterBank *getArtifactRegBank( 78 Register Reg, const MachineRegisterInfo &MRI, 79 const TargetRegisterInfo &TRI) const; 80 81 /// tblgen-erated 'select' implementation. 82 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; 83 84 MachineOperand getSubOperand64(MachineOperand &MO, 85 const TargetRegisterClass &SubRC, 86 unsigned SubIdx) const; 87 88 bool constrainCopyLikeIntrin(MachineInstr &MI, unsigned NewOpc) const; 89 bool selectCOPY(MachineInstr &I) const; 90 bool selectCOPY_SCC_VCC(MachineInstr &I) const; 91 bool selectCOPY_VCC_SCC(MachineInstr &I) const; 92 bool selectReadAnyLane(MachineInstr &I) const; 93 bool selectPHI(MachineInstr &I) const; 94 bool selectG_TRUNC(MachineInstr &I) const; 95 bool selectG_SZA_EXT(MachineInstr &I) const; 96 bool selectG_FPEXT(MachineInstr &I) const; 97 bool selectG_FNEG(MachineInstr &I) const; 98 bool selectG_FABS(MachineInstr &I) const; 99 bool selectG_AND_OR_XOR(MachineInstr &I) const; 100 bool selectG_ADD_SUB(MachineInstr &I) const; 101 bool selectG_UADDO_USUBO_UADDE_USUBE(MachineInstr &I) const; 102 bool selectG_AMDGPU_MAD_64_32(MachineInstr &I) const; 103 bool selectG_EXTRACT(MachineInstr &I) const; 104 bool selectG_FMA_FMAD(MachineInstr &I) const; 105 bool selectG_MERGE_VALUES(MachineInstr &I) const; 106 bool selectG_UNMERGE_VALUES(MachineInstr &I) const; 107 bool selectG_BUILD_VECTOR(MachineInstr &I) const; 108 bool selectG_IMPLICIT_DEF(MachineInstr &I) const; 109 bool selectG_INSERT(MachineInstr &I) const; 110 bool selectG_SBFX_UBFX(MachineInstr &I) const; 111 112 bool selectInterpP1F16(MachineInstr &MI) const; 113 bool selectWritelane(MachineInstr &MI) const; 114 bool selectDivScale(MachineInstr &MI) const; 115 bool selectIntrinsicCmp(MachineInstr &MI) const; 116 bool selectBallot(MachineInstr &I) const; 117 bool selectRelocConstant(MachineInstr &I) const; 118 bool selectGroupStaticSize(MachineInstr &I) const; 119 bool selectReturnAddress(MachineInstr &I) const; 120 bool selectG_INTRINSIC(MachineInstr &I) const; 121 122 bool selectEndCfIntrinsic(MachineInstr &MI) const; 123 bool selectDSOrderedIntrinsic(MachineInstr &MI, Intrinsic::ID IID) const; 124 bool selectDSGWSIntrinsic(MachineInstr &MI, Intrinsic::ID IID) const; 125 bool selectDSAppendConsume(MachineInstr &MI, bool IsAppend) const; 126 bool selectInitWholeWave(MachineInstr &MI) const; 127 bool selectSBarrier(MachineInstr &MI) const; 128 bool selectDSBvhStackIntrinsic(MachineInstr &MI) const; 129 130 bool selectImageIntrinsic(MachineInstr &MI, 131 const AMDGPU::ImageDimIntrinsicInfo *Intr) const; 132 bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I) const; 133 int getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) const; 134 bool selectG_ICMP_or_FCMP(MachineInstr &I) const; 135 bool hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const; 136 void getAddrModeInfo(const MachineInstr &Load, const MachineRegisterInfo &MRI, 137 SmallVectorImpl<GEPInfo> &AddrInfo) const; 138 139 void initM0(MachineInstr &I) const; 140 bool selectG_LOAD_STORE_ATOMICRMW(MachineInstr &I) const; 141 bool selectG_SELECT(MachineInstr &I) const; 142 bool selectG_BRCOND(MachineInstr &I) const; 143 bool selectG_GLOBAL_VALUE(MachineInstr &I) const; 144 bool selectG_PTRMASK(MachineInstr &I) const; 145 bool selectG_EXTRACT_VECTOR_ELT(MachineInstr &I) const; 146 bool selectG_INSERT_VECTOR_ELT(MachineInstr &I) const; 147 bool selectBufferLoadLds(MachineInstr &MI) const; 148 bool selectGlobalLoadLds(MachineInstr &MI) const; 149 bool selectBVHIntrinsic(MachineInstr &I) const; 150 bool selectSMFMACIntrin(MachineInstr &I) const; 151 bool selectPermlaneSwapIntrin(MachineInstr &I, Intrinsic::ID IntrID) const; 152 bool selectWaveAddress(MachineInstr &I) const; 153 bool selectBITOP3(MachineInstr &I) const; 154 bool selectStackRestore(MachineInstr &MI) const; 155 bool selectNamedBarrierInit(MachineInstr &I, Intrinsic::ID IID) const; 156 bool selectNamedBarrierInst(MachineInstr &I, Intrinsic::ID IID) const; 157 bool selectSBarrierSignalIsfirst(MachineInstr &I, Intrinsic::ID IID) const; 158 bool selectSGetBarrierState(MachineInstr &I, Intrinsic::ID IID) const; 159 bool selectSBarrierLeave(MachineInstr &I) const; 160 161 std::pair<Register, unsigned> selectVOP3ModsImpl(Register Src, 162 bool IsCanonicalizing = true, 163 bool AllowAbs = true, 164 bool OpSel = false) const; 165 166 Register copyToVGPRIfSrcFolded(Register Src, unsigned Mods, 167 MachineOperand Root, MachineInstr *InsertPt, 168 bool ForceVGPR = false) const; 169 170 InstructionSelector::ComplexRendererFns 171 selectVCSRC(MachineOperand &Root) const; 172 173 InstructionSelector::ComplexRendererFns 174 selectVSRC0(MachineOperand &Root) const; 175 176 InstructionSelector::ComplexRendererFns 177 selectVOP3Mods0(MachineOperand &Root) const; 178 InstructionSelector::ComplexRendererFns 179 selectVOP3BMods0(MachineOperand &Root) const; 180 InstructionSelector::ComplexRendererFns 181 selectVOP3OMods(MachineOperand &Root) const; 182 InstructionSelector::ComplexRendererFns 183 selectVOP3Mods(MachineOperand &Root) const; 184 InstructionSelector::ComplexRendererFns 185 selectVOP3ModsNonCanonicalizing(MachineOperand &Root) const; 186 InstructionSelector::ComplexRendererFns 187 selectVOP3BMods(MachineOperand &Root) const; 188 189 ComplexRendererFns selectVOP3NoMods(MachineOperand &Root) const; 190 191 std::pair<Register, unsigned> 192 selectVOP3PModsImpl(Register Src, const MachineRegisterInfo &MRI, 193 bool IsDOT = false) const; 194 195 InstructionSelector::ComplexRendererFns 196 selectVOP3PMods(MachineOperand &Root) const; 197 198 InstructionSelector::ComplexRendererFns 199 selectVOP3PModsDOT(MachineOperand &Root) const; 200 201 InstructionSelector::ComplexRendererFns 202 selectVOP3PModsNeg(MachineOperand &Root) const; 203 204 InstructionSelector::ComplexRendererFns 205 selectWMMAOpSelVOP3PMods(MachineOperand &Root) const; 206 207 InstructionSelector::ComplexRendererFns 208 selectWMMAModsF32NegAbs(MachineOperand &Root) const; 209 InstructionSelector::ComplexRendererFns 210 selectWMMAModsF16Neg(MachineOperand &Root) const; 211 InstructionSelector::ComplexRendererFns 212 selectWMMAModsF16NegAbs(MachineOperand &Root) const; 213 InstructionSelector::ComplexRendererFns 214 selectWMMAVISrc(MachineOperand &Root) const; 215 InstructionSelector::ComplexRendererFns 216 selectSWMMACIndex8(MachineOperand &Root) const; 217 InstructionSelector::ComplexRendererFns 218 selectSWMMACIndex16(MachineOperand &Root) const; 219 220 InstructionSelector::ComplexRendererFns 221 selectVOP3OpSelMods(MachineOperand &Root) const; 222 223 InstructionSelector::ComplexRendererFns 224 selectVINTERPMods(MachineOperand &Root) const; 225 InstructionSelector::ComplexRendererFns 226 selectVINTERPModsHi(MachineOperand &Root) const; 227 228 bool selectSmrdOffset(MachineOperand &Root, Register &Base, Register *SOffset, 229 int64_t *Offset) const; 230 InstructionSelector::ComplexRendererFns 231 selectSmrdImm(MachineOperand &Root) const; 232 InstructionSelector::ComplexRendererFns 233 selectSmrdImm32(MachineOperand &Root) const; 234 InstructionSelector::ComplexRendererFns 235 selectSmrdSgpr(MachineOperand &Root) const; 236 InstructionSelector::ComplexRendererFns 237 selectSmrdSgprImm(MachineOperand &Root) const; 238 239 std::pair<Register, int> selectFlatOffsetImpl(MachineOperand &Root, 240 uint64_t FlatVariant) const; 241 242 InstructionSelector::ComplexRendererFns 243 selectFlatOffset(MachineOperand &Root) const; 244 InstructionSelector::ComplexRendererFns 245 selectGlobalOffset(MachineOperand &Root) const; 246 InstructionSelector::ComplexRendererFns 247 selectScratchOffset(MachineOperand &Root) const; 248 249 InstructionSelector::ComplexRendererFns 250 selectGlobalSAddr(MachineOperand &Root) const; 251 252 InstructionSelector::ComplexRendererFns 253 selectScratchSAddr(MachineOperand &Root) const; 254 bool checkFlatScratchSVSSwizzleBug(Register VAddr, Register SAddr, 255 uint64_t ImmOffset) const; 256 InstructionSelector::ComplexRendererFns 257 selectScratchSVAddr(MachineOperand &Root) const; 258 259 InstructionSelector::ComplexRendererFns 260 selectMUBUFScratchOffen(MachineOperand &Root) const; 261 InstructionSelector::ComplexRendererFns 262 selectMUBUFScratchOffset(MachineOperand &Root) const; 263 264 bool isDSOffsetLegal(Register Base, int64_t Offset) const; 265 bool isDSOffset2Legal(Register Base, int64_t Offset0, int64_t Offset1, 266 unsigned Size) const; 267 bool isFlatScratchBaseLegal(Register Addr) const; 268 bool isFlatScratchBaseLegalSV(Register Addr) const; 269 bool isFlatScratchBaseLegalSVImm(Register Addr) const; 270 271 std::pair<Register, unsigned> 272 selectDS1Addr1OffsetImpl(MachineOperand &Root) const; 273 InstructionSelector::ComplexRendererFns 274 selectDS1Addr1Offset(MachineOperand &Root) const; 275 276 InstructionSelector::ComplexRendererFns 277 selectDS64Bit4ByteAligned(MachineOperand &Root) const; 278 279 InstructionSelector::ComplexRendererFns 280 selectDS128Bit8ByteAligned(MachineOperand &Root) const; 281 282 std::pair<Register, unsigned> selectDSReadWrite2Impl(MachineOperand &Root, 283 unsigned size) const; 284 InstructionSelector::ComplexRendererFns 285 selectDSReadWrite2(MachineOperand &Root, unsigned size) const; 286 287 std::pair<Register, int64_t> 288 getPtrBaseWithConstantOffset(Register Root, 289 const MachineRegisterInfo &MRI) const; 290 291 // Parse out a chain of up to two g_ptr_add instructions. 292 // g_ptr_add (n0, _) 293 // g_ptr_add (n0, (n1 = g_ptr_add n2, n3)) 294 struct MUBUFAddressData { 295 Register N0, N2, N3; 296 int64_t Offset = 0; 297 }; 298 299 bool shouldUseAddr64(MUBUFAddressData AddrData) const; 300 301 void splitIllegalMUBUFOffset(MachineIRBuilder &B, 302 Register &SOffset, int64_t &ImmOffset) const; 303 304 MUBUFAddressData parseMUBUFAddress(Register Src) const; 305 306 bool selectMUBUFAddr64Impl(MachineOperand &Root, Register &VAddr, 307 Register &RSrcReg, Register &SOffset, 308 int64_t &Offset) const; 309 310 bool selectMUBUFOffsetImpl(MachineOperand &Root, Register &RSrcReg, 311 Register &SOffset, int64_t &Offset) const; 312 313 InstructionSelector::ComplexRendererFns 314 selectBUFSOffset(MachineOperand &Root) const; 315 316 InstructionSelector::ComplexRendererFns 317 selectMUBUFAddr64(MachineOperand &Root) const; 318 319 InstructionSelector::ComplexRendererFns 320 selectMUBUFOffset(MachineOperand &Root) const; 321 322 ComplexRendererFns selectSMRDBufferImm(MachineOperand &Root) const; 323 ComplexRendererFns selectSMRDBufferImm32(MachineOperand &Root) const; 324 ComplexRendererFns selectSMRDBufferSgprImm(MachineOperand &Root) const; 325 326 std::pair<Register, unsigned> selectVOP3PMadMixModsImpl(MachineOperand &Root, 327 bool &Matched) const; 328 ComplexRendererFns selectVOP3PMadMixModsExt(MachineOperand &Root) const; 329 ComplexRendererFns selectVOP3PMadMixMods(MachineOperand &Root) const; 330 331 void renderTruncImm32(MachineInstrBuilder &MIB, const MachineInstr &MI, 332 int OpIdx = -1) const; 333 334 void renderTruncTImm(MachineInstrBuilder &MIB, const MachineInstr &MI, 335 int OpIdx) const; 336 void renderZextBoolTImm(MachineInstrBuilder &MIB, const MachineInstr &MI, 337 int OpIdx) const; 338 339 void renderOpSelTImm(MachineInstrBuilder &MIB, const MachineInstr &MI, 340 int OpIdx) const; 341 342 void renderSrcAndDstSelToOpSelXForm_0_0(MachineInstrBuilder &MIB, 343 const MachineInstr &MI, 344 int OpIdx) const; 345 346 void renderSrcAndDstSelToOpSelXForm_0_1(MachineInstrBuilder &MIB, 347 const MachineInstr &MI, 348 int OpIdx) const; 349 350 void renderSrcAndDstSelToOpSelXForm_1_0(MachineInstrBuilder &MIB, 351 const MachineInstr &MI, 352 int OpIdx) const; 353 354 void renderSrcAndDstSelToOpSelXForm_1_1(MachineInstrBuilder &MIB, 355 const MachineInstr &MI, 356 int OpIdx) const; 357 358 void renderDstSelToOpSelXForm(MachineInstrBuilder &MIB, 359 const MachineInstr &MI, int OpIdx) const; 360 361 void renderSrcSelToOpSelXForm(MachineInstrBuilder &MIB, 362 const MachineInstr &MI, int OpIdx) const; 363 364 void renderSrcAndDstSelToOpSelXForm_2_0(MachineInstrBuilder &MIB, 365 const MachineInstr &MI, 366 int OpIdx) const; 367 368 void renderDstSelToOpSel3XFormXForm(MachineInstrBuilder &MIB, 369 const MachineInstr &MI, int OpIdx) const; 370 371 void renderNegateImm(MachineInstrBuilder &MIB, const MachineInstr &MI, 372 int OpIdx) const; 373 374 void renderBitcastFPImm(MachineInstrBuilder &MIB, const MachineInstr &MI, 375 int OpIdx) const; 376 377 void renderBitcastFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI, 378 int OpIdx) const { 379 renderBitcastFPImm(MIB, MI, OpIdx); 380 } 381 void renderBitcastFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI, 382 int OpIdx) const { 383 renderBitcastFPImm(MIB, MI, OpIdx); 384 } 385 386 void renderPopcntImm(MachineInstrBuilder &MIB, const MachineInstr &MI, 387 int OpIdx) const; 388 void renderExtractCPol(MachineInstrBuilder &MIB, const MachineInstr &MI, 389 int OpIdx) const; 390 void renderExtractSWZ(MachineInstrBuilder &MIB, const MachineInstr &MI, 391 int OpIdx) const; 392 void renderExtractCpolSetGLC(MachineInstrBuilder &MIB, const MachineInstr &MI, 393 int OpIdx) const; 394 395 void renderFrameIndex(MachineInstrBuilder &MIB, const MachineInstr &MI, 396 int OpIdx) const; 397 398 void renderFPPow2ToExponent(MachineInstrBuilder &MIB, const MachineInstr &MI, 399 int OpIdx) const; 400 401 void renderRoundMode(MachineInstrBuilder &MIB, const MachineInstr &MI, 402 int OpIdx) const; 403 void renderScaledMAIIntrinsicOperand(MachineInstrBuilder &MIB, 404 const MachineInstr &MI, int OpIdx) const; 405 406 bool isInlineImmediate(const APInt &Imm) const; 407 bool isInlineImmediate(const APFloat &Imm) const; 408 409 // Returns true if TargetOpcode::G_AND MachineInstr `MI`'s masking of the 410 // shift amount operand's `ShAmtBits` bits is unneeded. 411 bool isUnneededShiftMask(const MachineInstr &MI, unsigned ShAmtBits) const; 412 413 const SIInstrInfo &TII; 414 const SIRegisterInfo &TRI; 415 const AMDGPURegisterBankInfo &RBI; 416 const AMDGPUTargetMachine &TM; 417 const GCNSubtarget &STI; 418 #define GET_GLOBALISEL_PREDICATES_DECL 419 #define AMDGPUSubtarget GCNSubtarget 420 #include "AMDGPUGenGlobalISel.inc" 421 #undef GET_GLOBALISEL_PREDICATES_DECL 422 #undef AMDGPUSubtarget 423 424 #define GET_GLOBALISEL_TEMPORARIES_DECL 425 #include "AMDGPUGenGlobalISel.inc" 426 #undef GET_GLOBALISEL_TEMPORARIES_DECL 427 }; 428 429 } // End llvm namespace. 430 #endif 431