//===-- SIInstrInfo.td -----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

def isWave32 : Predicate<"Subtarget->isWave32()">,
  AssemblerPredicate <(all_of FeatureWavefrontSize32)>;
def isWave64 : Predicate<"Subtarget->isWave64()">,
  AssemblerPredicate <(all_of FeatureWavefrontSize64)>;

class AMDGPUMnemonicAlias<string From, string To, string VariantName = "">
  : MnemonicAlias<From, To, VariantName>, PredicateControl;

// Except for the NONE field, this must be kept in sync with the
// SIEncodingFamily enum in SIInstrInfo.cpp and the columns of the
// getMCOpcodeGen table.
def SIEncodingFamily {
  int NONE = -1;
  int SI = 0;
  int VI = 1;
  int SDWA = 2;
  int SDWA9 = 3;
  int GFX80 = 4;
  int GFX9 = 5;
  int GFX10 = 6;
  int SDWA10 = 7;
  int GFX90A = 8;
  int GFX940 = 9;
  int GFX11 = 10;
  int GFX12 = 11;
}

//===----------------------------------------------------------------------===//
// Subtarget info
//===----------------------------------------------------------------------===//

class GFXGen<Predicate pred, string dn, string suffix, int sub> {
  Predicate AssemblerPredicate = pred;
  string DecoderNamespace = dn;
  string Suffix = suffix;
  int Subtarget = sub;
}

def GFX12Gen : GFXGen<isGFX12Only, "GFX12", "_gfx12", SIEncodingFamily.GFX12>;
def GFX11Gen : GFXGen<isGFX11Only, "GFX11", "_gfx11", SIEncodingFamily.GFX11>;
def GFX10Gen : GFXGen<isGFX10Only, "GFX10", "_gfx10", SIEncodingFamily.GFX10>;

//===----------------------------------------------------------------------===//
// SI DAG Nodes
//===----------------------------------------------------------------------===//

def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>;

def SDTSBufferLoad : SDTypeProfile<1, 3,
  [                    // vdata
   SDTCisVT<1, v4i32>, // rsrc
   SDTCisVT<2, i32>,   // offset(imm)
   SDTCisVT<3, i32>]>; // cachepolicy

def SIsbuffer_load : SDNode<"AMDGPUISD::SBUFFER_LOAD", SDTSBufferLoad,
                            [SDNPMayLoad, SDNPMemOperand]>;

def SIsbuffer_load_byte : SDNode<"AMDGPUISD::SBUFFER_LOAD_BYTE", SDTSBufferLoad,
                                 [SDNPMayLoad, SDNPMemOperand]>;

def SIsbuffer_load_ubyte
    : SDNode<"AMDGPUISD::SBUFFER_LOAD_UBYTE", SDTSBufferLoad,
             [SDNPMayLoad, SDNPMemOperand]>;

def SIsbuffer_load_short
    : SDNode<"AMDGPUISD::SBUFFER_LOAD_SHORT", SDTSBufferLoad,
             [SDNPMayLoad, SDNPMemOperand]>;

def SIsbuffer_load_ushort
    : SDNode<"AMDGPUISD::SBUFFER_LOAD_USHORT", SDTSBufferLoad,
             [SDNPMayLoad, SDNPMemOperand]>;

def SIds_ordered_count : SDNode<"AMDGPUISD::DS_ORDERED_COUNT",
  SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i16>]>,
  [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain, SDNPInGlue]
>;

def SDTAtomic2_f32 : SDTypeProfile<1, 2, [
  SDTCisSameAs<0,2>, SDTCisFP<0>, SDTCisPtrTy<1>
]>;

// load_d16_{lo|hi} ptr, tied_input
def SIload_d16 : SDTypeProfile<1, 2, [
  SDTCisPtrTy<1>,
  SDTCisSameAs<0, 2>
]>;


def SDTtbuffer_load : SDTypeProfile<1, 8,
  [                     // vdata
   SDTCisVT<1, v4i32>,  // rsrc
   SDTCisVT<2, i32>,    // vindex(VGPR)
   SDTCisVT<3, i32>,    // voffset(VGPR)
   SDTCisVT<4, i32>,    // soffset(SGPR)
   SDTCisVT<5, i32>,    // offset(imm)
   SDTCisVT<6, i32>,    // format(imm)
   SDTCisVT<7, i32>,    // cachepolicy, swizzled buffer(imm)
   SDTCisVT<8, i1>      // idxen(imm)
  ]>;

def SItbuffer_load : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT", SDTtbuffer_load,
                            [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
def SItbuffer_load_d16 : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT_D16",
                                SDTtbuffer_load,
                                [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;

def SDTtbuffer_store : SDTypeProfile<0, 9,
  [                     // vdata
   SDTCisVT<1, v4i32>,  // rsrc
   SDTCisVT<2, i32>,    // vindex(VGPR)
   SDTCisVT<3, i32>,    // voffset(VGPR)
   SDTCisVT<4, i32>,    // soffset(SGPR)
   SDTCisVT<5, i32>,    // offset(imm)
   SDTCisVT<6, i32>,    // format(imm)
   SDTCisVT<7, i32>,    // cachepolicy, swizzled buffer(imm)
   SDTCisVT<8, i1>      // idxen(imm)
  ]>;

def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", SDTtbuffer_store,
                             [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SItbuffer_store_d16 : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT_D16",
                                 SDTtbuffer_store,
                                 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;

def SDTBufferLoad : SDTypeProfile<1, 7,
  [                    // vdata
   SDTCisVT<1, v4i32>, // rsrc
   SDTCisVT<2, i32>,   // vindex(VGPR)
   SDTCisVT<3, i32>,   // voffset(VGPR)
   SDTCisVT<4, i32>,   // soffset(SGPR)
   SDTCisVT<5, i32>,   // offset(imm)
   SDTCisVT<6, i32>,   // cachepolicy, swizzled buffer(imm)
   SDTCisVT<7, i1>]>;  // idxen(imm)

def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad,
                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_ubyte : SDNode <"AMDGPUISD::BUFFER_LOAD_UBYTE", SDTBufferLoad,
                                  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_ushort : SDNode <"AMDGPUISD::BUFFER_LOAD_USHORT", SDTBufferLoad,
                                   [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_byte : SDNode <"AMDGPUISD::BUFFER_LOAD_BYTE", SDTBufferLoad,
                                 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_short: SDNode <"AMDGPUISD::BUFFER_LOAD_SHORT", SDTBufferLoad,
                                 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_tfe : SDNode <"AMDGPUISD::BUFFER_LOAD_TFE", SDTBufferLoad,
                                [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_ubyte_tfe : SDNode <"AMDGPUISD::BUFFER_LOAD_UBYTE_TFE", SDTBufferLoad,
                                      [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_ushort_tfe : SDNode <"AMDGPUISD::BUFFER_LOAD_USHORT_TFE", SDTBufferLoad,
                                       [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_byte_tfe : SDNode <"AMDGPUISD::BUFFER_LOAD_BYTE_TFE", SDTBufferLoad,
                                     [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_short_tfe: SDNode <"AMDGPUISD::BUFFER_LOAD_SHORT_TFE", SDTBufferLoad,
                                     [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_format : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT", SDTBufferLoad,
                                   [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_format_tfe : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT_TFE", SDTBufferLoad,
                                       [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_format_d16 : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT_D16",
                                       SDTBufferLoad,
                                       [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;

def SDTBufferStore : SDTypeProfile<0, 8,
  [                    // vdata
   SDTCisVT<1, v4i32>, // rsrc
   SDTCisVT<2, i32>,   // vindex(VGPR)
   SDTCisVT<3, i32>,   // voffset(VGPR)
   SDTCisVT<4, i32>,   // soffset(SGPR)
   SDTCisVT<5, i32>,   // offset(imm)
   SDTCisVT<6, i32>,   // cachepolicy, swizzled buffer(imm)
   SDTCisVT<7, i1>]>;  // idxen(imm)

def SIbuffer_store : SDNode <"AMDGPUISD::BUFFER_STORE", SDTBufferStore,
                             [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_byte: SDNode <"AMDGPUISD::BUFFER_STORE_BYTE",
                                 SDTBufferStore,
                                 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_short : SDNode <"AMDGPUISD::BUFFER_STORE_SHORT",
                                   SDTBufferStore,
                                   [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_format : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT",
                                    SDTBufferStore,
                                    [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_format_d16 : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT_D16",
                                        SDTBufferStore,
                                        [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;

multiclass SDBufferAtomic<string opcode> {
  def "" : SDNode <opcode,
    SDTypeProfile<1, 8,
      [SDTCisVT<2, v4i32>, // rsrc
       SDTCisVT<3, i32>,   // vindex(VGPR)
       SDTCisVT<4, i32>,   // voffset(VGPR)
       SDTCisVT<5, i32>,   // soffset(SGPR)
       SDTCisVT<6, i32>,   // offset(imm)
       SDTCisVT<7, i32>,   // cachepolicy(imm)
       SDTCisVT<8, i1>]>,  // idxen(imm)
    [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
  >;
  def "_noret" : PatFrag<
    (ops node:$vdata_in, node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
         node:$offset, node:$cachepolicy, node:$idxen),
    (!cast<SDNode>(NAME) node:$vdata_in, node:$rsrc, node:$vindex,
      node:$voffset, node:$soffset, node:$offset, node:$cachepolicy,
      node:$idxen)> {
    let HasNoUse = true;
  }
}

defm SIbuffer_atomic_swap : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SWAP">;
defm SIbuffer_atomic_add : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_ADD">;
defm SIbuffer_atomic_sub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SUB">;
defm SIbuffer_atomic_smin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMIN">;
defm SIbuffer_atomic_umin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMIN">;
defm SIbuffer_atomic_smax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMAX">;
defm SIbuffer_atomic_umax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMAX">;
defm SIbuffer_atomic_and : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_AND">;
defm SIbuffer_atomic_or : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_OR">;
defm SIbuffer_atomic_xor : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_XOR">;
defm SIbuffer_atomic_inc : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_INC">;
defm SIbuffer_atomic_dec : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_DEC">;
defm SIbuffer_atomic_csub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_CSUB">;
defm SIbuffer_atomic_fadd : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FADD">;
defm SIbuffer_atomic_fmin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMIN">;
defm SIbuffer_atomic_fmax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMAX">;
defm SIbuffer_atomic_cond_sub_u32 : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_COND_SUB_U32">;

def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP",
  SDTypeProfile<1, 9,
    [SDTCisVT<3, v4i32>, // rsrc
     SDTCisVT<4, i32>,   // vindex(VGPR)
     SDTCisVT<5, i32>,   // voffset(VGPR)
     SDTCisVT<6, i32>,   // soffset(SGPR)
     SDTCisVT<7, i32>,   // offset(imm)
     SDTCisVT<8, i32>,   // cachepolicy(imm)
     SDTCisVT<9, i1>]>,  // idxen(imm)
  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;

def SIbuffer_atomic_cmpswap_noret : PatFrag<
  (ops node:$src, node:$cmp, node:$rsrc, node:$vindex, node:$voffset,
       node:$soffset, node:$offset, node:$cachepolicy, node:$idxen),
  (SIbuffer_atomic_cmpswap node:$src, node:$cmp, node:$rsrc, node:$vindex,
    node:$voffset, node:$soffset, node:$offset, node:$cachepolicy,
    node:$idxen)> {
  let HasNoUse = true;
}

class SDGlobalAtomicNoRtn<string opcode, ValueType ty> : SDNode <opcode,
  SDTypeProfile<0, 2,
    [SDTCisPtrTy<0>,    // vaddr
     SDTCisVT<1, ty>]>, // vdata
  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;

def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET",
  SDTypeProfile<1, 2, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>
>;

def SIlds : SDNode<"AMDGPUISD::LDS",
  SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>]>
>;

def SIload_d16_lo : SDNode<"AMDGPUISD::LOAD_D16_LO",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_lo_u8 : SDNode<"AMDGPUISD::LOAD_D16_LO_U8",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_lo_i8 : SDNode<"AMDGPUISD::LOAD_D16_LO_I8",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_hi : SDNode<"AMDGPUISD::LOAD_D16_HI",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_hi_u8 : SDNode<"AMDGPUISD::LOAD_D16_HI_U8",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_hi_i8 : SDNode<"AMDGPUISD::LOAD_D16_HI_I8",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE",
  SDTypeProfile<0, 1, [SDTCisInt<0>]>,
  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
>;


// Optimize v_mfma_scale* instructions to avoid the scale if the
// scales are known 0.
class UnscaledMFMAOptimizationPat<SDPatternOperator intrin> : PatFrag<
  (ops node:$srca, node:$srcb, node:$srcc,
       node:$cbsz, node:$blgp),
  (intrin $srca, $srcb, $srcc, $cbsz, $blgp,
          srcvalue, 0, srcvalue, 0)
>;

def mfma_f32_16x16x128_f8f6f4 : UnscaledMFMAOptimizationPat<int_amdgcn_mfma_scale_f32_16x16x128_f8f6f4>;
def mfma_f32_32x32x64_f8f6f4 : UnscaledMFMAOptimizationPat<int_amdgcn_mfma_scale_f32_32x32x64_f8f6f4>;

//===----------------------------------------------------------------------===//
// ValueType helpers
//===----------------------------------------------------------------------===//

class isIntType<ValueType SrcVT> {
  bit ret = !and(SrcVT.isInteger, !ne(SrcVT.Value, i1.Value));
}

def SDTSBufferPrefetch : SDTypeProfile<0, 3,
  [SDTCisVT<0, v4i32>, // rsrc
   SDTCisVT<1, i32>,   // offset(imm)
   SDTCisVT<2, i32>]>; // length

def SIsbuffer_prefetch : SDNode<"AMDGPUISD::SBUFFER_PREFETCH_DATA", SDTSBufferPrefetch,
                                [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;

//===----------------------------------------------------------------------===//
// SDNodes PatFrags for loads/stores with a glue input.
// This is for SDNodes and PatFrag for local loads and stores to
// enable s_mov_b32 m0, -1 to be glued to the memory instructions.
//
// These mirror the regular load/store PatFrags and rely on special
// processing during Select() to add the glued copy.
//
//===----------------------------------------------------------------------===//

def AMDGPUld_glue : SDNode <"ISD::LOAD", SDTLoad,
  [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
>;

def AMDGPUatomic_ld_glue : SDNode <"ISD::ATOMIC_LOAD", SDTAtomicLoad,
  [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
>;

def unindexedload_glue : PatFrag <(ops node:$ptr), (AMDGPUld_glue node:$ptr)> {
  let IsLoad = 1;
  let IsUnindexed = 1;
}

def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def atomic_load_zext_glue :
  PatFrag<(ops node:$ptr), (AMDGPUatomic_ld_glue node:$ptr)> {
  let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
  let IsZeroExtLoad = true;
}

def atomic_load_sext_glue :
  PatFrag<(ops node:$ptr), (AMDGPUatomic_ld_glue node:$ptr)> {
  let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
  let IsSignExtLoad = true;
}

def atomic_load_8_glue : PatFrag<(ops node:$ptr),
                                 (AMDGPUatomic_ld_glue node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i8;
}

def atomic_load_16_glue : PatFrag<(ops node:$ptr),
                                  (AMDGPUatomic_ld_glue node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i16;
}

def atomic_load_32_glue : PatFrag<(ops node:$ptr),
                                  (AMDGPUatomic_ld_glue node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i32;
}

def atomic_load_64_glue : PatFrag<(ops node:$ptr),
                                  (AMDGPUatomic_ld_glue node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i64;
}

def atomic_load_zext_8_glue : PatFrag<(ops node:$ptr),
                                      (atomic_load_zext_glue node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i8;
}

def atomic_load_sext_8_glue : PatFrag<(ops node:$ptr),
                                      (atomic_load_sext_glue node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i8;
}

def atomic_load_zext_16_glue : PatFrag<(ops node:$ptr),
                                       (atomic_load_zext_glue node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i16;
}

def atomic_load_sext_16_glue : PatFrag<(ops node:$ptr),
                                       (atomic_load_sext_glue node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i16;
}

def extload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsAnyExtLoad = 1;
}

def sextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsSignExtLoad = 1;
}

def zextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsZeroExtLoad = 1;
}

def extloadi8_glue : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def zextloadi8_glue : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def extloadi16_glue : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}

def zextloadi16_glue : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}

def sextloadi8_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def sextloadi16_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}


let IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
def load_local_m0 : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> {
  let IsNonExtLoad = 1;
}

def extloadi8_local_m0 : PatFrag<(ops node:$ptr), (extloadi8_glue node:$ptr)>;
def sextloadi8_local_m0 : PatFrag<(ops node:$ptr), (sextloadi8_glue node:$ptr)>;
def zextloadi8_local_m0 : PatFrag<(ops node:$ptr), (zextloadi8_glue node:$ptr)>;

def extloadi16_local_m0 : PatFrag<(ops node:$ptr), (extloadi16_glue node:$ptr)>;
def sextloadi16_local_m0 : PatFrag<(ops node:$ptr), (sextloadi16_glue node:$ptr)>;
def zextloadi16_local_m0 : PatFrag<(ops node:$ptr), (zextloadi16_glue node:$ptr)>;
} // End IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces

def load_align8_local_m0 : PatFrag<(ops node:$ptr),
                                   (load_local_m0 node:$ptr)> {
  let IsLoad = 1;
  int MinAlignment = 8;
}

def load_align16_local_m0 : PatFrag<(ops node:$ptr),
                                    (load_local_m0 node:$ptr)> {
  let IsLoad = 1;
  int MinAlignment = 16;
}

let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
def atomic_load_8_local_m0 : PatFrag<(ops node:$ptr),
                                     (atomic_load_8_glue node:$ptr)>;
def atomic_load_16_local_m0 : PatFrag<(ops node:$ptr),
                                      (atomic_load_16_glue node:$ptr)>;
def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr),
                                      (atomic_load_32_glue node:$ptr)>;
def atomic_load_64_local_m0 : PatFrag<(ops node:$ptr),
                                      (atomic_load_64_glue node:$ptr)>;

def atomic_load_zext_8_local_m0 : PatFrag<(ops node:$ptr),
                                          (atomic_load_zext_8_glue node:$ptr)>;
def atomic_load_sext_8_local_m0 : PatFrag<(ops node:$ptr),
                                          (atomic_load_sext_8_glue node:$ptr)>;
def atomic_load_zext_16_local_m0 : PatFrag<(ops node:$ptr),
                                           (atomic_load_zext_16_glue node:$ptr)>;
def atomic_load_sext_16_local_m0 : PatFrag<(ops node:$ptr),
                                           (atomic_load_sext_16_glue node:$ptr)>;
} // End let AddressSpaces = LoadAddress_local.AddrSpaces


def AMDGPUst_glue : SDNode <"ISD::STORE", SDTStore,
  [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
>;

def AMDGPUatomic_st_glue : SDNode <"ISD::ATOMIC_STORE", SDTAtomicStore,
  [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
>;

def unindexedstore_glue : PatFrag<(ops node:$val, node:$ptr),
                                  (AMDGPUst_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsUnindexed = 1;
}

def store_glue : PatFrag<(ops node:$val, node:$ptr),
                         (unindexedstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

def truncstore_glue : PatFrag<(ops node:$val, node:$ptr),
                              (unindexedstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 1;
}

def truncstorei8_glue : PatFrag<(ops node:$val, node:$ptr),
                                (truncstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = i8;
  let IsTruncStore = 1;
}

def truncstorei16_glue : PatFrag<(ops node:$val, node:$ptr),
                                 (truncstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = i16;
  let IsTruncStore = 1;
}

let IsStore = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {
def store_local_m0 : PatFrag<(ops node:$val, node:$ptr),
                             (store_glue node:$val, node:$ptr)>;
def truncstorei8_local_m0 : PatFrag<(ops node:$val, node:$ptr),
                                    (truncstorei8_glue node:$val, node:$ptr)>;
def truncstorei16_local_m0 : PatFrag<(ops node:$val, node:$ptr),
                                     (truncstorei16_glue node:$val, node:$ptr)>;
}

def store_align8_local_m0 : PatFrag <(ops node:$value, node:$ptr),
                                     (store_local_m0 node:$value, node:$ptr)>,
                            Aligned<8> {
  let IsStore = 1;
}

def store_align16_local_m0 : PatFrag <(ops node:$value, node:$ptr),
                                      (store_local_m0 node:$value, node:$ptr)>,
                             Aligned<16> {
  let IsStore = 1;
}

let PredicateCode = [{return cast<MemSDNode>(N)->getAlign() < 4;}],
    GISelPredicateCode = [{return (*MI.memoperands_begin())->getAlign() < 4;}],
    AddressSpaces = [ AddrSpaces.Local ] in {
def load_align_less_than_4_local : PatFrag<(ops node:$ptr),
                                           (load_local node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def load_align_less_than_4_local_m0 : PatFrag<(ops node:$ptr),
                                              (load_local_m0 node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def store_align_less_than_4_local : PatFrag <(ops node:$value, node:$ptr),
                                             (store_local node:$value, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

def store_align_less_than_4_local_m0 : PatFrag <(ops node:$value, node:$ptr),
                                                (store_local_m0 node:$value, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 0;
}
}

def atomic_store_8_glue : PatFrag <
  (ops node:$ptr, node:$value),
  (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
  let IsAtomic = 1;
  let MemoryVT = i8;
}

def atomic_store_16_glue : PatFrag <
  (ops node:$ptr, node:$value),
  (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
  let IsAtomic = 1;
  let MemoryVT = i16;
}

def atomic_store_32_glue : PatFrag <
  (ops node:$ptr, node:$value),
  (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
  let IsAtomic = 1;
  let MemoryVT = i32;
}

def atomic_store_64_glue : PatFrag <
  (ops node:$ptr, node:$value),
  (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
  let IsAtomic = 1;
  let MemoryVT = i64;
}

let IsAtomic = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {
def atomic_store_8_local_m0 : PatFrag<(ops node:$val, node:$ptr),
                                      (atomic_store_8_glue node:$val, node:$ptr)>;
def atomic_store_16_local_m0 : PatFrag<(ops node:$val, node:$ptr),
                                       (atomic_store_16_glue node:$val, node:$ptr)>;
def atomic_store_32_local_m0 : PatFrag<(ops node:$val, node:$ptr),
                                       (atomic_store_32_glue node:$val, node:$ptr)>;
def atomic_store_64_local_m0 : PatFrag<(ops node:$val, node:$ptr),
                                       (atomic_store_64_glue node:$val, node:$ptr)>;
} // End let IsAtomic = 1, AddressSpaces = StoreAddress_local.AddrSpaces


//===----------------------------------------------------------------------===//
// SDNodes PatFrags for a16 loads and stores with 3 components.
// v3f16/v3i16 is widened to v4f16/v4i16, so we need to match on the memory
// load/store size.
//===----------------------------------------------------------------------===//

class mubuf_intrinsic_load<SDPatternOperator name, ValueType vt> : PatFrag <
  (ops node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
       node:$auxiliary, node:$idxen),
  (name node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
        node:$auxiliary, node:$idxen)> {
  let IsLoad = 1;
  let MemoryVT = vt;
}

class mubuf_intrinsic_store<SDPatternOperator name, ValueType vt> : PatFrag <
  (ops node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
       node:$auxiliary, node:$idxen),
  (name node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
        node:$auxiliary, node:$idxen)> {
  let IsStore = 1;
  let MemoryVT = vt;
}

class mtbuf_intrinsic_load<SDPatternOperator name, ValueType vt> : PatFrag <
  (ops node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
       node:$format, node:$auxiliary, node:$idxen),
  (name node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
        node:$format, node:$auxiliary, node:$idxen)> {
  let IsLoad = 1;
  let MemoryVT = vt;
}

class mtbuf_intrinsic_store<SDPatternOperator name, ValueType vt> : PatFrag <
  (ops node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
       node:$format, node:$auxiliary, node:$idxen),
  (name node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
        node:$format, node:$auxiliary, node:$idxen)> {
  let IsStore = 1;
  let MemoryVT = vt;
}

//===----------------------------------------------------------------------===//
// SDNodes PatFrags for d16 loads
//===----------------------------------------------------------------------===//

class LoadD16Frag <SDPatternOperator op> : PatFrag<
  (ops node:$ptr, node:$tied_in),
  (op node:$ptr, node:$tied_in)> {
  let IsLoad = 1;
}

foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {

def load_d16_hi_#as : LoadD16Frag <SIload_d16_hi>;

def az_extloadi8_d16_hi_#as : LoadD16Frag <SIload_d16_hi_u8> {
  let MemoryVT = i8;
}

def sextloadi8_d16_hi_#as : LoadD16Frag <SIload_d16_hi_i8> {
  let MemoryVT = i8;
}

def load_d16_lo_#as : LoadD16Frag <SIload_d16_lo>;

def az_extloadi8_d16_lo_#as : LoadD16Frag <SIload_d16_lo_u8> {
  let MemoryVT = i8;
}

def sextloadi8_d16_lo_#as : LoadD16Frag <SIload_d16_lo_i8> {
  let MemoryVT = i8;
}

} // End let AddressSpaces = ...
} // End foreach AddrSpace

def lshr_rev : PatFrag <
  (ops node:$src1, node:$src0),
  (srl $src0, $src1)
>;

def ashr_rev : PatFrag <
  (ops node:$src1, node:$src0),
  (sra $src0, $src1)
>;

def lshl_rev : PatFrag <
  (ops node:$src1, node:$src0),
  (shl $src0, $src1)
>;

def add_ctpop : PatFrag <
  (ops node:$src0, node:$src1),
  (add (ctpop $src0), $src1)
>;

def xnor : PatFrag <
  (ops node:$src0, node:$src1),
  (not (xor $src0, $src1))
>;

foreach I = 1-4 in {
def shl#I#_add : PatFrag <
  (ops node:$src0, node:$src1),
  (add (shl_oneuse $src0, (i32 I)), $src1)> {
  // FIXME: Poor substitute for disabling pattern in SelectionDAG
  let PredicateCode = [{return false;}];
  let GISelPredicateCode = [{return true;}];
}
}

multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0,
                            SDTypeProfile tc = SDTAtomic2,
                            bit IsInt = 1> {

  def _glue : SDNode <
    !if(is_amdgpu, "AMDGPUISD", "ISD")#"::ATOMIC_"#op_name, tc,
    [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
  >;

  let AddressSpaces = StoreAddress_local.AddrSpaces in {

    if IsInt then {
      defm _local_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue")>;
      defm _local_m0 : noret_binary_atomic_op <!cast<SDNode>(NAME#"_glue")>;
    } else {
      defm _local_m0 : binary_atomic_op_fp <!cast<SDNode>(NAME#"_glue")>;
      defm _local_m0 : noret_binary_atomic_op_fp <!cast<SDNode>(NAME#"_glue")>;
    }
  }

  let AddressSpaces = StoreAddress_region.AddrSpaces in {
    if IsInt then {
      defm _region_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue")>;
      defm _region_m0 : noret_binary_atomic_op <!cast<SDNode>(NAME#"_glue")>;
    } else {
      defm _region_m0 : binary_atomic_op_fp <!cast<SDNode>(NAME#"_glue")>;
      defm _region_m0 : noret_binary_atomic_op_fp <!cast<SDNode>(NAME#"_glue")>;
    }
  }
}

defm atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">;
defm atomic_load_sub : SIAtomicM0Glue2 <"LOAD_SUB">;
defm atomic_load_uinc_wrap : SIAtomicM0Glue2 <"LOAD_UINC_WRAP">;
defm atomic_load_udec_wrap : SIAtomicM0Glue2 <"LOAD_UDEC_WRAP">;
defm atomic_load_and : SIAtomicM0Glue2 <"LOAD_AND">;
defm atomic_load_min : SIAtomicM0Glue2 <"LOAD_MIN">;
defm atomic_load_max : SIAtomicM0Glue2 <"LOAD_MAX">;
defm atomic_load_or : SIAtomicM0Glue2 <"LOAD_OR">;
defm atomic_load_xor : SIAtomicM0Glue2 <"LOAD_XOR">;
defm atomic_load_umin : SIAtomicM0Glue2 <"LOAD_UMIN">;
defm atomic_load_umax : SIAtomicM0Glue2 <"LOAD_UMAX">;
defm atomic_swap : SIAtomicM0Glue2 <"SWAP">;
defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 0, SDTAtomic2_f32, 0>;
defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 0, SDTAtomic2_f32, 0>;
defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 0, SDTAtomic2_f32, 0>;

def as_i1timm : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1);
}]>;

def as_i1timm_zext : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;

def as_i8imm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i8);
}]>;

def as_i8timm : SDNodeXForm<timm, [{
  return CurDAG->getSignedTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
}]>;

def as_i16imm : SDNodeXForm<imm, [{
  return CurDAG->getSignedTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
}]>;

def as_i16timm : SDNodeXForm<timm, [{
  // Explicit cast, as this is used with both signed and unsigned immediates.
  return CurDAG->getSignedTargetConstant(int16_t(N->getSExtValue()), SDLoc(N),
                                         MVT::i16);
}]>;

def as_i32imm: SDNodeXForm<imm, [{
  return CurDAG->getSignedTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;

def as_i32timm: SDNodeXForm<timm, [{
  return CurDAG->getSignedTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;

def as_i64imm: SDNodeXForm<imm, [{
  return CurDAG->getSignedTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64);
}]>;

def cond_as_i32imm: SDNodeXForm<cond, [{
  return CurDAG->getTargetConstant(N->get(), SDLoc(N), MVT::i32);
}]>;

// Copied from the AArch64 backend:
def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
  N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
}]>;

def frameindex_to_targetframeindex : SDNodeXForm<frameindex, [{
  auto FI = cast<FrameIndexSDNode>(N);
  return CurDAG->getTargetFrameIndex(FI->getIndex(), MVT::i32);
}]>;

// Copied from the AArch64 backend:
def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
  N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
}]>;

def as_hw_round_mode : SDNodeXForm<timm, [{
  // "round.towardzero" -> TowardZero 0        -> FP_ROUND_ROUND_TO_ZERO 3
  // "round.tonearest"  -> NearestTiesToEven 1 -> FP_ROUND_ROUND_TO_NEAREST 0
  // "round.upward"     -> TowardPositive 2    -> FP_ROUND_ROUND_TO_INF 1
  // "round.downward"   -> TowardNegative 3    -> FP_ROUND_ROUND_TO_NEGINF 2
  return CurDAG->getTargetConstant((N->getSExtValue() + 3) % 4, SDLoc(N),
                                   MVT::i32);
}]>;

def SupportedRoundMode : TImmLeaf<i32, [{
  return Imm == (int)RoundingMode::TowardZero ||
         Imm == (int)RoundingMode::NearestTiesToEven ||
         Imm == (int)RoundingMode::TowardPositive ||
         Imm == (int)RoundingMode::TowardNegative;
}]>;

class bitextract_imm<int bitnum> : SDNodeXForm<imm, [{
  uint64_t Imm = N->getZExtValue();
  unsigned Bit = (Imm >> }] # bitnum # [{ ) & 1;
  return CurDAG->getTargetConstant(Bit, SDLoc(N), MVT::i1);
}]>;

def SIMM16bit : TImmLeaf <i32,
  [{return isInt<16>(Imm) || isUInt<16>(Imm);}],
  as_i16timm
>;

def i64imm_32bit : ImmLeaf<i64, [{
  return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
}]>;

def InlineImm64 : IntImmLeaf<i64, [{
  return isInlineImmediate(Imm);
}]>;

def InlineImmFP32 : FPImmLeaf<f32, [{
  return isInlineImmediate(Imm);
}]>;

def InlineImmFP64 : FPImmLeaf<f64, [{
  return isInlineImmediate(Imm);
}]>;


class VGPRImm <dag frag> : PatLeaf<frag, [{
  return isVGPRImm(N);
}]> {
  let GISelPredicateCode = [{return true;}];
}

def NegateImm : SDNodeXForm<imm, [{
  return CurDAG->getConstant(-N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;

// TODO: When FP inline imm values work?
def NegSubInlineConst32 : ImmLeaf<i32, [{
  return Imm < -16 && Imm >= -64;
}], NegateImm>;

def NegSubInlineIntConst16 : ImmLeaf<i16, [{
  return Imm < -16 && Imm >= -64;
}], NegateImm>;

def ShiftAmt32Imm : ImmLeaf <i32, [{
  return Imm < 32;
}]>;

def fp16_zeros_high_16bits : PatLeaf<(f16 VGPR_32:$src), [{
  return fp16SrcZerosHighBits(N->getOpcode());
}]>;

def MFMALdScaleXForm : SDNodeXForm<timm, [{
  unsigned Val = N->getZExtValue();
  unsigned New = 0;
  if (Val & 0x1)
    New |= SISrcMods::OP_SEL_0;
  if (Val & 0x2)
    New |= SISrcMods::OP_SEL_1;
  return CurDAG->getTargetConstant(New, SDLoc(N), MVT::i32);
}]>;

def is_canonicalized : PatLeaf<(fAny srcvalue:$src), [{
  const SITargetLowering &Lowering =
      *static_cast<const SITargetLowering *>(getTargetLowering());
  return Lowering.isCanonicalized(*CurDAG, SDValue(N, 0));
}]> {
  let GISelPredicateCode = [{
    const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
        MF.getSubtarget().getTargetLowering());
    const MachineOperand &Dst = MI.getOperand(0);
    assert(Dst.isDef());
    return TLI->isCanonicalized(Dst.getReg(), MF);
  }];
}

//===----------------------------------------------------------------------===//
// MUBUF/SMEM Patterns
//===----------------------------------------------------------------------===//

def extract_cpol : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(
      N->getZExtValue() & (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12
                               ? AMDGPU::CPol::ALL
                               : AMDGPU::CPol::ALL_pregfx12),
      SDLoc(N), MVT::i8);
}]>;

def extract_swz : SDNodeXForm<timm, [{
  const bool Swizzle =
      N->getZExtValue() & (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12
                               ? AMDGPU::CPol::SWZ
                               : AMDGPU::CPol::SWZ_pregfx12);
  return CurDAG->getTargetConstant(Swizzle, SDLoc(N), MVT::i8);
}]>;

def extract_cpol_set_glc : SDNodeXForm<timm, [{
  const uint32_t cpol =
      N->getZExtValue() & (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12
                               ? AMDGPU::CPol::ALL
                               : AMDGPU::CPol::ALL_pregfx12);
  return CurDAG->getTargetConstant(cpol | AMDGPU::CPol::GLC, SDLoc(N), MVT::i8);
}]>;

//===----------------------------------------------------------------------===//
// Custom Operands
//===----------------------------------------------------------------------===//

def SOPPBrTarget : CustomOperand<OtherVT> {
  let PrintMethod = "printOperand";
  let EncoderMethod = "getSOPPBrEncoding";
  let DecoderMethod = "decodeSOPPBrTarget";
  let OperandType = "OPERAND_PCREL";
}

def si_ga : Operand<iPTR>;

def InterpSlot : CustomOperand<i32>;

// It appears to be necessary to create a separate operand for this to
// be able to parse attr<num> with no space.
def InterpAttr : CustomOperand<i32>;

def InterpAttrChan : ImmOperand<i32>;

def SplitBarrier : ImmOperand<i32> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_INLINE_SPLIT_BARRIER_INT32";
  let DecoderMethod = "decodeSplitBarrier";
  let PrintMethod = "printOperand";
}

def VReg32OrOffClass : AsmOperandClass {
  let Name = "VReg32OrOff";
  let ParserMethod = "parseVReg32OrOff";
}

def SendMsg : CustomOperand<i32>;

def Swizzle : CustomOperand<i16, 1>;

def Endpgm : CustomOperand<i16, 1>;

def SWaitCnt : CustomOperand<i32>;

def DepCtr : CustomOperand<i32>;

def SDelayALU : CustomOperand<i32>;

include "SIInstrFormats.td"
include "VIInstrFormats.td"

def BoolReg : AsmOperandClass {
  let Name = "BoolReg";
  let ParserMethod = "parseBoolReg";
  let RenderMethod = "addRegOperands";
}

class BoolRC : RegisterOperand<SReg_1> {
  let ParserMatchClass = BoolReg;
  let DecoderMethod = "decodeBoolReg";
}

def SSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
  let ParserMatchClass = BoolReg;
  let DecoderMethod = "decodeBoolReg";
}

def VOPDstS64orS32 : BoolRC {
  let PrintMethod = "printVOPDst";
}

// SCSrc_i1 is the operand for pseudo instructions only.
// Boolean immediates shall not be exposed to codegen instructions.
def SCSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_REG_IMM_INT32";
  let ParserMatchClass = BoolReg;
  let DecoderMethod = "decodeBoolReg";
}

// ===----------------------------------------------------------------------===//
// ExpSrc* Special cases for exp src operands which are printed as
// "off" depending on en operand.
// ===----------------------------------------------------------------------===//

def ExpSrc0 : RegisterOperand<VGPR_32> {
  let PrintMethod = "printExpSrc0";
  let ParserMatchClass = VReg32OrOffClass;
}

def ExpSrc1 : RegisterOperand<VGPR_32> {
  let PrintMethod = "printExpSrc1";
  let ParserMatchClass = VReg32OrOffClass;
}

def ExpSrc2 : RegisterOperand<VGPR_32> {
  let PrintMethod = "printExpSrc2";
  let ParserMatchClass = VReg32OrOffClass;
}

def ExpSrc3 : RegisterOperand<VGPR_32> {
  let PrintMethod = "printExpSrc3";
  let ParserMatchClass = VReg32OrOffClass;
}

class SDWASrc<ValueType vt> : RegisterOperand<VS_32> {
  let OperandNamespace = "AMDGPU";
  string Type = !if(vt.isFP, "FP", "INT");
  let OperandType = "OPERAND_REG_INLINE_C_"#Type#vt.Size;
  let DecoderMethod = "decodeSDWASrc"#vt.Size;
  let EncoderMethod = "getSDWASrcEncoding";
}

def SDWASrc_i32 : SDWASrc<i32>;
def SDWASrc_i16 : SDWASrc<i16>;
def SDWASrc_f32 : SDWASrc<f32>;
def SDWASrc_f16 : SDWASrc<f16>;

def SDWAVopcDst : BoolRC {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_SDWA_VOPC_DST";
  let EncoderMethod = "getSDWAVopcDstEncoding";
  let DecoderMethod = "decodeSDWAVopcDst";
  let PrintMethod = "printVOPDst";
}

class NamedIntOperand<string prefix, bit Optional = 1, string name = NAME>
    : CustomOperand<i32, Optional, name> {
  string Prefix = prefix;

  let PredicateMethod =
    "getPredicate([](const AMDGPUOperand &Op) -> bool { "#
    "return Op.isImmTy(AMDGPUOperand::"#ImmTy#"); })";

  string Validator = "[](int64_t V) { return true; }";
  string ConvertMethod = "[](int64_t &V) { return "#Validator#"(V); }";
  let ParserMethod =
    "[this](OperandVector &Operands) -> ParseStatus { "#
    "return parseIntWithPrefix(\""#Prefix#"\", Operands, "#
    "AMDGPUOperand::"#ImmTy#", "#ConvertMethod#"); }";

  bit PrintInHex = 0;
  bit AlwaysPrint = 0;
  let PrintMethod = "[this](const MCInst *MI, unsigned OpNo, "
                    "const MCSubtargetInfo &STI, raw_ostream &O) { "
                    "printNamedInt(MI, OpNo, STI, O, \""#Prefix#"\", "#
                    !if(PrintInHex, "true", "false")#", "#
                    !if(AlwaysPrint, "true", "false")#"); }";
}

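// Illustrative note (not part of the upstream file): a concrete operand is
// normally declared by instantiating the class, e.g.
//
//   def Offset : NamedIntOperand<"offset">;
//
// (see the defs further down), which yields an operand that is parsed and
// printed as "offset:<value>"; an overridden Validator string is folded into
// the generated ConvertMethod lambda.
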
class NamedBitOperand<string Id, string Name = NAME>
    : CustomOperand<i1, 1, Name> {
  let PredicateMethod = "isImmTy<AMDGPUOperand::"#ImmTy#">";
  let ParserMethod =
    "[this](OperandVector &Operands) -> ParseStatus { "#
    "return parseNamedBit(\""#Id#"\", Operands, AMDGPUOperand::"#ImmTy#"); }";
  let PrintMethod = "[this](const MCInst *MI, unsigned OpNo, "#
                    "const MCSubtargetInfo &STI, raw_ostream &O) { "#
                    "printNamedBit(MI, OpNo, O, \""#Id#"\"); }";
}

class DefaultOperand<CustomOperand Op, int Value>
    : OperandWithDefaultOps<Op.Type, (ops (Op.Type Value))>,
      CustomOperandProps<1> {
  let ParserMatchClass = Op.ParserMatchClass;
  let PrintMethod = Op.PrintMethod;
}

class SDWAOperand<string Id, string Name = NAME>
    : CustomOperand<i32, 1, Name> {
  let ParserMethod =
    "[this](OperandVector &Operands) -> ParseStatus { "#
    "return parseSDWASel(Operands, \""#Id#"\", AMDGPUOperand::"#ImmTy#"); }";
}

class ArrayOperand0<string Id, string Name = NAME>
    : OperandWithDefaultOps<i32, (ops (i32 0))>,
      CustomOperandProps<1, Name> {
  let ParserMethod =
    "[this](OperandVector &Operands) -> ParseStatus { "#
    "return parseOperandArrayWithPrefix(\""#Id#"\", Operands, "#
    "AMDGPUOperand::"#ImmTy#"); }";
}

let ImmTy = "ImmTyOffset" in
def flat_offset : CustomOperand<i32, 1, "FlatOffset">;
let PrintMethod = "printOffset" in
def Offset : NamedIntOperand<"offset">;
let Validator = "isUInt<8>" in {
def Offset0 : NamedIntOperand<"offset0">;
def Offset1 : NamedIntOperand<"offset1">;
}

def gds : NamedBitOperand<"gds", "GDS">;

def omod : CustomOperand<i32, 1, "OModSI">;
def omod0 : DefaultOperand<omod, 0>;

// We need to make the cases with a default of 0 distinct from no
// default to help deal with some cases where the operand appears
// before a mandatory operand.
def Clamp : NamedBitOperand<"clamp">;
def Clamp0 : DefaultOperand<Clamp, 0>;
def highmod : NamedBitOperand<"high", "High">;

def CPol : CustomOperand<i32, 1>;
def CPol_0 : DefaultOperand<CPol, 0>;
def CPol_GLC1 : DefaultOperand<CPol, 1>;
def CPol_GLC : ValuePredicatedOperand<CPol, "Op.getImm() & CPol::GLC">;
def CPol_NonGLC : ValuePredicatedOperand<CPol, "!(Op.getImm() & CPol::GLC)", 1>;
def CPol_GLC_WithDefault : DefaultOperand<CPol_GLC, !shl(1, CPolBit.GLC)>;
def CPol_NonGLC_WithDefault : DefaultOperand<CPol_NonGLC, 0>;

def TFE : NamedBitOperand<"tfe">;
def UNorm : NamedBitOperand<"unorm">;
def DA : NamedBitOperand<"da">;
def R128A16 : CustomOperand<i1, 1>;
def A16 : NamedBitOperand<"a16">;
def D16 : NamedBitOperand<"d16">;
def LWE : NamedBitOperand<"lwe">;
def exp_compr : NamedBitOperand<"compr", "ExpCompr">;
def exp_vm : NamedBitOperand<"vm", "ExpVM">;

def FORMAT : CustomOperand<i8>;

let PrintInHex = 1 in
def DMask : NamedIntOperand<"dmask">;

def Dim : CustomOperand<i8, /*optional=*/1>;

def dst_sel : SDWAOperand<"dst_sel", "SDWADstSel">;
def src0_sel : SDWAOperand<"src0_sel", "SDWASrc0Sel">;
def src1_sel : SDWAOperand<"src1_sel", "SDWASrc1Sel">;
def dst_unused : CustomOperand<i32, 1, "SDWADstUnused">;

def op_sel0 : ArrayOperand0<"op_sel", "OpSel">;
def op_sel_hi0 : ArrayOperand0<"op_sel_hi", "OpSelHi">;
def neg_lo0 : ArrayOperand0<"neg_lo", "NegLo">;
def neg_hi0 : ArrayOperand0<"neg_hi", "NegHi">;

def IndexKey16bit : CustomOperand<i32, 1>;
def IndexKey8bit : CustomOperand<i32, 1>;

def dpp8 : CustomOperand<i32, 0, "DPP8">;
def dpp_ctrl : CustomOperand<i32, 0, "DPPCtrl">;

let DefaultValue = "0xf", PrintInHex = 1, AlwaysPrint = 1 in {
def DppRowMask : NamedIntOperand<"row_mask">;
def DppBankMask : NamedIntOperand<"bank_mask">;
}
def DppBoundCtrl : NamedIntOperand<"bound_ctrl"> {
  let ConvertMethod = "[this] (int64_t &BC) -> bool { return convertDppBoundCtrl(BC); }";
  let PrintMethod = "printDppBoundCtrl";
}

let DecoderMethod = "decodeDpp8FI", PrintMethod = "printDppFI" in
def Dpp8FI : NamedIntOperand<"fi", 1, "DppFI">;
let PrintMethod = "printDppFI" in
def Dpp16FI : NamedIntOperand<"fi", 1, "DppFI">;

def blgp : CustomOperand<i32, 1, "BLGP">;
def CBSZ : NamedIntOperand<"cbsz"> {
  let Validator = "isUInt<3>";
}
def ABID : NamedIntOperand<"abid"> {
  let Validator = "isUInt<4>";
}
def hwreg : CustomOperand<i32, 0, "Hwreg">;

def exp_tgt : CustomOperand<i32, 0, "ExpTgt">;

let AlwaysPrint = 1 in {
def WaitVDST : NamedIntOperand<"wait_vdst"> {
  let Validator = "isUInt<4>";
}
def WaitEXP : NamedIntOperand<"wait_exp"> {
  let Validator = "isUInt<3>";
}
def WaitVAVDst : NamedIntOperand<"wait_va_vdst"> {
  let Validator = "isUInt<4>";
}
def WaitVMVSrc : NamedIntOperand<"wait_vm_vsrc"> {
  let Validator = "isUInt<1>";
}
} // End AlwaysPrint = 1

def ByteSel : NamedIntOperand<"byte_sel"> {
  let Validator = "isUInt<2>";
}

let PrintMethod = "printBitOp3" in
def BitOp3 : NamedIntOperand<"bitop3">;
def bitop3_0 : DefaultOperand<BitOp3, 0>;

class KImmFPOperand<ValueType vt> : ImmOperand<vt> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_KIMM"#vt.Size;
  let PrintMethod = "printU"#vt.Size#"ImmOperand";
  let DecoderMethod = "decodeOperand_KImmFP";
}

// 32-bit VALU immediate operand that uses the constant bus.
def KImmFP32 : KImmFPOperand<i32>;

// 32-bit VALU immediate operand with a 16-bit value that uses the
// constant bus.
def KImmFP16 : KImmFPOperand<i16>;

class FPInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "RegOrImmWithFP"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImmWithFPInputMods";
  let PredicateMethod = "isRegOrImmWithFP"#opSize#"InputMods";
}

class FPVCSrcInputModsMatchClass <int opSize> : FPInputModsMatchClass <opSize> {
  let Name = "RegOrInlineImmWithFP"#opSize#"InputMods";
  let PredicateMethod = "isRegOrInlineImmWithFP"#opSize#"InputMods";
}

def FP16InputModsMatchClass : FPInputModsMatchClass<16>;
class FPT16InputModsMatchClass<bit IsFake16> : FPInputModsMatchClass<16> {
  let Name = !if(IsFake16, "RegOrImmWithFPFake16InputMods",
                 "RegOrImmWithFPT16InputMods");
  let PredicateMethod = "isRegOrImmWithFPT16InputMods<" #
                        !if(IsFake16, "true", "false") # ">";
}
def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
def FP64InputModsMatchClass : FPInputModsMatchClass<64>;

class FP16VCSrcInputModsMatchClass<bit IsFake16>
    : FPVCSrcInputModsMatchClass<16> {
  let Name = !if(IsFake16, "RegOrInlineImmWithFPFake16InputMods",
                 "RegOrInlineImmWithFPT16InputMods");
  let PredicateMethod = "isRegOrInlineImmWithFP16InputMods<" #
                        !if(IsFake16, "true", "false") # ">";
}
def FP32VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<32>;

class InputMods <AsmOperandClass matchClass> : Operand <i32> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_INPUT_MODS";
  let ParserMatchClass = matchClass;
}

class FPInputMods <FPInputModsMatchClass matchClass> : InputMods <matchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}

def FP16InputMods : FPInputMods<FP16InputModsMatchClass>;
class FPT16InputMods<bit IsFake16> : FPInputMods<FPT16InputModsMatchClass<IsFake16>> {
  let EncoderMethod = "getMachineOpValueT16";
}
def FP32InputMods : FPInputMods<FP32InputModsMatchClass>;
def FP32T16DstInputMods : FPInputMods<FP32InputModsMatchClass> {
  let EncoderMethod = "getMachineOpValueT16";
}
def FP64InputMods : FPInputMods<FP64InputModsMatchClass>;

class FPT16VCSrcInputMods<bit IsFake16 = 1>
    : FPInputMods<FP16VCSrcInputModsMatchClass<IsFake16>> {
  let EncoderMethod = "getMachineOpValueT16";
}
def FP32VCSrcInputMods : FPInputMods<FP32VCSrcInputModsMatchClass>;

class IntInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "RegOrImmWithInt"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImmWithIntInputMods";
  let PredicateMethod = "isRegOrImmWithInt"#opSize#"InputMods";
}
class IntVCSrcInputModsMatchClass <int opSize> : IntInputModsMatchClass <opSize> {
  let Name = "RegOrInlineImmWithInt"#opSize#"InputMods";
  let PredicateMethod = "isRegOrInlineImmWithInt"#opSize#"InputMods";
}
class IntT16InputModsMatchClass<bit IsFake16> : IntInputModsMatchClass<16> {
  let Name = !if(IsFake16, "RegOrImmWithIntFake16InputMods",
                 "RegOrImmWithIntT16InputMods");
  let PredicateMethod = "isRegOrImmWithIntT16InputMods<" #
                        !if(IsFake16, "true", "false") # ">";
}
def Int32InputModsMatchClass : IntInputModsMatchClass<32>;
def Int64InputModsMatchClass : IntInputModsMatchClass<64>;
def Int32VCSrcInputModsMatchClass : IntVCSrcInputModsMatchClass<32>;
class IntT16VCSrcInputModsMatchClass<bit IsFake16> : IntInputModsMatchClass<16> {
  let Name = !if(IsFake16, "RegOrInlineImmWithIntFake16InputMods",
                 "RegOrInlineImmWithIntT16InputMods");
  let PredicateMethod = "isRegOrInlineImmWithIntT16InputMods<" #
                        !if(IsFake16, "true", "false") # ">";
}

class IntInputMods <IntInputModsMatchClass matchClass> : InputMods <matchClass> {
  let PrintMethod = "printOperandAndIntInputMods";
}
class IntT16InputMods<bit IsFake16> : IntInputMods<IntT16InputModsMatchClass<IsFake16>> {
  let EncoderMethod = "getMachineOpValueT16";
}
def Int32InputMods : IntInputMods<Int32InputModsMatchClass>;
def Int32T16DstInputMods : IntInputMods<Int32InputModsMatchClass> {
  let EncoderMethod = "getMachineOpValueT16";
}
def Int64InputMods : IntInputMods<Int64InputModsMatchClass>;
def Int32VCSrcInputMods : IntInputMods<Int32VCSrcInputModsMatchClass>;
class IntT16VCSrcInputMods<bit IsFake16 = 1>
    : IntInputMods<IntT16VCSrcInputModsMatchClass<IsFake16>> {
  let EncoderMethod = "getMachineOpValueT16";
}

class OpSelModsMatchClass : AsmOperandClass {
  let Name = "OpSelMods";
  let ParserMethod = "parseRegOrImm";
  let PredicateMethod = "isRegOrImm";
}

def IntOpSelModsMatchClass : OpSelModsMatchClass;
def IntOpSelMods : InputMods<IntOpSelModsMatchClass>;

class FPSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "SDWAWithFP"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImmWithFPInputMods";
  let PredicateMethod = "isSDWAFP"#opSize#"Operand";
}

def FP16SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<16>;
def FP32SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<32>;

class FPSDWAInputMods <FPSDWAInputModsMatchClass matchClass> :
  InputMods <matchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}

def FP16SDWAInputMods : FPSDWAInputMods<FP16SDWAInputModsMatchClass>;
def FP32SDWAInputMods : FPSDWAInputMods<FP32SDWAInputModsMatchClass>;

def FPVRegInputModsMatchClass : AsmOperandClass {
  let Name = "VRegWithFPInputMods";
  let ParserMethod = "parseRegWithFPInputMods";
  let PredicateMethod = "isVRegWithInputMods";
}

def FPVRegInputMods : InputMods <FPVRegInputModsMatchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}

def FPVRegT16DstInputMods : InputMods <FPVRegInputModsMatchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
  let EncoderMethod = "getMachineOpValueT16";
}
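
// Illustrative sketch (assumption, not taken from this file): these *InputMods
// operands are typically paired with the source operand they modify in an
// instruction's (ins) list, roughly of the form
//
//   (ins FP32InputMods:$src0_modifiers, VSrc_f32:$src0, ...)
//
// so that the parser and printer can handle modified sources such as "-v0"
// or "|v1|" together with the register itself.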
1430 1431class FPT16_Lo128VRegInputModsMatchClass<bit IsFake16> : AsmOperandClass { 1432 let Name = !if(IsFake16, "Fake16_Lo128VRegWithFPInputMods", 1433 "T16_Lo128VRegWithFPInputMods"); 1434 let ParserMethod = "parseRegWithFPInputMods"; 1435 let PredicateMethod = "isT16_Lo128VRegWithInputMods<" # 1436 !if(IsFake16, "true", "false") # ">"; 1437} 1438 1439class FPT16VRegInputModsMatchClass<bit IsFake16> : AsmOperandClass { 1440 let Name = !if(IsFake16, "Fake16VRegWithFPInputMods", 1441 "T16VRegWithFPInputMods"); 1442 let ParserMethod = "parseRegWithFPInputMods"; 1443 let PredicateMethod = "isT16VRegWithInputMods<" # 1444 !if(IsFake16, "true", "false") # ">"; 1445} 1446 1447class FPT16_Lo128VRegInputMods<bit IsFake16 = 1> 1448 : InputMods <FPT16_Lo128VRegInputModsMatchClass<IsFake16>> { 1449 let PrintMethod = "printOperandAndFPInputMods"; 1450 let EncoderMethod = "getMachineOpValueT16Lo128"; 1451} 1452 1453class FPT16VRegInputMods<bit IsFake16 = 1> 1454 : InputMods <FPT16VRegInputModsMatchClass<IsFake16>> { 1455 let PrintMethod = "printOperandAndFPInputMods"; 1456 let EncoderMethod = "getMachineOpValueT16"; 1457} 1458 1459class IntSDWAInputModsMatchClass <int opSize> : AsmOperandClass { 1460 let Name = "SDWAWithInt"#opSize#"InputMods"; 1461 let ParserMethod = "parseRegOrImmWithIntInputMods"; 1462 let PredicateMethod = "isSDWAInt"#opSize#"Operand"; 1463} 1464 1465def Int16SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<16>; 1466def Int32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32>; 1467def Bin32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32> { 1468 let Name = "SDWAWithBin32InputMods"; 1469 let ParserMethod = "parseRegOrImm"; 1470} 1471 1472class IntSDWAInputMods <IntSDWAInputModsMatchClass matchClass> : 1473 InputMods <matchClass> { 1474 let PrintMethod = "printOperandAndIntInputMods"; 1475} 1476 1477def Int16SDWAInputMods : IntSDWAInputMods<Int16SDWAInputModsMatchClass>; 1478def Int32SDWAInputMods : IntSDWAInputMods<Int32SDWAInputModsMatchClass>; 1479def Bin32SDWAInputMods : IntSDWAInputMods<Bin32SDWAInputModsMatchClass>; 1480 1481def IntVRegInputModsMatchClass : AsmOperandClass { 1482 let Name = "VRegWithIntInputMods"; 1483 let ParserMethod = "parseRegWithIntInputMods"; 1484 let PredicateMethod = "isVRegWithInputMods"; 1485} 1486 1487class IntT16_Lo128VRegInputModsMatchClass<bit IsFake16 = 1> : AsmOperandClass { 1488 let Name = !if(IsFake16, "Fake16_Lo128VRegWithIntInputMods", 1489 "T16_Lo128VRegWithIntInputMods"); 1490 let ParserMethod = "parseRegWithIntInputMods"; 1491 let PredicateMethod = "isT16_Lo128VRegWithInputMods<" # 1492 !if(IsFake16, "true", "false") # ">"; 1493} 1494 1495class IntT16VRegInputModsMatchClass<bit IsFake16 = 1> : AsmOperandClass { 1496 let Name = !if(IsFake16, "Fake16VRegWithIntInputMods", 1497 "T16VRegWithIntInputMods"); 1498 let ParserMethod = "parseRegWithIntInputMods"; 1499 let PredicateMethod = "isT16VRegWithInputMods<" # 1500 !if(IsFake16, "true", "false") # ">"; 1501} 1502 1503class IntT16_Lo128VRegInputMods<bit IsFake16 = 1> 1504 : InputMods <IntT16_Lo128VRegInputModsMatchClass<IsFake16>> { 1505 let PrintMethod = "printOperandAndIntInputMods"; 1506 let EncoderMethod = "getMachineOpValueT16Lo128"; 1507} 1508 1509class IntT16VRegInputMods<bit IsFake16 = 1> 1510 : InputMods <IntT16VRegInputModsMatchClass<IsFake16>> { 1511 let PrintMethod = "printOperandAndIntInputMods"; 1512 let EncoderMethod = "getMachineOpValueT16"; 1513} 1514 1515def IntVRegInputMods : InputMods <IntVRegInputModsMatchClass> { 1516 let PrintMethod = 
"printOperandAndIntInputMods"; 1517} 1518 1519def IntVRegT16DstInputMods : InputMods <IntVRegInputModsMatchClass> { 1520 let PrintMethod = "printOperandAndIntInputMods"; 1521 let EncoderMethod = "getMachineOpValueT16"; 1522} 1523 1524class PackedFPInputModsMatchClass <int opSize> : AsmOperandClass { 1525 let Name = "PackedFP"#opSize#"InputMods"; 1526 let ParserMethod = "parseRegOrImmWithFPInputMods"; 1527 let PredicateMethod = "isPackedFP"#opSize#"InputMods"; 1528} 1529 1530class PackedIntInputModsMatchClass <int opSize> : AsmOperandClass { 1531 let Name = "PackedInt"#opSize#"InputMods"; 1532 let ParserMethod = "parseRegOrImm"; 1533 let PredicateMethod = "isRegOrImm"; 1534// let PredicateMethod = "isPackedInt"#opSize#"InputMods"; 1535} 1536 1537def PackedF16InputModsMatchClass : PackedFPInputModsMatchClass<16>; 1538def PackedI16InputModsMatchClass : PackedIntInputModsMatchClass<16>; 1539def PackedF32InputModsMatchClass : PackedFPInputModsMatchClass<32>; 1540 1541class PackedFPInputMods <PackedFPInputModsMatchClass matchClass> : InputMods <matchClass> { 1542 let PrintMethod = "printOperandAndFPInputMods"; 1543} 1544 1545class PackedIntInputMods <PackedIntInputModsMatchClass matchClass> : InputMods <matchClass> { 1546 //let PrintMethod = "printPackedIntInputMods"; 1547} 1548 1549def PackedF16InputMods : PackedFPInputMods<PackedF16InputModsMatchClass>; 1550def PackedI16InputMods : PackedIntInputMods<PackedI16InputModsMatchClass>; 1551def PackedF32InputMods : PackedFPInputMods<PackedF32InputModsMatchClass>; 1552 1553def MFMALdScaleModifierOp : TImmLeaf<i32, [{ 1554 return isUInt<2>(Imm); 1555}], MFMALdScaleXForm>; 1556 1557//===----------------------------------------------------------------------===// 1558// Complex patterns 1559//===----------------------------------------------------------------------===// 1560 1561def DS1Addr1Offset : ComplexPattern<iPTR, 2, "SelectDS1Addr1Offset">; 1562def DS64Bit4ByteAligned : ComplexPattern<iPTR, 3, "SelectDS64Bit4ByteAligned">; 1563def DS128Bit8ByteAligned : ComplexPattern<iPTR, 3, "SelectDS128Bit8ByteAligned">; 1564 1565def MOVRELOffset : ComplexPattern<iPTR, 2, "SelectMOVRELOffset">; 1566 1567def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">; 1568 1569// Modifiers for floating point instructions. 1570def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">; 1571 1572// VOP3 modifiers used for instructions that do not read canonicalized 1573// floating point values (i.e. 
integer operations with FP source
// modifiers)
def VOP3ModsNonCanonicalizing : ComplexPattern<untyped, 2,
                                                "SelectVOP3ModsNonCanonicalizing">;

def VOP3NoMods : ComplexPattern<untyped, 1, "SelectVOP3NoMods">;

def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;

def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">;

def VOP3PModsDOT : ComplexPattern<untyped, 2, "SelectVOP3PModsDOT">;
def VOP3PModsNeg : ComplexPattern<untyped, 1, "SelectVOP3PModsNeg">;
def WMMAOpSelVOP3PMods : ComplexPattern<untyped, 1, "SelectWMMAOpSelVOP3PMods">;

def WMMAModsF32NegAbs : ComplexPattern<untyped, 2, "SelectWMMAModsF32NegAbs">;
def WMMAModsF16Neg : ComplexPattern<untyped, 2, "SelectWMMAModsF16Neg">;
def WMMAModsF16NegAbs : ComplexPattern<untyped, 2, "SelectWMMAModsF16NegAbs">;
def WMMAVISrc : ComplexPattern<untyped, 1, "SelectWMMAVISrc">;
def SWMMACIndex8 : ComplexPattern<untyped, 2, "SelectSWMMACIndex8">;
def SWMMACIndex16 : ComplexPattern<untyped, 2, "SelectSWMMACIndex16">;

def VOP3OpSel : ComplexPattern<untyped, 2, "SelectVOP3OpSel">;

def VOP3OpSelMods : ComplexPattern<untyped, 2, "SelectVOP3OpSelMods">;

def VOP3PMadMixModsExt : ComplexPattern<untyped, 2, "SelectVOP3PMadMixModsExt">;
def VOP3PMadMixMods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixMods">;

def VINTERPMods : ComplexPattern<untyped, 2, "SelectVINTERPMods">;
def VINTERPModsHi : ComplexPattern<untyped, 2, "SelectVINTERPModsHi">;

//===----------------------------------------------------------------------===//
// SI assembler operands
//===----------------------------------------------------------------------===//

def SIOperand {
  int ZERO = 0x80;
  int VCC = 0x6A;
  int FLAT_SCR = 0x68;
}

// This should be kept in sync with the SISrcMods enum.
def SRCMODS {
  int NONE = 0;
  int NEG = 1;
  int ABS = 2;
  int NEG_ABS = 3;

  int NEG_HI = ABS;
  int OP_SEL_0 = 4;
  int OP_SEL_1 = 8;
  int DST_OP_SEL = 8;
}

def DSTCLAMP {
  int NONE = 0;
  int ENABLE = 1;
}

def DSTOMOD {
  int NONE = 0;
}

def HWREG {
  int MODE = 1;
  int STATUS = 2;
  int TRAPSTS = 3;
  int HW_ID = 4;
  int GPR_ALLOC = 5;
  int LDS_ALLOC = 6;
  int IB_STS = 7;
  int MEM_BASES = 15;
  int TBA_LO = 16;
  int TBA_HI = 17;
  int TMA_LO = 18;
  int TMA_HI = 19;
  int FLAT_SCR_LO = 20;
  int FLAT_SCR_HI = 21;
  int XNACK_MASK = 22;
  int POPS_PACKER = 25;
  int SHADER_CYCLES = 29;
}

class getHwRegImm<int Reg, int Offset = 0, int Size = 32> {
  int ret = !and(!or(Reg,
                     !shl(Offset, 6),
                     !shl(!add(Size, -1), 11)), 65535);
}

//===----------------------------------------------------------------------===//
//
// SI Instruction multiclass helpers.
//
// Instructions with _32 take 32-bit operands.
// Instructions with _64 take 64-bit operands.
//
// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit
// encoding is the standard encoding, but instructions that make use of
// any of the instruction modifiers must use the 64-bit encoding.
//
// Instructions with _e32 use the 32-bit encoding.
// Instructions with _e64 use the 64-bit encoding.
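//
// For example (illustrative assembly; exact modifier syntax varies by
// subtarget):
//   v_add_f32_e32 v0, v1, v2            - 32-bit encoding, no modifiers
//   v_add_f32_e64 v0, -v1, |v2| clamp   - 64-bit encoding, source modifiers
//                                         and clamp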
1676// 1677//===----------------------------------------------------------------------===// 1678 1679class SIMCInstr <string pseudo, int subtarget> { 1680 string PseudoInstr = pseudo; 1681 int Subtarget = subtarget; 1682} 1683 1684//===----------------------------------------------------------------------===// 1685// Vector ALU classes 1686//===----------------------------------------------------------------------===// 1687 1688class getNumSrcArgs<ValueType Src0, ValueType Src1, ValueType Src2> { 1689 int ret = 1690 !if (!eq(Src0.Value, untyped.Value), 0, 1691 !if (!eq(Src1.Value, untyped.Value), 1, // VOP1 1692 !if (!eq(Src2.Value, untyped.Value), 2, // VOP2 1693 3))); // VOP3 1694} 1695 1696// Returns the register class to use for the destination of VOP[123C] 1697// instructions for the given VT. 1698class getVALUDstForVT<ValueType VT, bit IsTrue16 = 0, bit IsVOP3Encoding = 0> { 1699 defvar op16 = !if(IsTrue16, !if (IsVOP3Encoding, VOPDstOperand_t16, 1700 VOPDstOperand_t16Lo128), 1701 VOPDstOperand<VGPR_32>); 1702 RegisterOperand ret = !cond(!eq(VT.Size, 1024) : VOPDstOperand<VReg_1024>, 1703 !eq(VT.Size, 512) : VOPDstOperand<VReg_512>, 1704 !eq(VT.Size, 256) : VOPDstOperand<VReg_256>, 1705 !eq(VT.Size, 192) : VOPDstOperand<VReg_192>, 1706 !eq(VT.Size, 128) : VOPDstOperand<VReg_128>, 1707 !eq(VT.Size, 64) : VOPDstOperand<VReg_64>, 1708 !eq(VT.Size, 32) : VOPDstOperand<VGPR_32>, 1709 !eq(VT.Size, 16) : op16, 1710 1 : VOPDstS64orS32); // else VT == i1 1711} 1712 1713class getVALUDstForVT_fake16<ValueType VT> { 1714 RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>, 1715 !if(!eq(VT.Size, 128), VOPDstOperand<VReg_128>, 1716 !if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>, 1717 !if(!eq(VT.Size, 16), VOPDstOperand<VGPR_32_Lo128>, 1718 VOPDstS64orS32)))); // else VT == i1 1719} 1720 1721// Returns the register class to use for the destination of VOP[12C] 1722// instructions with SDWA extension 1723class getSDWADstForVT<ValueType VT> { 1724 RegisterOperand ret = !if(!eq(VT.Size, 1), 1725 SDWAVopcDst, // VOPC 1726 VOPDstOperand<VGPR_32>); // VOP1/2 32-bit dst 1727} 1728 1729// Returns the register class to use for source 0 of VOP[12C] 1730// instructions for the given VT. 
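// For example, the cases below give (illustrative instantiations):
//   getVOPSrc0ForVT<f32, 0 /*IsTrue16*/>.ret                  -> VSrc_f32
//   getVOPSrc0ForVT<f16, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret  -> VSrcT_f16_Lo128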
1731class getVOPSrc0ForVT<ValueType VT, bit IsTrue16, bit IsFake16 = 1> { 1732 RegisterOperand ret = 1733 !cond(!eq(VT, i64) : VSrc_b64, 1734 !eq(VT, f64) : VSrc_f64, 1735 !eq(VT, i32) : VSrc_b32, 1736 !eq(VT, f32) : VSrc_f32, 1737 !eq(VT, i16) : !if(IsTrue16, 1738 !if(IsFake16, VSrcFake16_b16_Lo128, VSrcT_b16_Lo128), 1739 VSrc_b16), 1740 !eq(VT, f16) : !if(IsTrue16, 1741 !if(IsFake16, VSrcFake16_f16_Lo128, VSrcT_f16_Lo128), 1742 VSrc_f16), 1743 !eq(VT, bf16) : !if(IsTrue16, 1744 !if(IsFake16, VSrcFake16_bf16_Lo128, VSrcT_bf16_Lo128), 1745 VSrc_bf16), 1746 !eq(VT, v2i16) : VSrc_v2b16, 1747 !eq(VT, v2f16) : VSrc_v2f16, 1748 !eq(VT, v2bf16) : VSrc_v2bf16, 1749 !eq(VT, v4f16) : AVSrc_64, 1750 !eq(VT, v4bf16) : AVSrc_64, 1751 1 : VSrc_b32); 1752} 1753 1754class getSOPSrcForVT<ValueType VT> { 1755 RegisterOperand ret = !if(!eq(VT.Size, 64), SSrc_b64, SSrc_b32); 1756} 1757 1758// Returns the vreg register class to use for source operand given VT 1759class getVregSrcForVT<ValueType VT, bit IsTrue16 = 0, bit IsFake16 = 1> { 1760 RegisterOperand ret = 1761 !cond(!eq(VT.Size, 512) : RegisterOperand<VReg_512>, 1762 !eq(VT.Size, 192) : RegisterOperand<VReg_192>, 1763 !eq(VT.Size, 128) : RegisterOperand<VReg_128>, 1764 !eq(VT.Size, 96) : RegisterOperand<VReg_96>, 1765 !eq(VT.Size, 64) : RegisterOperand<VReg_64>, 1766 !eq(VT.Size, 48) : RegisterOperand<VReg_64>, 1767 !eq(VT.Size, 16) : !if(IsTrue16, 1768 !if(IsFake16, VGPRSrc_32_Lo128, VGPRSrc_16_Lo128), 1769 RegisterOperand<VGPR_32>), 1770 1 : RegisterOperand<VGPR_32>); 1771} 1772 1773class getSDWASrcForVT <ValueType VT> { 1774 RegisterOperand retFlt = !if(!eq(VT.Size, 16), SDWASrc_f16, SDWASrc_f32); 1775 RegisterOperand retInt = !if(!eq(VT.Size, 16), SDWASrc_i16, SDWASrc_i32); 1776 RegisterOperand ret = !if(VT.isFP, retFlt, retInt); 1777} 1778 1779// Returns the register class to use for sources of VOP3 instructions for the 1780// given VT. 1781class getVOP3SrcForVT<ValueType VT, bit IsTrue16 = 0> { 1782 RegisterOperand ret = 1783 !cond(!eq(VT, f64) : VSrc_f64, 1784 !eq(VT, f32) : VSrc_f32, 1785 !eq(VT, f16) : !if(IsTrue16, VSrcT_f16, VSrc_f16), 1786 !eq(VT, bf16) : !if(IsTrue16, VSrcT_bf16, VSrc_bf16), 1787 !eq(VT, i16) : !if(IsTrue16, VSrcT_b16, VSrc_b16), 1788 !eq(VT, i1) : SSrc_i1, 1789 !eq(VT, v2f32) : VSrc_v2f32, 1790 !eq(VT, v2i32) : VSrc_v2b32, 1791 !eq(VT, v2f16) : VSrc_v2f16, 1792 !eq(VT, v2bf16) : VSrc_v2bf16, 1793 !eq(VT, v2i16) : VSrc_v2b16, 1794 !eq(VT, v4f16) : AVSrc_64, 1795 !eq(VT, v4bf16) : AVSrc_64, 1796 !eq(VT.Size, 1024) : VRegSrc_1024, 1797 !eq(VT.Size, 512) : VRegSrc_512, 1798 !eq(VT.Size, 256) : VRegSrc_256, 1799 !eq(VT.Size, 192) : VRegSrc_192, 1800 !eq(VT.Size, 128) : VRegSrc_128, 1801 !eq(VT.Size, 96) : VRegSrc_96, 1802 !eq(VT.Size, 64) : VSrc_b64, 1803 1 : VSrc_b32); 1804} 1805 1806// Returns the vreg register class to use for sources of VOP3 instructions for the 1807// given VT. 
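// For example (derived from the !cond below):
//   getVOP3VRegSrcForVT<f64>.ret                                 -> RegisterOperand<VReg_64>
//   getVOP3VRegSrcForVT<f16, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret -> RegisterOperand<VGPR_16>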
1808class getVOP3VRegSrcForVT<ValueType VT, bit IsTrue16 = 0, bit IsFake16 = 0> { 1809 RegisterOperand ret = 1810 !cond(!eq(VT.Size, 128) : RegisterOperand<VReg_128>, 1811 !eq(VT.Size, 96) : RegisterOperand<VReg_96>, 1812 !eq(VT.Size, 64) : RegisterOperand<VReg_64>, 1813 !eq(VT.Size, 48) : RegisterOperand<VReg_64>, 1814 !eq(VT.Size, 16) : !if(IsTrue16, 1815 !if(IsFake16, RegisterOperand<VGPR_32>, 1816 RegisterOperand<VGPR_16>), 1817 RegisterOperand<VGPR_32>), 1818 1 : RegisterOperand<VGPR_32>); 1819} 1820 1821// Src2 of VOP3 DPP instructions cannot be a literal 1822class getVOP3DPPSrcForVT<ValueType VT, bit IsFake16 = 1> { 1823 RegisterOperand ret = 1824 !cond(!eq(VT, i1) : SSrc_i1, 1825 !eq(VT, i16) : !if (IsFake16, VCSrc_b16, VCSrcT_b16), 1826 !eq(VT, f16) : !if (IsFake16, VCSrc_f16, VCSrcT_f16), 1827 !eq(VT, bf16) : !if (IsFake16, VCSrc_bf16, VCSrcT_bf16), 1828 !eq(VT, v2i16) : VCSrc_v2b16, 1829 !eq(VT, v2f16) : VCSrc_v2f16, 1830 !eq(VT, v2bf16) : VCSrc_v2bf16, 1831 !eq(VT, f32) : VCSrc_f32, 1832 1 : VCSrc_b32); 1833} 1834 1835// Float or packed int 1836class isModifierType<ValueType SrcVT> { 1837 bit ret = !or(!eq(SrcVT.Value, f16.Value), 1838 !eq(SrcVT.Value, bf16.Value), 1839 !eq(SrcVT.Value, f32.Value), 1840 !eq(SrcVT.Value, f64.Value), 1841 !eq(SrcVT.Value, v2f16.Value), 1842 !eq(SrcVT.Value, v2i16.Value), 1843 !eq(SrcVT.Value, v2bf16.Value), 1844 !eq(SrcVT.Value, v2f32.Value), 1845 !eq(SrcVT.Value, v2i32.Value), 1846 !eq(SrcVT.Value, v4f16.Value), 1847 !eq(SrcVT.Value, v4i16.Value), 1848 !eq(SrcVT.Value, v4bf16.Value), 1849 !eq(SrcVT.Value, v4f32.Value), 1850 !eq(SrcVT.Value, v4i32.Value), 1851 !eq(SrcVT.Value, v8f16.Value), 1852 !eq(SrcVT.Value, v8i16.Value), 1853 !eq(SrcVT.Value, v8bf16.Value), 1854 !eq(SrcVT.Value, v8f32.Value), 1855 !eq(SrcVT.Value, v8i32.Value), 1856 !eq(SrcVT.Value, v16f16.Value), 1857 !eq(SrcVT.Value, v16i16.Value), 1858 !eq(SrcVT.Value, v16bf16.Value)); 1859} 1860 1861// Return type of input modifiers operand for specified input operand. 1862// True16: If the destination is a 16-bit value, the src0 modifier must hold 1863// dst's opsel bit. Use a dummy value for DstVT if getting the mod for a src operand besides 0. 1864// 64-bit src types are not implemented for True16 dst. 
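// Illustrative instantiations of the class below:
//   getSrc0Mod<f32, f32>.ret                                  -> FP32InputMods
//   getSrc0Mod<f32, f16, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret  -> FP32T16DstInputMods
//                                                                (src0 carries dst's opsel bit)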
class getSrc0Mod <ValueType VT, ValueType DstVT, bit IsTrue16 = 0, bit IsFake16 = 1> {
  defvar T16Dst = !if(!eq(VT.Size, 64),
                      !if(VT.isFP, FP64InputMods, Int64InputMods),
                      !if(!eq(VT.Size, 16),
                          !if(VT.isFP, !if(IsTrue16, FPT16InputMods<IsFake16>, FP16InputMods),
                                       !if(IsTrue16, IntT16InputMods<IsFake16>, IntOpSelMods)),
                          !if(VT.isFP, FP32T16DstInputMods, Int32T16DstInputMods)));
  defvar Normal = !if(!eq(VT.Size, 64),
                      !if(VT.isFP, FP64InputMods, Int64InputMods),
                      !if(!eq(VT.Size, 16),
                          !if(VT.isFP, !if(IsTrue16, FPT16InputMods<IsFake16>, FP16InputMods),
                                       !if(IsTrue16, IntT16InputMods<IsFake16>, IntOpSelMods)),
                          !if(VT.isFP, FP32InputMods, Int32InputMods)));
  Operand ret = !if(!and(IsTrue16, !eq(DstVT.Size, 16)), T16Dst, Normal);
}

class getSrcMod<ValueType VT, bit IsTrue16 = 0, bit IsFake16 = 1> : getSrc0Mod<VT, f128/*Dummy Arg*/, IsTrue16, IsFake16>;

// Returns the type of the input modifiers operand for the specified input
// operand for DPP.
class getSrcModDPP <ValueType VT> {
  Operand ret = !if(VT.isFP, FPVRegInputMods, IntVRegInputMods);
}

class getSrcModDPP_t16 <ValueType VT, bit IsFake16 = 1> {
  Operand ret =
      !if (VT.isFP,
           !if (!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)),
                FPT16_Lo128VRegInputMods<IsFake16>, FPVRegInputMods),
           !if (!eq(VT.Value, i16.Value),
                IntT16_Lo128VRegInputMods<IsFake16>, IntVRegInputMods));
}

// Returns the type of the input modifiers operand for the specified input
// operand for VOP3 DPP.
// True16: If the destination is a 16-bit value, the src0 modifier must hold
// dst's opsel bit. Use a dummy value for DstVT if getting the mod for a src operand besides 0.
// 64-bit src types are not implemented for True16 dst.
class getSrc0ModVOP3DPP <ValueType VT, ValueType DstVT, bit IsFake16 = 1> {
  defvar T16Dst =
      !if (VT.isFP,
           !if (!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)),
                FPT16VRegInputMods<IsFake16>, FPVRegT16DstInputMods),
           !if (!eq(VT.Value, i16.Value), IntT16VRegInputMods<IsFake16>,
                IntVRegT16DstInputMods));
  defvar Normal =
      !if (VT.isFP,
           !if (!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)),
                FPT16VRegInputMods<IsFake16>, FPVRegInputMods),
           !if (!eq(VT.Value, i16.Value),
                IntT16VRegInputMods<IsFake16>,
                IntVRegInputMods));
  Operand ret = !if(!and(!not(IsFake16), !eq(DstVT.Size, 16)), T16Dst, Normal);
}

// GFX11 only supports VGPR src1, but the restriction is done in AsmParser
// and GCNDPPCombine.
class getSrcModVOP3DPP<ValueType VT, bit IsFake16 = 1> {
  Operand ret =
      !if (VT.isFP,
           !if (!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)),
                FPT16VCSrcInputMods<IsFake16>, FP32VCSrcInputMods),
           !if (!eq(VT.Value, i16.Value),
                IntT16VCSrcInputMods<IsFake16>,
                Int32VCSrcInputMods));
}

// Returns the type of the input modifiers operand for the specified input
// operand for SDWA.
class getSrcModSDWA <ValueType VT> {
  Operand ret = !if(!eq(VT.Value, f16.Value), FP16SDWAInputMods,
                !if(!eq(VT.Value, f32.Value), FP32SDWAInputMods,
                !if(!eq(VT.Value, i16.Value), Int16SDWAInputMods,
                !if(!eq(VT.Value, bf16.Value), FP16SDWAInputMods,
                    Int32SDWAInputMods))));
}

// Returns the input arguments for VOP[12C] instructions for the given SrcVT.
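// For example (illustrative):
//   getIns32<VSrc_f32, VGPR_32, 1>.ret -> (ins VSrc_f32:$src0)                 (VOP1)
//   getIns32<VSrc_b32, VGPR_32, 2>.ret -> (ins VSrc_b32:$src0, VGPR_32:$src1)  (VOP2)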
1940class getIns32 <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs> { 1941 dag ret = !if(!eq(NumSrcArgs, 1), (ins Src0RC:$src0), // VOP1 1942 !if(!eq(NumSrcArgs, 2), (ins Src0RC:$src0, Src1RC:$src1), // VOP2 1943 (ins))); 1944} 1945 1946// Returns the input arguments for VOP3 instructions for the given SrcVT. 1947class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC, 1948 RegisterOperand Src2RC, int NumSrcArgs, 1949 bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod, 1950 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> { 1951 1952 dag ret = 1953 !if (!eq(NumSrcArgs, 0), 1954 // VOP1 without input operands (V_NOP, V_CLREXCP) 1955 (ins), 1956 /* else */ 1957 !if (!eq(NumSrcArgs, 1), 1958 !if (HasModifiers, 1959 // VOP1 with modifiers 1960 !if(HasOMod, 1961 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1962 Clamp0:$clamp, omod0:$omod), 1963 !if (HasClamp, 1964 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, Clamp0:$clamp), 1965 (ins Src0Mod:$src0_modifiers, Src0RC:$src0))) 1966 /* else */, 1967 // VOP1 without modifiers 1968 !if(HasOMod, 1969 (ins Src0RC:$src0, Clamp0:$clamp, omod0:$omod), 1970 !if (HasClamp, 1971 (ins Src0RC:$src0, Clamp0:$clamp), 1972 (ins Src0RC:$src0))) 1973 /* endif */ ), 1974 !if (!eq(NumSrcArgs, 2), 1975 !if (HasModifiers, 1976 // VOP 2 with modifiers 1977 !if(HasOMod, 1978 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1979 Src1Mod:$src1_modifiers, Src1RC:$src1, 1980 Clamp0:$clamp, omod0:$omod), 1981 !con((ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1982 Src1Mod:$src1_modifiers, Src1RC:$src1), 1983 !if(HasClamp, (ins Clamp0:$clamp), (ins)))) 1984 /* else */, 1985 // VOP2 without modifiers 1986 !if (HasClamp, 1987 (ins Src0RC:$src0, Src1RC:$src1, Clamp0:$clamp), 1988 (ins Src0RC:$src0, Src1RC:$src1)) 1989 1990 /* endif */ ) 1991 /* NumSrcArgs == 3 */, 1992 !if (HasModifiers, 1993 !if (HasSrc2Mods, 1994 // VOP3 with modifiers 1995 !if (HasOMod, 1996 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1997 Src1Mod:$src1_modifiers, Src1RC:$src1, 1998 Src2Mod:$src2_modifiers, Src2RC:$src2, 1999 Clamp0:$clamp, omod0:$omod), 2000 !if (HasClamp, 2001 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 2002 Src1Mod:$src1_modifiers, Src1RC:$src1, 2003 Src2Mod:$src2_modifiers, Src2RC:$src2, 2004 Clamp0:$clamp), 2005 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 2006 Src1Mod:$src1_modifiers, Src1RC:$src1, 2007 Src2Mod:$src2_modifiers, Src2RC:$src2))), 2008 // VOP3 with modifiers except src2 2009 !if (HasOMod, 2010 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 2011 Src1Mod:$src1_modifiers, Src1RC:$src1, 2012 Src2RC:$src2, Clamp0:$clamp, omod0:$omod), 2013 !if (HasClamp, 2014 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 2015 Src1Mod:$src1_modifiers, Src1RC:$src1, 2016 Src2RC:$src2, Clamp0:$clamp), 2017 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 2018 Src1Mod:$src1_modifiers, Src1RC:$src1, 2019 Src2RC:$src2)))) 2020 /* else */, 2021 // VOP3 without modifiers 2022 !if (HasClamp, 2023 (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2, Clamp0:$clamp), 2024 (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2)) 2025 /* endif */ )))); 2026} 2027 2028class getInsVOP3Base<RegisterOperand Src0RC, RegisterOperand Src1RC, 2029 RegisterOperand Src2RC, int NumSrcArgs, 2030 bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod, 2031 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOpSel> { 2032 // getInst64 handles clamp and omod. 
implicit mutex between vop3p and omod 2033 dag base = getIns64 <Src0RC, Src1RC, Src2RC, NumSrcArgs, 2034 HasClamp, HasModifiers, HasSrc2Mods, HasOMod, 2035 Src0Mod, Src1Mod, Src2Mod>.ret; 2036 dag opsel = (ins op_sel0:$op_sel); 2037 dag ret = !con(base, !if(HasOpSel, opsel, (ins))); 2038} 2039 2040class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC, 2041 RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp, bit HasOpSel, 2042 bit HasNeg, 2043 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> { 2044 dag base = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs, 2045 HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/, 2046 0/*HasOMod*/, Src0Mod, Src1Mod, Src2Mod, HasOpSel>.ret; 2047 2048 dag vop3pOpsel = (ins op_sel_hi0:$op_sel_hi); 2049 dag vop3p_neg = !if(HasNeg, (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi), (ins)); 2050 2051 dag vop3pFields = !con(!if(HasOpSel, vop3pOpsel, (ins)), vop3p_neg); 2052 dag ret = !con(base, vop3pFields); 2053} 2054 2055class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC, 2056 RegisterOperand Src2RC, int NumSrcArgs, 2057 bit HasClamp, bit HasOMod, 2058 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> { 2059 dag ret = getInsVOP3Base<Src0RC, Src1RC, 2060 Src2RC, NumSrcArgs, 2061 HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/, HasOMod, 2062 Src0Mod, Src1Mod, Src2Mod, /*HasOpSel=*/1>.ret; 2063} 2064 2065class getInsDPPBase <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC, 2066 RegisterOperand Src2RC, int NumSrcArgs, bit HasModifiers, 2067 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld> { 2068 dag ret = !if(!eq(NumSrcArgs, 0), 2069 // VOP1 without input operands (V_NOP) 2070 (ins ), 2071 !con( 2072 !if(HasOld ,(ins OldRC:$old), (ins)), 2073 !if (!eq(NumSrcArgs, 1), 2074 !if (HasModifiers, 2075 // VOP1_DPP with modifiers 2076 (ins Src0Mod:$src0_modifiers, Src0RC:$src0) 2077 /* else */, 2078 // VOP1_DPP without modifiers 2079 (ins Src0RC:$src0) 2080 /* endif */), 2081 !if (!eq(NumSrcArgs, 2), 2082 !if (HasModifiers, 2083 // VOP2_DPP with modifiers 2084 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 2085 Src1Mod:$src1_modifiers, Src1RC:$src1) 2086 /* else */, 2087 // VOP2_DPP without modifiers 2088 (ins Src0RC:$src0, Src1RC:$src1) 2089 ) 2090 /* NumSrcArgs == 3, VOP3 */, 2091 !if (HasModifiers, 2092 // VOP3_DPP with modifiers 2093 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 2094 Src1Mod:$src1_modifiers, Src1RC:$src1, 2095 Src2Mod:$src2_modifiers, Src2RC:$src2) 2096 /* else */, 2097 // VOP3_DPP without modifiers 2098 (ins Src0RC:$src0, Src1RC:$src1, 2099 Src2RC:$src2) 2100 ) 2101 ) 2102 ) 2103 ) 2104 ); 2105} 2106 2107class getInsDPP <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC, 2108 RegisterOperand Src2RC, int NumSrcArgs, bit HasModifiers, 2109 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> { 2110 dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs, 2111 HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret, 2112 (ins dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask, 2113 DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl)); 2114} 2115 2116class getInsDPP16 <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC, 2117 RegisterOperand Src2RC, int NumSrcArgs, bit HasModifiers, 2118 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> { 2119 dag ret = !con(getInsDPP<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs, 2120 HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret, 2121 (ins Dpp16FI:$fi)); 2122} 2123 2124class getInsDPP8 
<RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC, 2125 RegisterOperand Src2RC, int NumSrcArgs, bit HasModifiers, 2126 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> { 2127 dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs, 2128 HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret, 2129 (ins dpp8:$dpp8, Dpp8FI:$fi)); 2130} 2131 2132class getInsVOP3DPPBase<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld> { 2133 dag old = ( ins OldRC:$old ); 2134 dag base = VOP3Base; 2135 dag ret = !con( 2136 !if(!and(HasOld,!ne(NumSrcArgs, 0)), old, (ins)), 2137 base 2138 ); 2139} 2140 2141class getInsVOP3DPP<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> { 2142 dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs,HasOld>.ret, 2143 (ins dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask, 2144 DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl)); 2145} 2146 2147class getInsVOP3DPP16<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> { 2148 dag ret = !con(getInsVOP3DPP<VOP3Base,OldRC,NumSrcArgs,HasOld>.ret, 2149 (ins Dpp16FI:$fi)); 2150} 2151 2152class getInsVOP3DPP8<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> { 2153 dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs,HasOld>.ret, 2154 (ins dpp8:$dpp8, Dpp8FI:$fi)); 2155} 2156 2157// Ins for SDWA 2158class getInsSDWA <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs, 2159 bit HasSDWAOMod, Operand Src0Mod, Operand Src1Mod, 2160 ValueType DstVT> { 2161 2162 dag ret = !if(!eq(NumSrcArgs, 0), 2163 // VOP1 without input operands (V_NOP) 2164 (ins), 2165 !if(!eq(NumSrcArgs, 1), 2166 // VOP1 2167 !if(!not(HasSDWAOMod), 2168 // VOP1_SDWA without omod 2169 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 2170 Clamp:$clamp, 2171 dst_sel:$dst_sel, dst_unused:$dst_unused, 2172 src0_sel:$src0_sel), 2173 // VOP1_SDWA with omod 2174 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 2175 Clamp:$clamp, omod:$omod, 2176 dst_sel:$dst_sel, dst_unused:$dst_unused, 2177 src0_sel:$src0_sel)), 2178 !if(!eq(NumSrcArgs, 2), 2179 !if(!eq(DstVT.Size, 1), 2180 // VOPC_SDWA 2181 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 2182 Src1Mod:$src1_modifiers, Src1RC:$src1, 2183 Clamp:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel), 2184 // VOP2_SDWA 2185 !if(!not(HasSDWAOMod), 2186 // VOP2_SDWA without omod 2187 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 2188 Src1Mod:$src1_modifiers, Src1RC:$src1, 2189 Clamp:$clamp, 2190 dst_sel:$dst_sel, dst_unused:$dst_unused, 2191 src0_sel:$src0_sel, src1_sel:$src1_sel), 2192 // VOP2_SDWA with omod 2193 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 2194 Src1Mod:$src1_modifiers, Src1RC:$src1, 2195 Clamp:$clamp, omod:$omod, 2196 dst_sel:$dst_sel, dst_unused:$dst_unused, 2197 src0_sel:$src0_sel, src1_sel:$src1_sel))), 2198 (ins)/* endif */))); 2199} 2200 2201// Outs for DPP 2202class getOutsDPP <bit HasDst, ValueType DstVT, RegisterOperand DstRCDPP> { 2203 dag ret = !if(HasDst, 2204 !if(!eq(DstVT.Size, 1), 2205 (outs), // no dst for VOPC, we use "vcc"-token as dst in SDWA VOPC instructions 2206 (outs DstRCDPP:$vdst)), 2207 (outs)); // V_NOP 2208} 2209 2210// Outs for SDWA 2211class getOutsSDWA <bit HasDst, ValueType DstVT, RegisterOperand DstRCSDWA> { 2212 dag ret = !if(HasDst, 2213 !if(!eq(DstVT.Size, 1), 2214 (outs DstRCSDWA:$sdst), 2215 (outs DstRCSDWA:$vdst)), 2216 (outs)); // V_NOP 2217} 2218 2219// Returns the assembly string for the inputs and outputs of a VOP[12C] 2220// instruction. 
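// For example (illustrative):
//   getAsm32<1, 2, f32>.ret -> "$vdst, $src0, $src1"
//   getAsm32<1, 2, i1>.ret  -> "$sdst, $src0, $src1"   (VOPC result)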
2221class getAsm32 <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> { 2222 string dst = !if(!eq(DstVT.Size, 1), "$sdst", "$vdst"); // use $sdst for VOPC 2223 string src0 = ", $src0"; 2224 string src1 = ", $src1"; 2225 string src2 = ", $src2"; 2226 string ret = !if(HasDst, dst, "") # 2227 !if(!eq(NumSrcArgs, 1), src0, "") # 2228 !if(!eq(NumSrcArgs, 2), src0#src1, "") # 2229 !if(!eq(NumSrcArgs, 3), src0#src1#src2, ""); 2230} 2231 2232class getAsmVOPDPart <int NumSrcArgs, string XorY> { 2233 string dst = "$vdst" # XorY; 2234 string src0 = ", $src0" # XorY; 2235 string src1 = ", $vsrc1" # XorY; 2236 string ret = dst # 2237 !if(!ge(NumSrcArgs, 1), src0, "") # 2238 !if(!ge(NumSrcArgs, 2), src1, ""); 2239} 2240 2241// Returns the assembly string for the inputs and outputs of a VOP3P 2242// instruction. 2243class getAsmVOP3P <bit HasDst, int NumSrcArgs, bit HasNeg, 2244 bit HasClamp, bit HasOpSel> { 2245 string dst = !if(HasDst, "$vdst"# !if(!gt(NumSrcArgs, 0), ",", ""), ""); 2246 string src0 = !if(!eq(NumSrcArgs, 1), " $src0", " $src0,"); 2247 string src1 = !if(!eq(NumSrcArgs, 1), "", 2248 !if(!eq(NumSrcArgs, 2), " $src1", 2249 " $src1,")); 2250 string src2 = !if(!eq(NumSrcArgs, 3), " $src2", ""); 2251 2252 string mods = !if(HasNeg, "$neg_lo$neg_hi", ""); 2253 string clamp = !if(HasClamp, "$clamp", ""); 2254 string opsel = !if(HasOpSel, "$op_sel$op_sel_hi", ""); 2255 2256 // Each modifier is printed as an array of bits for each operand, so 2257 // all operands are printed as part of src0_modifiers. 2258 string ret = dst#src0#src1#src2#opsel#mods#clamp; 2259} 2260 2261// FIXME-TRUE16 AsmVOP3OpSel will be deprecated after all 2262// VOP3 16 bit instructions are replaced to true16 format 2263class getAsmVOP3OpSel <int NumSrcArgs, 2264 bit HasClamp, 2265 bit HasOMod, 2266 bit Src0HasMods, 2267 bit Src1HasMods, 2268 bit Src2HasMods> { 2269 string dst = "$vdst"; 2270 2271 string isrc0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,"); 2272 string isrc1 = !if(!eq(NumSrcArgs, 1), "", 2273 !if(!eq(NumSrcArgs, 2), " $src1", 2274 " $src1,")); 2275 string isrc2 = !if(!eq(NumSrcArgs, 3), " $src2", ""); 2276 2277 string fsrc0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,"); 2278 string fsrc1 = !if(!eq(NumSrcArgs, 1), "", 2279 !if(!eq(NumSrcArgs, 2), " $src1_modifiers", 2280 " $src1_modifiers,")); 2281 string fsrc2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", ""); 2282 2283 string src0 = !if(Src0HasMods, fsrc0, isrc0); 2284 string src1 = !if(Src1HasMods, fsrc1, isrc1); 2285 string src2 = !if(Src2HasMods, fsrc2, isrc2); 2286 2287 string clamp = !if(HasClamp, "$clamp", ""); 2288 string omod = !if(HasOMod, "$omod", ""); 2289 string ret = dst#", "#src0#src1#src2#"$op_sel"#clamp#omod; 2290} 2291 2292class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> { 2293 string dst = !if(HasDst, 2294 !if(!eq(DstVT.Size, 1), 2295 "$sdst", 2296 "$vdst"), 2297 ""); // use $sdst for VOPC 2298 string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,"); 2299 string src1 = !if(!eq(NumSrcArgs, 1), "", 2300 !if(!eq(NumSrcArgs, 2), " $src1_modifiers", 2301 " $src1_modifiers,")); 2302 string args = !if(!not(HasModifiers), 2303 getAsm32<0, NumSrcArgs, DstVT>.ret, 2304 ", "#src0#src1); 2305 string ret = dst#args#" $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; 2306} 2307 2308class getAsmDPP16 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> { 2309 string ret = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret#"$fi"; 2310} 2311 2312class getAsmDPP8 <bit 
HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> 2313 : getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>{ 2314 let ret = dst#args#" $dpp8$fi"; 2315} 2316 2317class getAsmVOP3Base <int NumSrcArgs, bit HasDst, bit HasClamp, 2318 bit HasOpSel, bit HasOMod, bit IsVOP3P, 2319 bit HasNeg, bit Src0HasMods, 2320 bit Src1HasMods, bit Src2HasMods, ValueType DstVT = i32, 2321 bit HasByteSel = 0> { 2322 string dst = !if(HasDst, 2323 !if(!eq(DstVT.Size, 1), 2324 "$sdst", 2325 "$vdst"), 2326 ""); // use $sdst for VOPC 2327 string src0nomods = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,"); 2328 string src1nomods = !if(!eq(NumSrcArgs, 1), "", 2329 !if(!eq(NumSrcArgs, 2), " $src1", 2330 " $src1,")); 2331 string src2nomods = !if(!eq(NumSrcArgs, 3), " $src2", ""); 2332 2333 string src0mods = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,"); 2334 string src1mods = !if(!eq(NumSrcArgs, 1), "", 2335 !if(!eq(NumSrcArgs, 2), " $src1_modifiers", 2336 " $src1_modifiers,")); 2337 string src2mods = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", ""); 2338 2339 string src0 = !if(Src0HasMods, src0mods, src0nomods); 2340 string src1 = !if(Src1HasMods, src1mods, src1nomods); 2341 string src2 = !if(Src2HasMods, src2mods, src2nomods); 2342 string opsel = !if(HasOpSel, "$op_sel", ""); 2343 string bytesel = !if(HasByteSel, "$byte_sel", ""); 2344 string 3PMods = !if(IsVOP3P, 2345 !if(HasOpSel, "$op_sel_hi", "") 2346 #!if(HasNeg, "$neg_lo$neg_hi", ""), 2347 ""); 2348 string clamp = !if(HasClamp, "$clamp", ""); 2349 string omod = !if(HasOMod, "$omod", ""); 2350 2351 string ret = dst#!if(!eq(NumSrcArgs,0), 2352 "", 2353 !if(HasDst,", ", "")#src0#src1#src2#opsel#bytesel#3PMods#clamp#omod); 2354} 2355 2356class getAsmVOP3DPP<string base> { 2357 string ret = base # " $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; 2358} 2359 2360class getAsmVOP3DPP16<string base> { 2361 string ret = getAsmVOP3DPP<base>.ret # "$fi"; 2362} 2363 2364class getAsmVOP3DPP8<string base> { 2365 string ret = base # " $dpp8$fi"; 2366} 2367 2368 2369class getAsmSDWA <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> { 2370 string dst = !if(HasDst, 2371 !if(!eq(DstVT.Size, 1), 2372 " vcc", // use vcc token as dst for VOPC instructions 2373 "$vdst"), 2374 ""); 2375 string src0 = "$src0_modifiers"; 2376 string src1 = "$src1_modifiers"; 2377 string args = !if(!eq(NumSrcArgs, 0), 2378 "", 2379 !if(!eq(NumSrcArgs, 1), 2380 ", "#src0#"$clamp", 2381 ", "#src0#", "#src1#"$clamp" 2382 ) 2383 ); 2384 string sdwa = !if(!eq(NumSrcArgs, 0), 2385 "", 2386 !if(!eq(NumSrcArgs, 1), 2387 " $dst_sel $dst_unused $src0_sel", 2388 !if(!eq(DstVT.Size, 1), 2389 " $src0_sel $src1_sel", // No dst_sel and dst_unused for VOPC 2390 " $dst_sel $dst_unused $src0_sel $src1_sel" 2391 ) 2392 ) 2393 ); 2394 string ret = dst#args#sdwa; 2395} 2396 2397class getAsmSDWA9 <bit HasDst, bit HasOMod, int NumSrcArgs, 2398 ValueType DstVT = i32> { 2399 string dst = !if(HasDst, 2400 !if(!eq(DstVT.Size, 1), 2401 "$sdst", // VOPC 2402 "$vdst"), // VOP1/2 2403 ""); 2404 string src0 = "$src0_modifiers"; 2405 string src1 = "$src1_modifiers"; 2406 string out_mods = !if(!not(HasOMod), "$clamp", "$clamp$omod"); 2407 string args = !if(!eq(NumSrcArgs, 0), "", 2408 !if(!eq(NumSrcArgs, 1), 2409 ", "#src0, 2410 ", "#src0#", "#src1 2411 ) 2412 ); 2413 string sdwa = !if(!eq(NumSrcArgs, 0), "", 2414 !if(!eq(NumSrcArgs, 1), 2415 out_mods#" $dst_sel $dst_unused $src0_sel", 2416 !if(!eq(DstVT.Size, 1), 2417 " $src0_sel $src1_sel", // No dst_sel, dst_unused and output modifiers for VOPC 2418 out_mods#" 
$dst_sel $dst_unused $src0_sel $src1_sel"
                 )
               );
  string ret = dst#args#sdwa;
}

class getHas64BitOps <int NumSrcArgs, ValueType DstVT, ValueType Src0VT,
                      ValueType Src1VT> {
  bit ret = !if(!eq(NumSrcArgs, 3),
                0,
                !if(!eq(DstVT.Size, 64),
                    1,
                    !if(!eq(Src0VT.Size, 64),
                        1,
                        !if(!eq(Src1VT.Size, 64),
                            1,
                            0
                        )
                    )
                )
            );
}

class getHasSDWA <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                  ValueType Src1VT = i32> {
  bit ret = !if(!eq(NumSrcArgs, 3),
                0, // NumSrcArgs == 3 - No SDWA for VOP3
                !if(!eq(DstVT.Size, 64),
                    0, // 64-bit dst - No SDWA for 64-bit operands
                    !if(!eq(Src0VT.Size, 64),
                        0, // 64-bit src0
                        !if(!eq(Src1VT.Size, 64),
                            0, // 64-bit src1
                            1
                        )
                    )
                )
            );
}

class getHasDPP <int NumSrcArgs> {
  bit ret = !if(!eq(NumSrcArgs, 3),
                0, // NumSrcArgs == 3 - No DPP for VOP3
                1);
}

class getHasExt32BitDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                         ValueType Src1VT = i32> {
  bit ret = !and(getHasDPP<NumSrcArgs>.ret,
                 !not(getHas64BitOps<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret));
}

class getHasExt64BitDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                         ValueType Src1VT = i32> {
  bit ret = !and(getHasDPP<NumSrcArgs>.ret,
                 getHas64BitOps<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret);
}

// Returns true if an instruction with the given operands supports DPP or SDWA.
class getHasExt <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                 ValueType Src1VT = i32> {
  bit ret = !or(getHasDPP<NumSrcArgs>.ret,
                getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret);
}

// Return an AGPR+VGPR operand class for the given VGPR register class.
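// For example (illustrative; follows from the register class sizes):
//   getLdStRegisterOperand<VGPR_32>.ret  -> AVLdSt_32
//   getLdStRegisterOperand<VReg_128>.ret -> AVLdSt_128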
2485class getLdStRegisterOperand<RegisterClass RC> { 2486 RegisterOperand ret = 2487 !cond(!eq(RC.Size, 32) : AVLdSt_32, 2488 !eq(RC.Size, 64) : AVLdSt_64, 2489 !eq(RC.Size, 96) : AVLdSt_96, 2490 !eq(RC.Size, 128) : AVLdSt_128, 2491 !eq(RC.Size, 160) : AVLdSt_160, 2492 !eq(RC.Size, 1024) : AVLdSt_1024); 2493} 2494 2495class getHasVOP3DPP <ValueType DstVT = i32, ValueType Src0VT = i32, 2496 ValueType Src1VT = i32, ValueType Src2VT = i32> { 2497 bit ret = !if(!eq(DstVT.Size, 64), 2498 0, // 64-bit dst No DPP for 64-bit operands 2499 !if(!eq(Src0VT.Size, 64), 2500 0, // 64-bit src0 2501 !if(!eq(Src1VT.Size, 64), 2502 0, // 64-bit src1 2503 !if(!eq(Src2VT.Size, 64), 2504 0, // 64-bit src2 2505 1 2506 ) 2507 ) 2508 ) 2509 ); 2510} 2511 2512 2513def PatGenMode { 2514 int NoPattern = 0; 2515 int Pattern = 1; 2516} 2517 2518class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> { 2519 2520 field list<ValueType> ArgVT = _ArgVT; 2521 field bit EnableClamp = _EnableClamp; 2522 field bit IsTrue16 = 0; 2523 field bit IsRealTrue16 = 0; 2524 2525 field ValueType DstVT = ArgVT[0]; 2526 field ValueType Src0VT = ArgVT[1]; 2527 field ValueType Src1VT = ArgVT[2]; 2528 field ValueType Src2VT = ArgVT[3]; 2529 field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret; 2530 field RegisterOperand DstRCDPP = DstRC; 2531 field RegisterOperand DstRC64 = DstRC; 2532 field RegisterOperand DstRCVOP3DPP = DstRC64; 2533 field RegisterOperand DstRCSDWA = getSDWADstForVT<DstVT>.ret; 2534 field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT, IsTrue16>.ret; 2535 field RegisterOperand Src1RC32 = getVregSrcForVT<Src1VT>.ret; 2536 field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret; 2537 field RegisterOperand Src1RC64 = getVOP3SrcForVT<Src1VT>.ret; 2538 field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret; 2539 field RegisterOperand Src0DPP = getVregSrcForVT<Src0VT>.ret; 2540 field RegisterOperand Src1DPP = getVregSrcForVT<Src1VT>.ret; 2541 field RegisterOperand Src2DPP = getVregSrcForVT<Src2VT>.ret; 2542 field RegisterOperand Src0VOP3DPP = VGPRSrc_32; 2543 field RegisterOperand Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT>.ret; 2544 field RegisterOperand Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT>.ret; 2545 field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret; 2546 field RegisterOperand Src1SDWA = getSDWASrcForVT<Src0VT>.ret; 2547 field Operand Src0Mod = getSrc0Mod<Src0VT, DstVT>.ret; 2548 field Operand Src1Mod = getSrcMod<Src1VT>.ret; 2549 field Operand Src2Mod = getSrcMod<Src2VT>.ret; 2550 field Operand Src0ModDPP = getSrcModDPP<Src0VT>.ret; 2551 field Operand Src1ModDPP = getSrcModDPP<Src1VT>.ret; 2552 field Operand Src2ModDPP = getSrcModDPP<Src2VT>.ret; 2553 field Operand Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT>.ret; 2554 field Operand Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT>.ret; 2555 field Operand Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT>.ret; 2556 field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret; 2557 field Operand Src1ModSDWA = getSrcModSDWA<Src1VT>.ret; 2558 2559 2560 field bit IsMAI = 0; 2561 field bit IsVOP3P = 0; 2562 field bit IsDOT = 0; 2563 field bit IsSingle = 0; 2564 field bit IsWMMA = 0; 2565 field bit IsSWMMAC = 0; 2566 2567 field bit IsFP8SrcByteSel = 0; 2568 field bit IsFP8DstByteSel = 0; 2569 field bit HasFP8DstByteSel = 0; 2570 field bit HasFP4DstByteSel = 0; 2571 field bit IsFP8ByteSel = !or(IsFP8SrcByteSel, IsFP8DstByteSel); 2572 2573 field bit HasDst = !ne(DstVT.Value, untyped.Value); 2574 field bit HasDst32 = HasDst; 2575 field bit EmitDst = HasDst; // force 
dst encoding, see v_movreld_b32 special case 2576 field bit EmitDstSel = EmitDst; 2577 field int NumSrcArgs = getNumSrcArgs<Src0VT, Src1VT, Src2VT>.ret; 2578 field bit HasSrc0 = !ne(Src0VT.Value, untyped.Value); 2579 field bit HasSrc1 = !ne(Src1VT.Value, untyped.Value); 2580 field bit HasSrc2 = !ne(Src2VT.Value, untyped.Value); 2581 2582 field bit HasSrc0FloatMods = Src0VT.isFP; 2583 field bit HasSrc1FloatMods = Src1VT.isFP; 2584 field bit HasSrc2FloatMods = Src2VT.isFP; 2585 2586 field bit HasSrc0IntMods = isIntType<Src0VT>.ret; 2587 field bit HasSrc1IntMods = isIntType<Src1VT>.ret; 2588 field bit HasSrc2IntMods = isIntType<Src2VT>.ret; 2589 2590 field bit HasClamp = !or(isModifierType<Src0VT>.ret, EnableClamp); 2591 field bit HasSDWAClamp = EmitDst; 2592 field bit HasFPClamp = !and(DstVT.isFP, HasClamp); 2593 field bit HasIntClamp = !if(DstVT.isFP, 0, HasClamp); 2594 field bit HasClampLo = HasClamp; 2595 field bit HasClampHi = !and(DstVT.isVector, HasClamp); 2596 field bit HasHigh = 0; 2597 2598 field bit IsPacked = Src0VT.isVector; 2599 field bit HasOpSel = IsPacked; 2600 field bit HasOMod = !if(IsVOP3P, 0, DstVT.isFP); 2601 field bit HasSDWAOMod = DstVT.isFP; 2602 2603 field bit HasModifiers = !or(isModifierType<Src0VT>.ret, 2604 isModifierType<Src1VT>.ret, 2605 isModifierType<Src2VT>.ret, 2606 HasOMod); 2607 field bit HasNeg = HasModifiers; 2608 2609 field bit HasSrc0Mods = HasModifiers; 2610 field bit HasSrc1Mods = !if(HasModifiers, !or(HasSrc1FloatMods, HasSrc1IntMods), 0); 2611 field bit HasSrc2Mods = !if(HasModifiers, !or(HasSrc2FloatMods, HasSrc2IntMods), 0); 2612 2613 field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret; 2614 field bit HasExtVOP3DPP = getHasVOP3DPP<DstVT, Src0VT, Src1VT, Src2VT>.ret; 2615 field bit HasExtDPP = !or(getHasDPP<NumSrcArgs>.ret, HasExtVOP3DPP); 2616 field bit HasExt32BitDPP = getHasExt32BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret; 2617 field bit HasExt64BitDPP = getHasExt64BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret; 2618 field bit HasExtSDWA = getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret; 2619 field bit HasExtSDWA9 = HasExtSDWA; 2620 field int NeedPatGen = PatGenMode.NoPattern; 2621 2622 field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods); 2623 field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods); 2624 field Operand Src2PackedMod = !if(HasSrc2FloatMods, PackedF16InputMods, PackedI16InputMods); 2625 2626 field dag Outs = !if(HasDst,(outs DstRC:$vdst),(outs)); 2627 2628 // VOP3b instructions are a special case with a second explicit 2629 // output. This is manually overridden for them. 
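// (Sketch only, assuming the usual carry-out operand names; the real VOP3b
// profiles are defined in the VOP .td files and may differ:
//   let Outs64 = (outs DstRC64:$vdst, VOPDstS64orS32:$sdst); )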
2630 field dag Outs32 = Outs; 2631 field dag Outs64 = !if(HasDst,(outs DstRC64:$vdst),(outs)); 2632 field dag OutsDPP = getOutsDPP<HasDst, DstVT, DstRCDPP>.ret; 2633 field dag OutsDPP8 = OutsDPP; 2634 field dag OutsVOP3DPP = getOutsDPP<HasDst, DstVT, DstRCVOP3DPP>.ret; 2635 field dag OutsVOP3DPP8 = OutsVOP3DPP; 2636 field dag OutsSDWA = getOutsSDWA<HasDst, DstVT, DstRCSDWA>.ret; 2637 2638 field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret; 2639 field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs, 2640 HasClamp, HasModifiers, HasSrc2Mods, 2641 HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret; 2642 field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64, 2643 NumSrcArgs, HasClamp, HasOpSel, HasNeg, 2644 Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret; 2645 field dag InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64, 2646 NumSrcArgs, HasClamp, HasOMod, 2647 Src0Mod, Src1Mod, Src2Mod>.ret; 2648 field dag InsDPP = !if(HasExtDPP, 2649 getInsDPP<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs, 2650 HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret, 2651 (ins)); 2652 field dag InsDPP16 = getInsDPP16<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs, 2653 HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret; 2654 field dag InsDPP8 = getInsDPP8<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, 2655 NumSrcArgs, HasModifiers, 2656 Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret; 2657 defvar InsVOP3DPPBase = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, 2658 Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod, 2659 Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel>.ret; 2660 defvar InsVOP3PDPPBase = getInsVOP3P<Src0VOP3DPP, Src1VOP3DPP, 2661 Src2VOP3DPP, NumSrcArgs, HasClamp, HasOpSel, HasNeg, 2662 Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP>.ret; 2663 2664 field dag InsVOP3Base = !if(IsVOP3P, InsVOP3PDPPBase, InsVOP3DPPBase); 2665 2666 field dag InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret; 2667 field dag InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret; 2668 field dag InsVOP3DPP8 = getInsVOP3DPP8<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret; 2669 field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs, 2670 HasSDWAOMod, Src0ModSDWA, Src1ModSDWA, 2671 DstVT>.ret; 2672 field dag InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X); 2673 // It is a slight misnomer to use the deferred f32 operand type for non-float 2674 // operands, but this operand type will only be used if the other dual 2675 // component is FMAAK or FMAMK 2676 field dag InsVOPDXDeferred = (ins !if(!eq(Src0VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0X, VGPR_32:$vsrc1X); 2677 field dag InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y); 2678 field dag InsVOPDYDeferred = (ins !if(!eq(Src1VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0Y, VGPR_32:$vsrc1Y); 2679 2680 2681 field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret; 2682 field string AsmDPP = !if(HasExtDPP, 2683 getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret, ""); 2684 field string AsmDPP16 = getAsmDPP16<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret; 2685 // DPP8 encoding has no fields for modifiers, and it is enforced by setting 2686 // the asm operand name via this HasModifiers flag 2687 field string AsmDPP8 = getAsmDPP8<HasDst, NumSrcArgs, 0 /*HasModifiers*/, DstVT>.ret; 2688 field string AsmVOP3Base = getAsmVOP3Base<NumSrcArgs, HasDst, HasClamp, 2689 HasOpSel, HasOMod, IsVOP3P, HasNeg, HasModifiers, HasModifiers, 2690 HasModifiers, DstVT, 
IsFP8ByteSel>.ret; 2691 field string Asm64 = AsmVOP3Base; 2692 field string AsmVOP3P = getAsmVOP3P<HasDst, NumSrcArgs, HasNeg, HasClamp, HasOpSel>.ret; 2693 field string AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs, 2694 HasClamp, 2695 HasOMod, 2696 HasSrc0FloatMods, 2697 HasSrc1FloatMods, 2698 HasSrc2FloatMods>.ret; 2699 field string AsmVOP3DPP = getAsmVOP3DPP<AsmVOP3Base>.ret; 2700 field string AsmVOP3DPP16 = getAsmVOP3DPP16<AsmVOP3Base>.ret; 2701 field string AsmVOP3DPP8 = getAsmVOP3DPP8<AsmVOP3Base>.ret; 2702 field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret; 2703 field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret; 2704 field string AsmVOPDX = getAsmVOPDPart<NumSrcArgs, "X">.ret; 2705 field string AsmVOPDY = getAsmVOPDPart<NumSrcArgs, "Y">.ret; 2706 field string TieRegDPP = "$old"; 2707 field bit IsSMFMAC = false; 2708 field bit HasAbid = !and(IsMAI, HasSrc1); 2709} 2710 2711 class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> { 2712 let HasExt = 0; 2713 let HasExtDPP = 0; 2714 let HasExtVOP3DPP = 0; 2715 let HasExt32BitDPP = 0; 2716 let HasExt64BitDPP = 0; 2717 let HasExtSDWA = 0; 2718 let HasExtSDWA9 = 0; 2719} 2720 2721class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.NoPattern> : VOPProfile <p.ArgVT> { 2722 let NeedPatGen = mode; 2723} 2724 2725// VOPC_Profile_t16, VOPC_NoSdst_Profile_t16, VOPC_Class_Profile_t16, 2726// VOPC_Class_NoSdst_Profile_t16, and VOP_MAC_F16_t16 do not inherit from this 2727// class, so copy changes to this class in those profiles 2728class VOPProfile_True16<VOPProfile P> : VOPProfile<P.ArgVT> { 2729 let IsTrue16 = 1; 2730 let IsRealTrue16 = 1; 2731 2732 let HasOpSel = 1; 2733 let HasModifiers = 1; // All instructions at least have OpSel. 2734 2735 // Most DstVT are 16-bit, but not all. 
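// (For example, a True16 profile built over VOP_F32_F16 still has a 32-bit
// $vdst; getVALUDstForVT below handles both cases.)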
2736 let DstRC = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 0 /*IsVOP3Encoding*/>.ret; 2737 let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret; 2738 let Src1RC32 = getVregSrcForVT<Src1VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret; 2739 let Src0DPP = getVregSrcForVT<Src0VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret; 2740 let Src1DPP = getVregSrcForVT<Src1VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret; 2741 let Src2DPP = getVregSrcForVT<Src2VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret; 2742 let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0 /*IsFake16*/>.ret; 2743 let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0 /*IsFake16*/>.ret; 2744 let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0 /*IsFake16*/>.ret; 2745 let Src0VOP3DPP = !if (!eq(Src0VT.Size, 16), VGPRSrc_16, VGPRSrc_32); 2746 let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0 /*IsFake16*/>.ret; 2747 let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 0 /*IsFake16*/>.ret; 2748 let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 0/*IsFake16*/>.ret; 2749 let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 0 /*IsFake16*/>.ret; 2750 let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 0 /*IsFake16*/>.ret; 2751 2752 let DstRC64 = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 1 /*IsVOP3Encoding*/>.ret; 2753 let Src0RC64 = getVOP3SrcForVT<Src0VT, 1 /*IsTrue16*/>.ret; 2754 let Src1RC64 = getVOP3SrcForVT<Src1VT, 1 /*IsTrue16*/>.ret; 2755 let Src2RC64 = getVOP3SrcForVT<Src2VT, 1 /*IsTrue16*/>.ret; 2756 let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret; 2757 let Src1Mod = getSrcMod<Src1VT, 1 /*IsTrue16*/, 0/*IsFake16*/>.ret; 2758 let Src2Mod = getSrcMod<Src2VT, 1 /*IsTrue16*/, 0/*IsFake16*/>.ret; 2759} 2760 2761class VOPProfile_Fake16<VOPProfile P> : VOPProfile<P.ArgVT> { 2762 let IsTrue16 = 1; 2763 // Most DstVT are 16-bit, but not all 2764 let DstRC = getVALUDstForVT_fake16<DstVT>.ret; 2765 let DstRC64 = getVALUDstForVT<DstVT>.ret; 2766 let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret; 2767 let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret; 2768 let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret; 2769 let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret; 2770 let Src0ModDPP = getSrcModDPP_t16<Src0VT, 1/*IsFake16*/>.ret; 2771 let Src1ModDPP = getSrcModDPP_t16<Src1VT, 1/*IsFake16*/>.ret; 2772 let Src2ModDPP = getSrcModDPP_t16<Src2VT, 1/*IsFake16*/>.ret; 2773 let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret; 2774 let Src1Mod = getSrcMod<Src1VT, 1 /*IsTrue16*/, 1/*IsFake16*/>.ret; 2775 let Src2Mod = getSrcMod<Src2VT, 1 /*IsTrue16*/, 1/*IsFake16*/>.ret; 2776 let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1 /*IsFake16*/>.ret; 2777 let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1 /*IsFake16*/>.ret; 2778 let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 1/*IsFake16*/>.ret; 2779 let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 1/*IsFake16*/>.ret; 2780 let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 1/*IsFake16*/>.ret; 2781} 2782 2783def VOP_F16_F16 : VOPProfile<[f16, f16, untyped, untyped]>; 2784def VOP_F16_I16 : VOPProfile <[f16, i16, untyped, untyped]>; 2785def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>; 2786def VOP_I16_I16 : VOPProfile <[i16, i16, untyped, untyped]>; 2787 2788def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>; 2789def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>; 2790def VOP_F16_F16_I32 : VOPProfile <[f16, f16, i32, untyped]>; 2791def VOP_I16_I16_I16 : VOPProfile <[i16, i16, i16, untyped]>; 2792def VOP_I16_I16_I16_ARITH : 
VOPProfile <[i16, i16, i16, untyped], /*EnableClamp=*/1>; 2793 2794def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>; 2795def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>; 2796 2797def VOP_I32_I16_I16_I32 : VOPProfile <[i32, i16, i16, i32, untyped]>; 2798def VOP_I32_I16 : VOPProfile <[i32, i16, untyped, untyped]>; 2799def VOP_I16_I32 : VOPProfile <[i16, i32, untyped, untyped]>; 2800 2801def VOP_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, untyped]>; 2802def VOP_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, untyped]>; 2803def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>; 2804 2805def VOP_V2F16_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, v2f16]>; 2806def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>; 2807def VOP_V2I16_F32_F32 : VOPProfile <[v2i16, f32, f32, untyped]>; 2808def VOP_V2I16_I32_I32 : VOPProfile <[v2i16, i32, i32, untyped]>; 2809 2810def VOP_F16_V2F16_V2F16_F16 : VOPProfile <[f16, v2f16, v2f16, f16]>; 2811def VOP_BF16_V2BF16_V2BF16_BF16: VOPProfile <[bf16, v2bf16, v2bf16, bf16]>; 2812def VOP_F32_V2BF16_V2BF16_F32 : VOPProfile <[f32, v2bf16, v2bf16, f32]>; 2813 2814def VOP_F32_V2F16_V2F16_V2F16 : VOPProfile <[f32, v2f16, v2f16, v2f16]>; 2815 2816def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>; 2817 2818def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>; 2819def VOP_F32_F64 : VOPProfile <[f32, f64, untyped, untyped]>; 2820def VOP_F32_I32 : VOPProfile <[f32, i32, untyped, untyped]>; 2821def VOP_F64_F32 : VOPProfile <[f64, f32, untyped, untyped]>; 2822def VOP_F64_F64 : VOPProfile <[f64, f64, untyped, untyped]>; 2823def VOP_F64_I32 : VOPProfile <[f64, i32, untyped, untyped]>; 2824def VOP_I32_F32 : VOPProfile <[i32, f32, untyped, untyped]>; 2825def VOP_I32_F64 : VOPProfile <[i32, f64, untyped, untyped]>; 2826def VOP_I32_I32 : VOPProfile <[i32, i32, untyped, untyped]>; 2827def VOP_F16_F32 : VOPProfile <[f16, f32, untyped, untyped]>; 2828def VOP_F32_F16 : VOPProfile <[f32, f16, untyped, untyped]>; 2829def VOP_I64_I64 : VOPProfile <[i64, i64, untyped, untyped]>; 2830def VOP_F32_BF16 : VOPProfile <[f32, bf16, untyped, untyped]>; 2831 2832def VOP_F32_F32_F16 : VOPProfile <[f32, f32, f16, untyped]>; 2833def VOP_F32_F32_F32 : VOPProfile <[f32, f32, f32, untyped]>; 2834def VOP_F32_F32_I32 : VOPProfile <[f32, f32, i32, untyped]>; 2835def VOP_F64_F64_F64 : VOPProfile <[f64, f64, f64, untyped]>; 2836def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>; 2837def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>; 2838def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>; 2839def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>; 2840def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], /*EnableClamp=*/1>; 2841def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>; 2842def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>; 2843def VOP_V2BF16_F32_F32 : VOPProfile <[v2bf16, f32, f32, untyped]>; 2844def VOP_V32F32_V6I32_F32 : VOPProfile <[v32f32, v6i32, f32, untyped]>; 2845def VOP_V32F16_V6I32_F32 : VOPProfile <[v32f16, v6i32, f32, untyped]>; 2846def VOP_V32BF16_V6I32_F32 : VOPProfile <[v32bf16, v6i32, f32, untyped]>; 2847def VOP_V6I32_V32F16_F32 : VOPProfile<[v6i32, v32f16, f32, untyped]>; 2848def VOP_V6I32_V32BF16_F32 : VOPProfile<[v6i32, v32bf16, f32, untyped]>; 2849def VOP_V6I32_V16F32_V16F32_F32 : VOPProfile<[v6i32, v16f32, v16f32, f32]>; 2850def VOP_V2F16_I32_F32 : VOPProfile<[v2f16, i32, f32, untyped]>; 2851def 
VOP_V2I16_F32_F32_F32 : VOPProfile<[v2i16, f32, f32, f32]>; 2852def VOP_V2I16_V2F16_F32 : VOPProfile<[v2i16, v2f16, f32, untyped]>; 2853def VOP_V2I16_V2BF16_F32 : VOPProfile<[v2i16, v2bf16, f32, untyped]>; 2854def VOP_I32_F32_F32_F32 : VOPProfile<[i32, f32, f32, f32]>; 2855def VOP_I32_V2F16_F32_F32 : VOPProfile<[i32, v2f16, f32, f32]>; 2856def VOP_I32_V2BF16_F32_F32: VOPProfile<[i32, v2bf16, f32, f32]>; 2857def VOP_BF16_F32_I32 : VOPProfile<[bf16, f32, i32, untyped]>; 2858def VOP_F16_F32_I32 : VOPProfile<[f16, f32, i32, untyped]>; 2859def VOP_I32_BF16_I32_F32 : VOPProfile<[i32, bf16, i32, f32]>; 2860def VOP_I32_F16_I32_F32 : VOPProfile<[i32, f16, i32, f32]>; 2861def VOP_I32_F32_I32_F32 : VOPProfile<[i32, f32, i32, f32]>; 2862 2863def VOP_V6I32_V32BF16_I32_F32 : VOPProfile<[v6i32, v32bf16, i32, f32]>; 2864def VOP_V6I32_V32F16_I32_F32 : VOPProfile<[v6i32, v32f16, i32, f32]>; 2865def VOP_V6I32_V32F32_I32_F32 : VOPProfile<[v6i32, v32f32, i32, f32]>; 2866 2867def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>; 2868def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>; 2869def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>; 2870 2871def VOP_F16_F32_F16_F32 : VOPProfile <[f16, f32, f16, f32]>; 2872def VOP_F32_F32_F16_F16 : VOPProfile <[f32, f32, f16, f16]>; 2873def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>; 2874def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>; 2875def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>; 2876def VOP_I32_I32_I32_I16 : VOPProfile <[i32, i32, i32, i16]>; 2877def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>; 2878def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>; 2879def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>; 2880def VOP_V4I32_I64_I32_V4I32 : VOPProfile <[v4i32, i64, i32, v4i32]>; 2881def VOP_I16_I32_I32_I32 : VOPProfile <[i16, i32, i32, i32]>; 2882 2883def VOP_F32_V2F16_V2F16_F32 : VOPProfile <[f32, v2f16, v2f16, f32]>; 2884def VOP_I32_V2I16_V2I16_I32 : VOPProfile <[i32, v2i16, v2i16, i32]>; 2885 2886def VOP_V4F32_F32_F32_V4F32 : VOPProfile <[v4f32, f32, f32, v4f32]>; 2887def VOP_V16F32_F32_F32_V16F32 : VOPProfile <[v16f32, f32, f32, v16f32]>; 2888def VOP_V32F32_F32_F32_V32F32 : VOPProfile <[v32f32, f32, f32, v32f32]>; 2889def VOP_V4F32_V4F16_V4F16_V4F32 : VOPProfile <[v4f32, v4f16, v4f16, v4f32]>; 2890def VOP_V16F32_V4F16_V4F16_V16F32 : VOPProfile <[v16f32, v4f16, v4f16, v16f32]>; 2891def VOP_V32F32_V4F16_V4F16_V32F32 : VOPProfile <[v32f32, v4f16, v4f16, v32f32]>; 2892def VOP_V4F32_V2I16_V2I16_V4F32 : VOPProfile <[v4f32, v2i16, v2i16, v4f32]>; 2893def VOP_V16F32_V2I16_V2I16_V16F32 : VOPProfile <[v16f32, v2i16, v2i16, v16f32]>; 2894def VOP_V32F32_V2I16_V2I16_V32F32 : VOPProfile <[v32f32, v2i16, v2i16, v32f32]>; 2895def VOP_V4I32_I32_I32_V4I32 : VOPProfile <[v4i32, i32, i32, v4i32]>; 2896def VOP_V16I32_I32_I32_V16I32 : VOPProfile <[v16i32, i32, i32, v16i32]>; 2897def VOP_V32I32_I32_I32_V32I32 : VOPProfile <[v32i32, i32, i32, v32i32]>; 2898 2899def VOP_V4F64_F64_F64_V4F64 : VOPProfile <[v4f64, f64, f64, v4f64]>; 2900def VOP_V1F64_F64_F64_V1F64 : VOPProfile <[v1f64, f64, f64, v1f64]>; 2901 2902def VOP_V2F32_V2F32_V2F32_V2F32 : VOPProfile <[v2f32, v2f32, v2f32, v2f32]>; 2903def VOP_V2F32_V2F32_V2F32 : VOPProfile <[v2f32, v2f32, v2f32, untyped]>; 2904def VOP_V2I32_V2I32_V2I32 : VOPProfile <[v2i32, v2i32, v2i32, untyped]>; 2905def VOP_V4F32_V4I16_V4I16_V4F32 : VOPProfile <[v4f32, v4i16, v4i16, v4f32]>; 2906def VOP_V16F32_V4I16_V4I16_V16F32 : VOPProfile <[v16f32, v4i16, v4i16, 
v16f32]>; 2907def VOP_V32F32_V4I16_V4I16_V32F32 : VOPProfile <[v32f32, v4i16, v4i16, v32f32]>; 2908 2909def VOP_V4I32_I64_I64_V4I32 : VOPProfile <[v4i32, i64, i64, v4i32]>; 2910def VOP_V16I32_I64_I64_V16I32 : VOPProfile <[v16i32, i64, i64, v16i32]>; 2911def VOP_V4F32_V2F32_V2F32_V4F32 : VOPProfile <[v4f32, v2f32, v2f32, v4f32]>; 2912def VOP_V16F32_V2F32_V2F32_V16F32 : VOPProfile <[v16f32, v2f32, v2f32, v16f32]>; 2913def VOP_V4F32_I64_I64_V4F32 : VOPProfile <[v4f32, i64, i64, v4f32]>; 2914def VOP_V16F32_I64_I64_V16F32 : VOPProfile <[v16f32, i64, i64, v16f32]>; 2915 2916def VOP_V4F32_V4F16_V8F16_I32 : VOPProfile <[v4f32, v4f16, v8f16, i32]>; 2917def VOP_V4F32_V8F16_V16F16_I32 : VOPProfile <[v4f32, v8f16, v16f16, i32]>; 2918def VOP_V4F32_V8BF16_V16BF16_I32 : VOPProfile <[v4f32, v8bf16, v16bf16, i32]>; 2919def VOP_V16F32_V4F16_V8F16_I32 : VOPProfile <[v16f32, v4f16, v8f16, i32]>; 2920def VOP_V16F32_V8F16_V16F16_I32 : VOPProfile <[v16f32, v8f16, v16f16, i32]>; 2921def VOP_V16F32_V8BF16_V16BF16_I32 : VOPProfile <[v16f32, v8bf16, v16bf16, i32]>; 2922def VOP_V4F32_V4I16_V8I16_I32 : VOPProfile <[v4f32, v4i16, v8i16, i32]>; 2923def VOP_V16F32_V4I16_V8I16_I32 : VOPProfile <[v16f32, v4i16, v8i16, i32]>; 2924def VOP_V4I32_V2I32_V4I32_I32 : VOPProfile <[v4i32, v2i32, v4i32, i32]>; 2925def VOP_V16I32_V2I32_V4I32_I32 : VOPProfile <[v16i32, v2i32, v4i32, i32]>; 2926def VOP_V4F32_V2I32_V4I32_I32 : VOPProfile <[v4f32, v2i32, v4i32, i32]>; 2927def VOP_V16F32_V2I32_V4I32_I32 : VOPProfile <[v16f32, v2i32, v4i32, i32]>; 2928def VOP_V4I32_V4I32_V8I32_I32 : VOPProfile <[v4i32, v4i32, v8i32, i32]>; 2929def VOP_V16I32_V4I32_V8I32_I32 : VOPProfile <[v16i32, v4i32, v8i32, i32]>; 2930def VOP_V4F32_V4I32_V8I32_I32 : VOPProfile <[v4f32, v4i32, v8i32, i32]>; 2931def VOP_V16F32_V4I32_V8I32_I32 : VOPProfile <[v16f32, v4i32, v8i32, i32]>; 2932 2933def VOP_V4F32_V8F16_V8F16_V4F32 : VOPProfile <[v4f32, v8f16, v8f16, v4f32]>; 2934def VOP_V16F32_V8F16_V8F16_V16F32 : VOPProfile <[v16f32, v8f16, v8f16, v16f32]>; 2935def VOP_V16F32_V8BF16_V8BF16_V16F32 : VOPProfile <[v16f32, v8bf16, v8bf16, v16f32]>; 2936def VOP_V4F32_V8BF16_V8BF16_V4F32 : VOPProfile <[v4f32, v8bf16, v8bf16, v4f32]>; 2937def VOP_V4F32_V8I32_V8I32_V4F32 : VOPProfile <[v4f32, v8i32, v8i32, v4f32]>; 2938 2939def VOP_V4F32_V8I32_V6I32_V4F32 : VOPProfile <[v4f32, v8i32, v6i32, v4f32]>; 2940def VOP_V4F32_V6I32_V8I32_V4F32 : VOPProfile <[v4f32, v6i32, v8i32, v4f32]>; 2941def VOP_V4F32_V6I32_V6I32_V4F32 : VOPProfile <[v4f32, v6i32, v6i32, v4f32]>; 2942 2943def VOP_V4F32_V8I32_V4I32_V4F32 : VOPProfile <[v4f32, v8i32, v4i32, v4f32]>; 2944def VOP_V4F32_V4I32_V8I32_V4F32 : VOPProfile <[v4f32, v4i32, v8i32, v4f32]>; 2945def VOP_V4F32_V6I32_V4I32_V4F32 : VOPProfile <[v4f32, v6i32, v4i32, v4f32]>; 2946def VOP_V4F32_V4I32_V6I32_V4F32 : VOPProfile <[v4f32, v4i32, v6i32, v4f32]>; 2947def VOP_V4F32_V4I32_V4I32_V4F32 : VOPProfile <[v4f32, v4i32, v4i32, v4f32]>; 2948 2949def VOP_V16F32_V8I32_V8I32_V16F32 : VOPProfile <[v16f32, v8i32, v8i32, v16f32]>; 2950def VOP_V16F32_V8I32_V6I32_V16F32 : VOPProfile <[v16f32, v8i32, v6i32, v16f32]>; 2951def VOP_V16F32_V6I32_V8I32_V16F32 : VOPProfile <[v16f32, v6i32, v8i32, v16f32]>; 2952def VOP_V16F32_V6I32_V6I32_V16F32 : VOPProfile <[v16f32, v6i32, v6i32, v16f32]>; 2953 2954def VOP_V16F32_V8I32_V4I32_V16F32 : VOPProfile <[v16f32, v8i32, v4i32, v16f32]>; 2955def VOP_V16F32_V4I32_V8I32_V16F32 : VOPProfile <[v16f32, v4i32, v8i32, v16f32]>; 2956def VOP_V16F32_V6I32_V4I32_V16F32 : VOPProfile <[v16f32, v6i32, v4i32, v16f32]>; 2957def 
def VOP_V16F32_V4I32_V6I32_V16F32 : VOPProfile <[v16f32, v4i32, v6i32, v16f32]>;
def VOP_V16F32_V4I32_V4I32_V16F32 : VOPProfile <[v16f32, v4i32, v4i32, v16f32]>;

def VOP_V4I32_V4I32_V4I32_V4I32 : VOPProfile <[v4i32, v4i32, v4i32, v4i32]>;
def VOP_V16I32_V4I32_V4I32_V16I32 : VOPProfile <[v16i32, v4i32, v4i32, v16i32]>;


class Commutable_REV <string revOp, bit isOrig> {
  string RevOp = revOp;
  bit IsOrig = isOrig;
}

//===----------------------------------------------------------------------===//
// Interpolation opcodes
//===----------------------------------------------------------------------===//

class VINTRPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVINTRPDst">;

class VINTRP_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
  VINTRPCommon <outs, ins, "", pattern>,
  SIMCInstr<opName, SIEncodingFamily.NONE> {
  let isPseudo = 1;
  let isCodeGenOnly = 1;
}

// FIXME-GFX10: WIP.
class VINTRP_Real_si <bits <2> op, string opName, dag outs, dag ins,
                      string asm, int encodingFamily> :
  VINTRPCommon <outs, ins, asm, []>,
  VINTRPe <op>,
  SIMCInstr<opName, encodingFamily> {
}

class VINTRP_Real_vi <bits <2> op, string opName, dag outs, dag ins,
                      string asm> :
  VINTRPCommon <outs, ins, asm, []>,
  VINTRPe_vi <op>,
  SIMCInstr<opName, SIEncodingFamily.VI> {
  let AssemblerPredicate = isGFX8GFX9;
  let DecoderNamespace = "GFX8";
}

// FIXME-GFX10: WIP.
multiclass VINTRP_m <bits <2> op, dag outs, dag ins, string asm,
                     list<dag> pattern = []> {
  def "" : VINTRP_Pseudo <NAME, outs, ins, pattern>;

  let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
    def _si : VINTRP_Real_si <op, NAME, outs, ins, asm, SIEncodingFamily.SI>;
  } // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"

  def _vi : VINTRP_Real_vi <op, NAME, outs, ins, asm>;

  let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
    def _gfx10 : VINTRP_Real_si<op, NAME, outs, ins, asm, SIEncodingFamily.GFX10>;
  } // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
}

//===----------------------------------------------------------------------===//
// Vector instruction mappings
//===----------------------------------------------------------------------===//

// Maps an opcode in e32 form to its e64 equivalent
def getVOPe64 : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["Size", "VOP3"];
  let KeyCol = ["4", "0"];
  let ValueCols = [["8", "1"]];
}

// Maps an opcode in e64 form to its e32 equivalent
def getVOPe32 : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["Size", "VOP3"];
  let KeyCol = ["8", "1"];
  let ValueCols = [["4", "0"]];
}

// Maps ordinary instructions to their SDWA counterparts
def getSDWAOp : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["AsmVariantName"];
  let KeyCol = ["Default"];
  let ValueCols = [["SDWA"]];
}

// Maps SDWA instructions to their ordinary counterparts
def getBasicFromSDWAOp : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["AsmVariantName"];
  let KeyCol = ["SDWA"];
  let ValueCols = [["Default"]];
}
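// The InstrMapping defs in this section are emitted by TableGen as lookup
// functions in the AMDGPU namespace (typically via AMDGPUGenInstrInfo.inc),
// each returning -1 when no counterpart exists. A minimal, illustrative
// consumer-side sketch (hedged: MI and TII are assumed to be a MachineInstr
// and the SIInstrInfo in scope, not names taken from this file):
//
//   // Shrink a VOP3 (e64) instruction back to its e32 form, if it has one.
//   int E32Opc = AMDGPU::getVOPe32(MI.getOpcode());
//   if (E32Opc != -1)
//     MI.setDesc(TII->get(E32Opc));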
[["Default"]]; 3053} 3054 3055// Maps ordinary instructions to their DPP counterparts 3056def getDPPOp32 : InstrMapping { 3057 let FilterClass = "VOP"; 3058 let RowFields = ["OpName"]; 3059 let ColFields = ["AsmVariantName"]; 3060 let KeyCol = ["Default"]; 3061 let ValueCols = [["DPP"]]; 3062} 3063 3064def getDPPOp64 : InstrMapping { 3065 let FilterClass = "VOP"; 3066 let RowFields = ["OpName"]; 3067 let ColFields = ["AsmVariantName"]; 3068 let KeyCol = ["VOP3"]; 3069 let ValueCols = [["VOP3_DPP"]]; 3070} 3071 3072// Maps an commuted opcode to its original version 3073def getCommuteOrig : InstrMapping { 3074 let FilterClass = "Commutable_REV"; 3075 let RowFields = ["RevOp"]; 3076 let ColFields = ["IsOrig"]; 3077 let KeyCol = ["0"]; 3078 let ValueCols = [["1"]]; 3079} 3080 3081// Maps an original opcode to its commuted version 3082def getCommuteRev : InstrMapping { 3083 let FilterClass = "Commutable_REV"; 3084 let RowFields = ["RevOp"]; 3085 let ColFields = ["IsOrig"]; 3086 let KeyCol = ["1"]; 3087 let ValueCols = [["0"]]; 3088} 3089 3090def getMCOpcodeGen : InstrMapping { 3091 let FilterClass = "SIMCInstr"; 3092 let RowFields = ["PseudoInstr"]; 3093 let ColFields = ["Subtarget"]; 3094 let KeyCol = [!cast<string>(SIEncodingFamily.NONE)]; 3095 // These columns must be kept in sync with the SIEncodingFamily enumeration. 3096 let ValueCols = [[!cast<string>(SIEncodingFamily.SI)], 3097 [!cast<string>(SIEncodingFamily.VI)], 3098 [!cast<string>(SIEncodingFamily.SDWA)], 3099 [!cast<string>(SIEncodingFamily.SDWA9)], 3100 // GFX80 encoding is added to work around a multiple matching 3101 // issue for buffer instructions with unpacked d16 data. This 3102 // does not actually change the encoding, and thus may be 3103 // removed later. 3104 [!cast<string>(SIEncodingFamily.GFX80)], 3105 [!cast<string>(SIEncodingFamily.GFX9)], 3106 [!cast<string>(SIEncodingFamily.GFX10)], 3107 [!cast<string>(SIEncodingFamily.SDWA10)], 3108 [!cast<string>(SIEncodingFamily.GFX90A)], 3109 [!cast<string>(SIEncodingFamily.GFX940)], 3110 [!cast<string>(SIEncodingFamily.GFX11)], 3111 [!cast<string>(SIEncodingFamily.GFX12)]]; 3112} 3113 3114// Get equivalent SOPK instruction. 3115def getSOPKOp : InstrMapping { 3116 let FilterClass = "SOPKInstTable"; 3117 let RowFields = ["BaseCmpOp"]; 3118 let ColFields = ["IsSOPK"]; 3119 let KeyCol = ["0"]; 3120 let ValueCols = [["1"]]; 3121} 3122 3123def getAddr64Inst : InstrMapping { 3124 let FilterClass = "MUBUFAddr64Table"; 3125 let RowFields = ["OpName"]; 3126 let ColFields = ["IsAddr64"]; 3127 let KeyCol = ["0"]; 3128 let ValueCols = [["1"]]; 3129} 3130 3131def getIfAddr64Inst : InstrMapping { 3132 let FilterClass = "MUBUFAddr64Table"; 3133 let RowFields = ["OpName"]; 3134 let ColFields = ["IsAddr64"]; 3135 let KeyCol = ["1"]; 3136 let ValueCols = [["1"]]; 3137} 3138 3139// Maps a GLOBAL to its SADDR form. 3140def getGlobalSaddrOp : InstrMapping { 3141 let FilterClass = "GlobalSaddrTable"; 3142 let RowFields = ["SaddrOp"]; 3143 let ColFields = ["IsSaddr"]; 3144 let KeyCol = ["0"]; 3145 let ValueCols = [["1"]]; 3146} 3147 3148// Maps a GLOBAL SADDR to its VADDR form. 3149def getGlobalVaddrOp : InstrMapping { 3150 let FilterClass = "GlobalSaddrTable"; 3151 let RowFields = ["SaddrOp"]; 3152 let ColFields = ["IsSaddr"]; 3153 let KeyCol = ["1"]; 3154 let ValueCols = [["0"]]; 3155} 3156 3157// Maps a v_cmpx opcode with sdst to opcode without sdst. 
// Maps a v_cmpx opcode with sdst to opcode without sdst.
def getVCMPXNoSDstOp : InstrMapping {
  let FilterClass = "VCMPXNoSDstTable";
  let RowFields = ["NoSDstOp"];
  let ColFields = ["HasSDst"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}

// Maps a SOPP to a SOPP with S_NOP
def getSOPPWithRelaxation : InstrMapping {
  let FilterClass = "SOPPRelaxTable";
  let RowFields = ["KeyName"];
  let ColFields = ["IsRelaxed"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

// Maps flat scratch opcodes between addressing modes
def getFlatScratchInstSTfromSS : InstrMapping {
  let FilterClass = "FlatScratchInst";
  let RowFields = ["SVOp"];
  let ColFields = ["Mode"];
  let KeyCol = ["SS"];
  let ValueCols = [["ST"]];
}

def getFlatScratchInstSSfromSV : InstrMapping {
  let FilterClass = "FlatScratchInst";
  let RowFields = ["SVOp"];
  let ColFields = ["Mode"];
  let KeyCol = ["SV"];
  let ValueCols = [["SS"]];
}

def getFlatScratchInstSVfromSVS : InstrMapping {
  let FilterClass = "FlatScratchInst";
  let RowFields = ["SVOp"];
  let ColFields = ["Mode"];
  let KeyCol = ["SVS"];
  let ValueCols = [["SV"]];
}

def getFlatScratchInstSVfromSS : InstrMapping {
  let FilterClass = "FlatScratchInst";
  let RowFields = ["SVOp"];
  let ColFields = ["Mode"];
  let KeyCol = ["SS"];
  let ValueCols = [["SV"]];
}

def getMFMAEarlyClobberOp : InstrMapping {
  let FilterClass = "MFMATable";
  let RowFields = ["FMAOp"];
  let ColFields = ["IsMac"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}

// Map from an mfma using VGPRs to one using AGPRs.
def getMFMASrcCVDstAGPROp : InstrMapping {
  let FilterClass = "MFMATable";
  let RowFields = ["AGPROp"];
  let ColFields = ["MFMAKind"];
  let KeyCol = ["VGPR"];
  let ValueCols = [["AGPR"]];
}

// Maps a v_cmp instruction to its v_cmpx equivalent.
def getVCMPXOpFromVCMP : InstrMapping {
  let FilterClass = "VCMPVCMPXTable";
  let RowFields = ["VCMPOp"];
  let ColFields = ["IsVCMPX"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}
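// The defs below use GenericTable/SearchIndex rather than InstrMapping: the
// SearchableTables backend emits an array of CppTypeName records plus one
// lookup helper per PrimaryKeyName and per SearchIndex (typically via
// AMDGPUGenSearchableTables.inc). A hedged sketch of the consumer side, with
// the exact generated signature assumed rather than quoted:
//
//   if (const VOPDInfo *Info = AMDGPU::getVOPDOpcodeHelper(Opcode))
//     /* use Info->OpX, Info->OpY, Info->SubTgt */;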
// Map encoded mfma(_scale)?_f8f6f4 instructions depending on the
// number of registers required for the used format.
def getMFMA_F8F6F4_WithSize : GenericTable {
  let FilterClass = "MFMA_F8F6F4_WithSizeTable";
  let CppTypeName = "MFMA_F8F6F4_Info";
  let Fields = [ "Opcode", "F8F8Opcode", "NumRegsSrcA", "NumRegsSrcB" ];
  let PrimaryKey = [ "NumRegsSrcA", "NumRegsSrcB", "F8F8Opcode" ];
  let PrimaryKeyName = "getMFMA_F8F6F4_InstWithNumRegs";
}

def isMFMA_F8F6F4Table : GenericTable {
  let FilterClass = "MFMA_F8F6F4_WithSizeTable";
  let CppTypeName = "MFMA_F8F6F4_Info";
  let Fields = [ "Opcode", "F8F8Opcode", "NumRegsSrcA", "NumRegsSrcB" ];
  let PrimaryKey = [ "Opcode" ];
  let PrimaryKeyName = "isMFMA_F8F6F4";
}

def FP4FP8DstByteSelTable : GenericTable {
  let FilterClass = "VOP3_Pseudo";
  let CppTypeName = "FP4FP8DstByteSelInfo";
  let Fields = ["Opcode", "HasFP8DstByteSel", "HasFP4DstByteSel"];

  let PrimaryKey = ["Opcode"];
  let PrimaryKeyName = "getFP4FP8DstByteSelHelper";
}

def VOPDComponentTable : GenericTable {
  let FilterClass = "VOPD_Component";
  let CppTypeName = "VOPDComponentInfo";
  let Fields = ["BaseVOP", "VOPDOp", "CanBeVOPDX"];
  let PrimaryKey = ["BaseVOP"];
  let PrimaryKeyName = "getVOPDComponentHelper";
}

def getVOPDBaseFromComponent : SearchIndex {
  let Table = VOPDComponentTable;
  let Key = ["VOPDOp"];
}

def VOPDPairs : GenericTable {
  let FilterClass = "VOPD_Base";
  let CppTypeName = "VOPDInfo";
  let Fields = ["Opcode", "OpX", "OpY", "SubTgt"];
  let PrimaryKey = ["Opcode"];
  let PrimaryKeyName = "getVOPDOpcodeHelper";
}

def getVOPDInfoFromComponentOpcodes : SearchIndex {
  let Table = VOPDPairs;
  let Key = ["OpX", "OpY", "SubTgt"];
}

include "SIInstructions.td"

include "DSInstructions.td"
include "MIMGInstructions.td"