//===-- VOP1Instructions.td - Vector Instruction Definitions --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VOP1 Classes
//===----------------------------------------------------------------------===//

// 32-bit VOP1 encoding: src0 in bits 8-0, the opcode in bits 16-9, vdst in
// bits 24-17 and the fixed value 0x3f in bits 31-25. src0 is left unset when
// the profile has no src0; vdst is encoded as 0 when the profile does not
// emit a destination.
class VOP1e <bits<8> op, VOPProfile P> : Enc32 {
  bits<8> vdst;
  bits<9> src0;

  let Inst{8-0}   = !if(P.HasSrc0, src0{8-0}, ?);
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; //encoding
}

// SDWA form of the VOP1 encoding: bits 8-0 hold the constant 0xf9 instead of
// src0; the remaining fields match VOP1e.
class VOP1_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAe <P> {
  bits<8> vdst;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; // encoding
}

// Same low-dword layout as VOP1_SDWAe, on top of the VOP_SDWA9Ae base.
class VOP1_SDWA9Ae <bits<8> op, VOPProfile P> : VOP_SDWA9Ae <P> {
  bits<8> vdst;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; // encoding
}

// Pseudo instruction for the 32-bit (e32) form of a VOP1 operation. The
// record-name suffix is "_e32" unless VOP1Only is set, in which case it is
// empty.
class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP1Only = 0> :
  VOP_Pseudo <opName, !if(VOP1Only, "", "_e32"), P, P.Outs32, P.Ins32, "", pattern> {

  let AsmOperands = P.Asm32;

  let Size = 4;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;

  // The MODE register is read whenever the destination or src0 type is FP.
  let ReadsModeReg = !or(P.DstVT.isFP, P.Src0VT.isFP);

  let mayRaiseFPException = ReadsModeReg;

  let VOP1 = 1;
  let VALU = 1;
  let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]);

  let AsmVariantName = AMDGPUAsmVariants.Default;
}

// Real (encodable) counterpart of a VOP1_Pseudo for one encoding family.
// Operands, asm operands and the flags listed below are copied from the
// pseudo.
class VOP1_Real <VOP1_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemonic > :
  VOP_Real <ps>,
  InstSI <ps.OutOperandList, ps.InOperandList, real_name # ps.AsmOperands, []>,
  SIMCInstr <ps.PseudoInstr, EncodingFamily> {

  let VALU = 1;
  let VOP1 = 1;
  let isPseudo = 0;
  let isCodeGenOnly = 0;

  // copy relevant pseudo op flags
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let OtherPredicates      = ps.OtherPredicates;
  let True16Predicate      = ps.True16Predicate;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let AsmVariantName       = ps.AsmVariantName;
  let Constraints          = ps.Constraints;
  let DisableEncoding      = ps.DisableEncoding;
  let TSFlags              = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let Uses                 = ps.Uses;
  let Defs                 = ps.Defs;
  let SchedRW              = ps.SchedRW;
  let mayLoad              = ps.mayLoad;
  let mayStore             = ps.mayStore;
  let TRANS                = ps.TRANS;
  let isConvergent         = ps.isConvergent;
}

// VOP1_Real whose encoding family, assembler predicate and decoder namespace
// all come from a GFXGen record.
class VOP1_Real_Gen <VOP1_Pseudo ps, GFXGen Gen, string real_name = ps.Mnemonic> :
  VOP1_Real <ps, Gen.Subtarget, real_name> {
  let AssemblerPredicate = Gen.AssemblerPredicate;
  let DecoderNamespace = Gen.DecoderNamespace;
}

// SDWA pseudo for VOP1; uses the cvtSdwaVOP1 asm match converter.
class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_SDWA_Pseudo <OpName, P, pattern> {
  let AsmMatchConverter = "cvtSdwaVOP1";
}

// DPP pseudo for VOP1; adds nothing on top of VOP_DPP_Pseudo.
class VOP1_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_DPP_Pseudo <OpName, P, pattern> {
}

// Builds the selection pattern list for a unary node. The source operand is
// matched through VOP3Mods when the profile has modifiers, through VOP3OMods
// when it only has omod, and as a plain Src0RC32 operand otherwise.
class getVOP1Pat <SDPatternOperator node, VOPProfile P> : LetDummies {
  list<dag> ret =
    !if(P.HasModifiers,
        [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods P.Src0VT:$src0, i32:$src0_modifiers))))],
        !if(P.HasOMod,
            [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3OMods P.Src0VT:$src0,
                                                  i1:$clamp, i32:$omod))))],
            [(set P.DstVT:$vdst, (node (P.Src0VT P.Src0RC32:$src0)))]
        )
    );
}

// Defines the pseudos for one VOP1 operation: _e32 and _e64 always, plus
// _sdwa, _dpp and _e64_dpp when the profile enables them, together with the
// mnemonic aliases for the suffixed spellings. When VOPDOp is not -1 the _e32
// pseudo additionally becomes a VOPD_Component.
multiclass VOP1Inst <string opName, VOPProfile P,
                     SDPatternOperator node = null_frag, int VOPDOp = -1> {
  // We only want to set this on the basic, non-SDWA or DPP forms.
  defvar should_mov_imm = !or(!eq(opName, "v_mov_b32"),
                              !eq(opName, "v_mov_b64"));

  let isMoveImm = should_mov_imm in {
    if !eq(VOPDOp, -1) then
      def _e32 : VOP1_Pseudo <opName, P>;
    else
      // Only for V_MOV_B32
      def _e32 : VOP1_Pseudo <opName, P>, VOPD_Component<VOPDOp, opName>;
    def _e64 : VOP3InstBase <opName, P, node>;
  }

  if P.HasExtSDWA then
    def _sdwa : VOP1_SDWA_Pseudo <opName, P>;

  if P.HasExtDPP then
    def _dpp : VOP1_DPP_Pseudo <opName, P>;

  let SubtargetPredicate = isGFX11Plus in {
    if P.HasExtVOP3DPP then
      def _e64_dpp : VOP3_DPP_Pseudo <opName, P>;
  } // End SubtargetPredicate = isGFX11Plus

  def : LetDummies, AMDGPUMnemonicAlias<opName#"_e32", opName>;
  def : LetDummies, AMDGPUMnemonicAlias<opName#"_e64", opName>;

  if P.HasExtSDWA then
    def : LetDummies, AMDGPUMnemonicAlias<opName#"_sdwa", opName>;

  if P.HasExtDPP then
    def : LetDummies, AMDGPUMnemonicAlias<opName#"_dpp", opName, AMDGPUAsmVariants.DPP>;
}

// Instantiates VOP1Inst three times with distinct profiles and predicates:
// the plain form (no true16 instructions), the "_t16" form (real true16) and
// the "_fake16" form (fake true16).
multiclass VOP1Inst_t16_with_profiles<string opName,
                                      VOPProfile P,
                                      VOPProfile P_t16,
                                      VOPProfile P_fake16,
                                      SDPatternOperator node = null_frag> {
  let OtherPredicates = [NotHasTrue16BitInsts, Has16BitInsts] in {
    defm NAME : VOP1Inst<opName, P, node>;
  }
  let OtherPredicates = [UseRealTrue16Insts] in {
    defm _t16 : VOP1Inst<opName#"_t16", P_t16, node>;
  }
  let OtherPredicates = [UseFakeTrue16Insts] in {
    defm _fake16 : VOP1Inst<opName#"_fake16", P_fake16, node>;
  }
}

// Convenience wrapper that derives the t16 / fake16 profiles from P.
multiclass VOP1Inst_t16<string opName, VOPProfile P,
                        SDPatternOperator node = null_frag> :
  VOP1Inst_t16_with_profiles<opName, P, VOPProfile_True16<P>, VOPProfile_Fake16<P>, node>;

// Special profile for instructions which have clamp
// and output modifiers (but have no input modifiers)
class VOPProfileI2F<ValueType dstVt, ValueType srcVt> :
  VOPProfile<[dstVt, srcVt, untyped, untyped]> {

  let Ins64 = (ins Src0RC64:$src0, Clamp:$clamp, omod:$omod);
  let InsVOP3Base = (ins Src0VOP3DPP:$src0, Clamp:$clamp, omod:$omod);
  let AsmVOP3Base = "$vdst, $src0$clamp$omod";

  let HasModifiers = 0;
  let HasClamp = 1;
}

def VOP1_F64_I32 : VOPProfileI2F <f64, i32>;
def VOP1_F32_I32 : VOPProfileI2F <f32, i32>;
def VOP1_F16_I16 : VOPProfileI2F <f16, i16>;
def VOP1_F16_I16_t16 : VOPProfile_True16 <VOP_F16_I16> {
  let HasClamp = 1;
}
def VOP1_F16_I16_fake16 : VOPProfile_Fake16<VOP_F16_I16> {
  let HasModifiers = 0;
  let HasOMod = 1;
  let HasClamp = 1;
}

def VOP_NOP_PROFILE : VOPProfile <[untyped, untyped, untyped, untyped]>{
  let HasExtVOP3DPP = 0;
}

// OMod clears exceptions when set. OMod was always an operand, but it's
// now explicitly set.
class VOP_SPECIAL_OMOD_PROF<ValueType dstVt, ValueType srcVt> :
  VOPProfile<[dstVt, srcVt, untyped, untyped]> {

  let HasOMod = 1;
}
def VOP_I32_F32_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f32>;
def VOP_I32_F64_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f64>;
def VOP_I16_F16_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i16, f16>;
def VOP_I16_F16_SPECIAL_OMOD_t16 : VOPProfile_True16<VOP_I16_F16> {
  let HasOMod = 1;
}
def VOP_I16_F16_SPECIAL_OMOD_fake16 : VOPProfile_Fake16<VOP_I16_F16> {
  let HasOMod = 1;
}


//===----------------------------------------------------------------------===//
// VOP1 Instructions
//===----------------------------------------------------------------------===//

defm V_NOP : VOP1Inst <"v_nop", VOP_NOP_PROFILE>;

def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped]> {
  let InsVOPDX = (ins Src0RC32:$src0X);
  let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X);
  let InsVOPDY = (ins Src0RC32:$src0Y);
  let InsVOPDYDeferred = (ins VSrc_f32_Deferred:$src0Y);
}

let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>;

let SubtargetPredicate = isGFX940Plus, SchedRW = [Write64Bit] in
defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>;
} // End isReMaterializable = 1, isAsCheapAsAMove = 1

def VOP_READFIRSTLANE : VOPProfile <[i32, i32, untyped, untyped]> {
  let DstRC = RegisterOperand<SReg_32>;
  let Src0RC32 = VRegOrLdsSrc_32;
  let Asm32 = " $vdst, $src0";
}

// FIXME: Specify SchedRW for READFIRSTLANE_B32
// TODO: There is VOP3 encoding also
def V_READFIRSTLANE_B32 : VOP1_Pseudo <"v_readfirstlane_b32", VOP_READFIRSTLANE,
                                       [], 1> {
  let isConvergent = 1;
}

// Select int_amdgcn_readfirstlane to V_READFIRSTLANE_B32 for every type in
// Reg32Types.
foreach vt = Reg32Types.types in {
  def : GCNPat<(vt (int_amdgcn_readfirstlane (vt VRegOrLdsSrc_32:$src0))),
    (V_READFIRSTLANE_B32 (vt VRegOrLdsSrc_32:$src0))
  >;
}

let isReMaterializable = 1 in {
let SchedRW = [WriteDoubleCvt] in {
// OMod clears exceptions when set in this instruction
defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64_SPECIAL_OMOD, fp_to_sint>;

let mayRaiseFPException = 0 in {
defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>;
}

defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>;
defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, any_fpextend>;
// OMod clears exceptions when set in this instruction
defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64_SPECIAL_OMOD, fp_to_uint>;

let mayRaiseFPException = 0 in {
defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>;
}

} // End SchedRW = [WriteDoubleCvt]

let SchedRW = [WriteFloatCvt] in {

// XXX: Does this really not raise exceptions? The manual claims the
// 16-bit ones can.
let mayRaiseFPException = 0 in {
defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>;
defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>;
}

// OMod clears exceptions when set in these 2 instructions
defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32_SPECIAL_OMOD, fp_to_uint>;
defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32_SPECIAL_OMOD, fp_to_sint>;
let FPDPRounding = 1, isReMaterializable = 0 in {
  // V_CVT_F16_F32 and V_CVT_F32_F16 are special cases because they are
  // present in targets without Has16BitInsts. Otherwise they could use
  // class VOP1Inst_t16
  let OtherPredicates = [NotHasTrue16BitInsts] in
  defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, any_fpround>;
  let OtherPredicates = [UseRealTrue16Insts] in
  defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_True16<VOP_F16_F32>, any_fpround>;
  let OtherPredicates = [UseFakeTrue16Insts] in
  defm V_CVT_F16_F32_fake16 : VOP1Inst <"v_cvt_f16_f32_fake16", VOPProfile_Fake16<VOP_F16_F32>, any_fpround>;
} // End FPDPRounding = 1, isReMaterializable = 0
let OtherPredicates = [NotHasTrue16BitInsts] in
  defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, any_fpextend>;
let OtherPredicates = [UseRealTrue16Insts] in
  defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_True16<VOP_F32_F16>, any_fpextend>;
let OtherPredicates = [UseFakeTrue16Insts] in
  defm V_CVT_F32_F16_fake16 : VOP1Inst <"v_cvt_f32_f16_fake16", VOPProfile_Fake16<VOP_F32_F16>, any_fpextend>;

let SubtargetPredicate = HasBF16ConversionInsts in
defm V_CVT_F32_BF16 : VOP1Inst_t16 <"v_cvt_f32_bf16", VOP_F32_BF16>;

let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>;
} // End ReadsModeReg = 0, mayRaiseFPException = 0
} // End SchedRW = [WriteFloatCvt]

let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>;
defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP1_F32_I32, AMDGPUcvt_f32_ubyte1>;
defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP1_F32_I32, AMDGPUcvt_f32_ubyte2>;
defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP1_F32_I32, AMDGPUcvt_f32_ubyte3>;
} // End ReadsModeReg = 0, mayRaiseFPException = 0

defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>;
defm V_TRUNC_F32 : VOP1Inst <"v_trunc_f32", VOP_F32_F32, ftrunc>;
defm V_CEIL_F32 : VOP1Inst <"v_ceil_f32", VOP_F32_F32, fceil>;
defm V_RNDNE_F32 : VOP1Inst <"v_rndne_f32", VOP_F32_F32, froundeven>;
defm V_FLOOR_F32 : VOP1Inst <"v_floor_f32", VOP_F32_F32, ffloor>;

let TRANS = 1, SchedRW = [WriteTrans32] in {
defm V_EXP_F32 : VOP1Inst <"v_exp_f32", VOP_F32_F32, AMDGPUexp>;
defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, AMDGPUlog>;
defm V_RCP_F32 : VOP1Inst <"v_rcp_f32", VOP_F32_F32, AMDGPUrcp>;
defm V_RCP_IFLAG_F32 : VOP1Inst <"v_rcp_iflag_f32", VOP_F32_F32, AMDGPUrcp_iflag>;
defm V_RSQ_F32 : VOP1Inst <"v_rsq_f32", VOP_F32_F32, AMDGPUrsq>;
defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, int_amdgcn_sqrt>;
} // End TRANS = 1, SchedRW = [WriteTrans32]

let TRANS = 1, SchedRW = [WriteTrans64] in {
defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64, AMDGPUrcp>;
defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64, AMDGPUrsq>;
defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64, int_amdgcn_sqrt>;
} // End TRANS = 1, SchedRW = [WriteTrans64]

let TRANS = 1, SchedRW = [WriteTrans32] in {
defm V_SIN_F32 : VOP1Inst <"v_sin_f32", VOP_F32_F32, AMDGPUsin>;
defm V_COS_F32 : VOP1Inst <"v_cos_f32", VOP_F32_F32, AMDGPUcos>;
} // End TRANS = 1, SchedRW = [WriteTrans32]

defm V_NOT_B32 : VOP1Inst <"v_not_b32", VOP_I32_I32>;
defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32, DivergentUnaryFrag<bitreverse>>;
defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32, AMDGPUffbh_u32>;
defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32, AMDGPUffbl_b32>;
defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32, AMDGPUffbh_i32>;

let SchedRW = [WriteDoubleAdd] in {
defm V_FREXP_EXP_I32_F64 : VOP1Inst <"v_frexp_exp_i32_f64", VOP_I32_F64_SPECIAL_OMOD, int_amdgcn_frexp_exp>;
defm V_FREXP_MANT_F64 : VOP1Inst <"v_frexp_mant_f64", VOP_F64_F64, int_amdgcn_frexp_mant>;
let FPDPRounding = 1 in {
defm V_FRACT_F64 : VOP1Inst <"v_fract_f64", VOP_F64_F64, AMDGPUfract>;
} // End FPDPRounding = 1
} // End SchedRW = [WriteDoubleAdd]

defm V_FREXP_EXP_I32_F32 : VOP1Inst <"v_frexp_exp_i32_f32", VOP_I32_F32, int_amdgcn_frexp_exp>;
defm V_FREXP_MANT_F32 : VOP1Inst <"v_frexp_mant_f32", VOP_F32_F32, int_amdgcn_frexp_mant>;
} // End isReMaterializable = 1

defm V_CLREXCP : VOP1Inst <"v_clrexcp", VOP_NO_EXT<VOP_NONE>>;

// Restrict src0 to be VGPR
def VOP_MOVRELS : VOPProfile<[i32, i32, untyped, untyped]> {
  let Src0RC32 = VRegSrc_32;
  let Src0RC64 = VRegSrc_32;
}

// Profile for the permlane swap instructions: two outputs ($vdst and
// $src0_out) and a $vdst_in input; the instruction definitions tie them
// together via Constraints. No clamp, DPP or SDWA variants.
def VOP_PERMLANE_SWAP : VOPProfile<[i32, i32, untyped, untyped]> {
  let Outs32 = (outs DstRC:$vdst, VRegSrc_32:$src0_out);
  let Outs64 = (outs DstRC64:$vdst, VRegSrc_32:$src0_out);

  let Src0RC32 = VRegSrc_32;
  let Src0RC64 = VRegSrc_32;
  let HasClamp = 0;
  let HasExtVOP3DPP = 0;
  let HasExtDPP = 0;
  let HasExtSDWA = 0;

  let Ins32 = (ins Src0RC64:$vdst_in, Src0RC32:$src0);
  let Ins64 = (ins Src0RC64:$vdst_in, Src0RC64:$src0, Dpp16FI:$fi, DppBoundCtrl:$bound_ctrl);
  let InsVOP3OpSel = (ins Src0RC64:$vdst_in, Src0RC64:$src0, Dpp16FI:$fi, DppBoundCtrl:$bound_ctrl);
  let Asm64 = "$vdst, $src0$bound_ctrl$fi";
  let AsmVOP3OpSel = "$vdst, $src0$bound_ctrl$fi";
}

// Special case because there are no true output operands. Hack vdst
// to be a src operand. The custom inserter must add a tied implicit
// def and use of the super register since there seems to be no way to
// add an implicit def of a virtual register in tablegen.
class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, untyped]> {
  let Src0RC32 = VOPDstOperand<VGPR_32>;
  let Src0RC64 = VOPDstOperand<VGPR_32>;

  let Outs = (outs);
  let Ins32 = (ins Src0RC32:$vdst, Src1RC:$src0);
  let Ins64 = (ins Src0RC64:$vdst, Src1RC:$src0);
  let Asm32 = getAsm32<1, 1>.ret;

  let OutsSDWA = (outs Src0RC32:$vdst);
  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                     Clamp:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel);
  let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret;

  let OutsDPP = (outs Src0RC32:$vdst);
  let InsDPP16 = (ins Src0RC32:$old, Src0RC32:$src0,
                      dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
                      DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl, Dpp16FI:$fi);
  let AsmDPP16 = getAsmDPP16<1, 1, 0>.ret;
  let InsDPP8 = (ins Src0RC32:$old, Src0RC32:$src0, dpp8:$dpp8, Dpp8FI:$fi);
  let AsmDPP8 = getAsmDPP8<1, 1, 0>.ret;

  let OutsVOP3DPP = (outs Src0RC64:$vdst);
  let InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, Src0RC64, NumSrcArgs>.ret;
  let InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, Src0RC64, NumSrcArgs>.ret;
  let InsVOP3DPP8 = getInsVOP3DPP8<InsVOP3Base, Src0RC64, NumSrcArgs>.ret;

  let AsmVOP3Base =
    getAsmVOP3Base<NumSrcArgs, 1 /* HasDst */, HasClamp,
                   HasOpSel, HasOMod, IsVOP3P, HasModifiers,
                   HasModifiers, HasModifiers, HasModifiers>.ret;

  let HasDst = 0;
  let EmitDst = 1; // force vdst emission
}

def VOP_MOVRELD : VOP_MOVREL<VSrc_b32>;
def VOP_MOVRELSD : VOP_MOVREL<VRegSrc_32>;

let SubtargetPredicate = HasMovrel, Uses = [M0, EXEC] in {
 // v_movreld_b32 is a special case because the destination output
 // register is really a source. It isn't actually read (but may be
 // written), and is only to provide the base register to start
 // indexing from. Tablegen seems to not let you define an implicit
 // virtual register output for the super register being written into,
 // so this must have an implicit def of the register added to it.
defm V_MOVRELD_B32 : VOP1Inst <"v_movreld_b32", VOP_MOVRELD>;
defm V_MOVRELS_B32 : VOP1Inst <"v_movrels_b32", VOP_MOVRELS>;
defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_MOVRELSD>;
} // End SubtargetPredicate = HasMovrel, Uses = [M0, EXEC]

let isReMaterializable = 1 in {
let SubtargetPredicate = isGFX6GFX7 in {
  let TRANS = 1, SchedRW = [WriteTrans32] in {
    defm V_LOG_CLAMP_F32 :
      VOP1Inst<"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>;
    defm V_RCP_CLAMP_F32 :
      VOP1Inst<"v_rcp_clamp_f32", VOP_F32_F32>;
    defm V_RCP_LEGACY_F32 :
      VOP1Inst<"v_rcp_legacy_f32", VOP_F32_F32, AMDGPUrcp_legacy>;
    defm V_RSQ_CLAMP_F32 :
      VOP1Inst<"v_rsq_clamp_f32", VOP_F32_F32, AMDGPUrsq_clamp>;
    defm V_RSQ_LEGACY_F32 :
      VOP1Inst<"v_rsq_legacy_f32", VOP_F32_F32, int_amdgcn_rsq_legacy>;
  } // End TRANS = 1, SchedRW = [WriteTrans32]

  // NOTE(review): unlike the other WriteTrans64 group in this file, TRANS is
  // not set for these two instructions - confirm whether that is intentional.
  let SchedRW = [WriteTrans64] in {
    defm V_RCP_CLAMP_F64 :
      VOP1Inst<"v_rcp_clamp_f64", VOP_F64_F64>;
    defm V_RSQ_CLAMP_F64 :
      VOP1Inst<"v_rsq_clamp_f64", VOP_F64_F64, AMDGPUrsq_clamp>;
  } // End SchedRW = [WriteTrans64]
} // End SubtargetPredicate = isGFX6GFX7

let SubtargetPredicate = isGFX7GFX8GFX9 in {
  let TRANS = 1, SchedRW = [WriteTrans32] in {
    defm V_LOG_LEGACY_F32 : VOP1Inst<"v_log_legacy_f32", VOP_F32_F32>;
    defm V_EXP_LEGACY_F32 : VOP1Inst<"v_exp_legacy_f32", VOP_F32_F32>;
  } // End TRANS = 1, SchedRW = [WriteTrans32]
} // End SubtargetPredicate = isGFX7GFX8GFX9

let SubtargetPredicate = isGFX7Plus in {
  let SchedRW = [WriteDoubleAdd] in {
    defm V_TRUNC_F64 : VOP1Inst<"v_trunc_f64", VOP_F64_F64, ftrunc>;
    defm V_CEIL_F64 : VOP1Inst<"v_ceil_f64", VOP_F64_F64, fceil>;
    defm V_RNDNE_F64 : VOP1Inst<"v_rndne_f64", VOP_F64_F64, froundeven>;
    defm V_FLOOR_F64 : VOP1Inst<"v_floor_f64", VOP_F64_F64, ffloor>;
  } // End SchedRW = [WriteDoubleAdd]
} // End SubtargetPredicate = isGFX7Plus
} // End isReMaterializable = 1

let FPDPRounding = 1 in {
defm V_CVT_F16_U16 : VOP1Inst_t16_with_profiles <"v_cvt_f16_u16", VOP1_F16_I16, VOP1_F16_I16_t16, VOP1_F16_I16_fake16, uint_to_fp>;
defm V_CVT_F16_I16 : VOP1Inst_t16_with_profiles <"v_cvt_f16_i16", VOP1_F16_I16, VOP1_F16_I16_t16, VOP1_F16_I16_fake16, sint_to_fp>;

} // End FPDPRounding = 1
// OMod clears exceptions when set in these two instructions
defm V_CVT_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_u16_f16",
   VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16, fp_to_uint>;
defm V_CVT_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_i16_f16",
   VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16, fp_to_sint>;

let TRANS = 1, SchedRW = [WriteTrans32] in {
defm V_RCP_F16 : VOP1Inst_t16 <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
defm V_SQRT_F16 : VOP1Inst_t16 <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>;
defm V_RSQ_F16 : VOP1Inst_t16 <"v_rsq_f16", VOP_F16_F16, AMDGPUrsq>;
defm V_LOG_F16 : VOP1Inst_t16 <"v_log_f16", VOP_F16_F16, AMDGPUlogf16>;
defm V_EXP_F16 : VOP1Inst_t16 <"v_exp_f16", VOP_F16_F16, AMDGPUexpf16>;
defm V_SIN_F16 : VOP1Inst_t16 <"v_sin_f16", VOP_F16_F16, AMDGPUsin>;
defm V_COS_F16 : VOP1Inst_t16 <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;
} // End TRANS = 1, SchedRW = [WriteTrans32]
defm V_FREXP_MANT_F16 : VOP1Inst_t16 <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
defm V_FREXP_EXP_I16_F16 : VOP1Inst_t16_with_profiles <"v_frexp_exp_i16_f16",
   VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16, int_amdgcn_frexp_exp>;
defm V_FLOOR_F16 : VOP1Inst_t16 <"v_floor_f16", VOP_F16_F16, ffloor>;
defm V_CEIL_F16 : VOP1Inst_t16 <"v_ceil_f16", VOP_F16_F16, fceil>;
defm V_TRUNC_F16 : VOP1Inst_t16 <"v_trunc_f16", VOP_F16_F16, ftrunc>;
defm V_RNDNE_F16 : VOP1Inst_t16 <"v_rndne_f16", VOP_F16_F16, froundeven>;
let FPDPRounding = 1 in {
defm V_FRACT_F16 : VOP1Inst_t16 <"v_fract_f16", VOP_F16_F16, AMDGPUfract>;
} // End FPDPRounding = 1

// f16_to_fp / AMDGPUfp_to_f16 selection patterns, one pair per true16 mode
// (no true16, real true16, fake true16).
let OtherPredicates = [Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in {
def : GCNPat<
    (f32 (f16_to_fp i16:$src)),
    (V_CVT_F32_F16_e32 $src)
>;
def : GCNPat<
    (i16 (AMDGPUfp_to_f16 f32:$src)),
    (V_CVT_F16_F32_e32 $src)
>;
}
let True16Predicate = UseRealTrue16Insts in {
def : GCNPat<
    (f32 (f16_to_fp i16:$src)),
    (V_CVT_F32_F16_t16_e32 $src)
>;
def : GCNPat<
    (i16 (AMDGPUfp_to_f16 f32:$src)),
    (V_CVT_F16_F32_t16_e32 $src)
>;
}
let True16Predicate = UseFakeTrue16Insts in {
def : GCNPat<
    (f32 (f16_to_fp i16:$src)),
    (V_CVT_F32_F16_fake16_e32 $src)
>;
def : GCNPat<
    (i16 (AMDGPUfp_to_f16 f32:$src)),
    (V_CVT_F16_F32_fake16_e32 $src)
>;
}

// Profile for the 32-bit swap instructions: two outputs and two inputs,
// while only $vdst and $src0 appear in the asm string.
def VOP_SWAP_I32 : VOPProfile<[i32, i32, untyped, untyped]> {
  let Outs32 = (outs VGPR_32:$vdst, VRegSrc_32:$vdst1);
  let Ins32 = (ins VRegSrc_32:$src0, VGPR_32:$src1);
  let Asm32 = " $vdst, $src0";
}

let SubtargetPredicate = isGFX9Plus in {
  def V_SWAP_B32 : VOP1_Pseudo<"v_swap_b32", VOP_SWAP_I32, [], 1> {
    // Each output is tied to the opposite input; the tied operands are
    // excluded from the encoding.
    let Constraints = "$vdst = $src1, $vdst1 = $src0";
    let DisableEncoding = "$vdst1,$src1";
    let SchedRW = [Write64Bit, Write64Bit];
  }

  let isReMaterializable = 1 in
  defm V_SAT_PK_U8_I16 : VOP1Inst_t16<"v_sat_pk_u8_i16", VOP_I16_I32>;

  let mayRaiseFPException = 0 in {
    defm V_CVT_NORM_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_i16_f16",
      VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
    defm V_CVT_NORM_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_norm_u16_f16",
      VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16>;
  } // End mayRaiseFPException = 0
} // End SubtargetPredicate = isGFX9Plus

let SubtargetPredicate = isGFX9Only in {
  defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>;
} // End SubtargetPredicate = isGFX9Only

// Profile for the FP8/BF8 -> f32 conversions. Enables the DPP and SDWA
// variants and uses an SDWA operand list and asm string without dst_sel.
class VOPProfile_Base_CVT_F32_F8<ValueType vt> : VOPProfileI2F <vt, i32> {
  let HasExtDPP = 1;
  let HasExtSDWA = 1;
  let HasExtSDWA9 = 1;
  let HasExt = 1;
  let DstRCSDWA = getVALUDstForVT<vt>.ret;
  let InsSDWA = (ins Bin32SDWAInputMods:$src0_modifiers, Src0SDWA:$src0,
                     Clamp:$clamp, omod:$omod, src0_sel:$src0_sel);
  let AsmSDWA = "$vdst, $src0_modifiers$clamp$omod $src0_sel"; // No dst_sel
  let AsmSDWA9 = AsmSDWA;
  let EmitDstSel = 0;
}

def VOPProfileCVT_F32_F8 : VOPProfile_Base_CVT_F32_F8 <f32>;
def VOPProfileCVT_PK_F32_F8 : VOPProfile_Base_CVT_F32_F8 <v2f32>;

let OtherPredicates = [HasFP8ConversionInsts], mayRaiseFPException = 0,
    SchedRW = [WriteFloatCvt] in {
  defm V_CVT_F32_FP8 : VOP1Inst<"v_cvt_f32_fp8", VOPProfileCVT_F32_F8>;
  defm V_CVT_F32_BF8 : VOP1Inst<"v_cvt_f32_bf8", VOPProfileCVT_F32_F8>;
  defm V_CVT_PK_F32_FP8 : VOP1Inst<"v_cvt_pk_f32_fp8", VOPProfileCVT_PK_F32_F8>;
  defm V_CVT_PK_F32_BF8 : VOP1Inst<"v_cvt_pk_f32_bf8", VOPProfileCVT_PK_F32_F8>;
}

// Selects (node src, index) to the SDWA form; index is passed as the last
// operand of the SDWA instruction.
class Cvt_F32_F8_Pat<SDPatternOperator node, int index,
    VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
    (f32 (node i32:$src, index)),
    (inst_sdwa 0, $src, 0, 0, index)
>;

let SubtargetPredicate = HasFP8ConversionInsts in {
let OtherPredicates = [HasCvtFP8VOP1Bug] in {
  def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)),
               (V_CVT_F32_FP8_sdwa 0, $src, 0, 0, 0)>;
  def : GCNPat<(f32 (int_amdgcn_cvt_f32_bf8 i32:$src, 0)),
               (V_CVT_F32_BF8_sdwa 0, $src, 0, 0, 0)>;
}

let OtherPredicates = [HasNoCvtFP8VOP1Bug, HasSDWA] in { // FIXME: HasSDWA is a substitute for !gfx12
  def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)),
               (V_CVT_F32_FP8_e32 $src)>;
  def : GCNPat<(f32 (int_amdgcn_cvt_f32_bf8 i32:$src, 0)),
               (V_CVT_F32_BF8_e32 $src)>;
}

let OtherPredicates = [HasSDWA] in {
foreach Index = [1, 2, 3] in {
  def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_fp8, Index, V_CVT_F32_FP8_sdwa>;
  def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_bf8, Index, V_CVT_F32_BF8_sdwa>;
}
} // End OtherPredicates = [HasSDWA]

} // End SubtargetPredicate = HasFP8ConversionInsts

// Packed variant: a non-zero index selects the SDWA form with SDWA.WORD_1,
// index 0 selects the plain e32 form.
class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index,
    VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
    (v2f32 (node i32:$src, index)),
    !if (index,
         (inst_sdwa 0, $src, 0, 0, SDWA.WORD_1),
         (inst_e32 $src))
>;

let SubtargetPredicate = HasFP8ConversionInsts, OtherPredicates = [HasSDWA] in {
  foreach Index = [0, -1] in {
    def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_fp8, Index,
                            V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_sdwa>;
    def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_bf8, Index,
                            V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_sdwa>;
  }
}

let HasClamp = 0, HasOMod = 0, HasExtDPP = 0, HasExtVOP3DPP = 0,
    HasOpSel = 1 in {
  // Input modifiers are not supported
  // NB: fake16 VOP1 does not support op_sel.
  def VOPProfile_Base_CVT_PK_F32_F8_fake16 : VOPProfile_Fake16<VOPProfile<[v2f32, f16, untyped, untyped]>> {
    let Src0Mod = IntT16InputMods<1/*IsFake16*/>;
  }
  def VOPProfile_Base_CVT_PK_F32_F8_t16 : VOPProfile_True16<VOPProfile<[v2f32, f16, untyped, untyped]>> {
    let Src0Mod = IntT16InputMods<0/*IsFake16*/>;
  }
}

// Profile for FP8 conversions that take a byte_sel operand: the standard
// Ins64 / InsVOP3Base operand lists with ByteSel:$byte_sel appended.
class VOPProfile_Base_CVT_F_F8_ByteSel<ValueType DstVT> : VOPProfile<[DstVT, i32, untyped, untyped]> {
  let IsFP8SrcByteSel = 1;
  let HasOpSel = 0;
  let HasExtDPP = 1;
  let HasExtVOP3DPP = 1;
  let HasExtSDWA = 0;
  let HasClamp = 0;
  let HasOMod = 0;
  let HasModifiers = 0;

  defvar bytesel = (ins ByteSel:$byte_sel);
  let Ins64 = !con(getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
                            HasClamp, HasModifiers, HasSrc2Mods,
                            HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret,
                   bytesel);
  let InsVOP3Base = !con(getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, Src2VOP3DPP,
                                        NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods,
                                        HasOMod, Src0ModVOP3DPP, Src1ModVOP3DPP,
                                        Src2ModVOP3DPP, HasOpSel>.ret,
                         bytesel);
}

let SubtargetPredicate = isGFX12Plus, OtherPredicates = [HasFP8ConversionInsts],
    mayRaiseFPException = 0, SchedRW = [WriteFloatCvt] in {
  defm V_CVT_F32_FP8_OP_SEL : VOP1Inst<"v_cvt_f32_fp8_op_sel", VOPProfile_Base_CVT_F_F8_ByteSel<f32>>;
  defm V_CVT_F32_BF8_OP_SEL : VOP1Inst<"v_cvt_f32_bf8_op_sel", VOPProfile_Base_CVT_F_F8_ByteSel<f32>>;

  let True16Predicate = UseFakeTrue16Insts in {
    defm V_CVT_PK_F32_FP8_fake16 : VOP1Inst<"v_cvt_pk_f32_fp8_fake16", VOPProfile_Base_CVT_PK_F32_F8_fake16>;
    defm V_CVT_PK_F32_BF8_fake16 : VOP1Inst<"v_cvt_pk_f32_bf8_fake16", VOPProfile_Base_CVT_PK_F32_F8_fake16>;
  }
  let True16Predicate = UseRealTrue16Insts in {
    defm V_CVT_PK_F32_FP8_t16 : VOP1Inst<"v_cvt_pk_f32_fp8_t16", VOPProfile_Base_CVT_PK_F32_F8_t16>;
    defm V_CVT_PK_F32_BF8_t16 : VOP1Inst<"v_cvt_pk_f32_bf8_t16", VOPProfile_Base_CVT_PK_F32_F8_t16>;
  }
}

// Selects (node src0, byte_sel) to a VOP3 instruction, forwarding the
// byte_sel immediate through as_i32timm.
class Cvt_F_F8_Pat_ByteSel<SDPatternOperator node, VOP3_Pseudo inst> : GCNPat<
    (node i32:$src0, timm:$byte_sel),
    (inst $src0, (as_i32timm $byte_sel))
>;

let SubtargetPredicate = isGFX12Plus, OtherPredicates = [HasFP8ConversionInsts] in {
  def : Cvt_F_F8_Pat_ByteSel<int_amdgcn_cvt_f32_fp8, V_CVT_F32_FP8_OP_SEL_e64>;
  def : Cvt_F_F8_Pat_ByteSel<int_amdgcn_cvt_f32_bf8, V_CVT_F32_BF8_OP_SEL_e64>;
}

// Packed conversion pattern: a non-zero index selects the e64 form with
// SRCMODS.OP_SEL_0 as the source modifier, index 0 selects the e32 form.
class Cvt_PK_F32_F8_Pat_OpSel<SDPatternOperator node, int index,
    VOP1_Pseudo inst_e32, VOP3_Pseudo inst_e64> : GCNPat<
    (v2f32 (node i32:$src, index)),
    !if (index,
         (inst_e64 SRCMODS.OP_SEL_0, $src, 0),
         (inst_e32 $src))
>;

let SubtargetPredicate = isGFX12Plus, OtherPredicates = [HasFP8ConversionInsts] in {
  foreach Index = [0, -1] in {
    def : Cvt_PK_F32_F8_Pat_OpSel<int_amdgcn_cvt_pk_f32_fp8, Index,
                                  V_CVT_PK_F32_FP8_fake16_e32, V_CVT_PK_F32_FP8_fake16_e64>;
    def : Cvt_PK_F32_F8_Pat_OpSel<int_amdgcn_cvt_pk_f32_bf8, Index,
                                  V_CVT_PK_F32_BF8_fake16_e32, V_CVT_PK_F32_BF8_fake16_e64>;
  }
}

let SubtargetPredicate = isGFX10Plus in {
  defm V_PIPEFLUSH : VOP1Inst<"v_pipeflush", VOP_NO_EXT<VOP_NONE>>;

  // EXEC is listed explicitly because this enclosing `let` replaces the Uses
  // value set in the VOP1_Pseudo class body ([EXEC], or [MODE, EXEC] for FP
  // profiles); listing only M0 would drop the implicit EXEC use.
  let Uses = [M0, EXEC] in {
    defm V_MOVRELSD_2_B32 :
      VOP1Inst<"v_movrelsd_2_b32", VOP_MOVRELSD>;

    def V_SWAPREL_B32 : VOP1_Pseudo<"v_swaprel_b32", VOP_SWAP_I32, [], 1> {
      let Constraints = "$vdst = $src1, $vdst1 = $src0";
      let DisableEncoding = "$vdst1,$src1";
      let SchedRW = [Write64Bit, Write64Bit];
    }
  } // End Uses = [M0, EXEC]
} // End SubtargetPredicate = isGFX10Plus

// Profile for the AGPR-to-AGPR move: destination in AGPR_32, source operand
// ARegSrc_32.
def VOPProfileAccMov : VOP_NO_EXT<VOP_I32_I32> {
  let DstRC = RegisterOperand<AGPR_32>;
  let Src0RC32 = ARegSrc_32;
  let Asm32 = " $vdst, $src0";
}

def V_ACCVGPR_MOV_B32 : VOP1_Pseudo<"v_accvgpr_mov_b32", VOPProfileAccMov, [], 1> {
  let SubtargetPredicate = isGFX90APlus;
  let isReMaterializable = 1;
  let isAsCheapAsAMove = 1;
}

// 16-bit counterpart of the 32-bit swap profile, built on the true16
// profile with Lo128 operand classes.
def VOP_SWAP_I16 : VOPProfile_True16<VOP_I16_I16> {
  let Outs32 = (outs VOPDstOperand_t16Lo128:$vdst,
                     VOPSrcEncodedDstOperand_t16Lo128:$vdst1);
  let Ins32 = (ins VOPSrcEncodedDstOperand_t16Lo128:$src0,
                   VOPDstOperand_t16Lo128:$src1);
  let Asm32 = "$vdst, $src0";
}

let SubtargetPredicate = isGFX11Plus in {
  def V_SWAP_B16 : VOP1_Pseudo<"v_swap_b16", VOP_SWAP_I16, [], /* VOP1Only= */true> {
    let Constraints = "$vdst = $src1, $vdst1 = $src0";
    let DisableEncoding = "$vdst1, $src1";
    let SchedRW = [Write64Bit, Write64Bit];
    let True16Predicate = UseRealTrue16Insts;
  }
  // Restrict src0 to be VGPR
  def V_PERMLANE64_B32 : VOP1_Pseudo<"v_permlane64_b32", VOP_MOVRELS,
                                      [], /*VOP1Only=*/ 1>;
  defm V_MOV_B16 : VOP1Inst_t16<"v_mov_b16", VOP_I16_I16>;
  defm V_NOT_B16 : VOP1Inst_t16<"v_not_b16", VOP_I16_I16>;
  defm V_CVT_I32_I16 : VOP1Inst_t16<"v_cvt_i32_i16", VOP_I32_I16>;
  defm V_CVT_U32_U16 : VOP1Inst_t16<"v_cvt_u32_u16", VOP_I32_I16>;
} // End SubtargetPredicate = isGFX11Plus

let SubtargetPredicate = HasPrngInst in
defm V_PRNG_B32 : VOP1Inst <"v_prng_b32", VOP_I32_I32, int_amdgcn_prng_b32>;

// The permlane swap instructions tie each output to the matching input and
// exclude the tied operands from the encoding.
let Constraints = "$vdst = $vdst_in, $src0_out = $src0",
     DisableEncoding="$vdst_in,$src0_out",
     SchedRW = [Write32Bit, Write32Bit] in {
let SubtargetPredicate = HasPermlane16Swap in {
defm V_PERMLANE16_SWAP_B32 : VOP1Inst<"v_permlane16_swap_b32", VOP_PERMLANE_SWAP>;
}

let SubtargetPredicate = HasPermlane32Swap in {
defm V_PERMLANE32_SWAP_B32 : VOP1Inst<"v_permlane32_swap_b32", VOP_PERMLANE_SWAP>;
}
}

// Select int_amdgcn_permlane64 to V_PERMLANE64_B32 for every type in
// Reg32Types.
foreach vt = Reg32Types.types in {
  def : GCNPat<(int_amdgcn_permlane64 (vt VRegSrc_32:$src0)),
        (vt (V_PERMLANE64_B32 (vt VRegSrc_32:$src0)))
  >;
}

//===----------------------------------------------------------------------===//
// Target-specific instruction encodings.
808//===----------------------------------------------------------------------===// 809 810class VOP1_DPP<bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP16 = 0> : 811 VOP_DPP<ps.OpName, p, isDPP16> { 812 let hasSideEffects = ps.hasSideEffects; 813 let Defs = ps.Defs; 814 let SchedRW = ps.SchedRW; 815 let Uses = ps.Uses; 816 let TRANS = ps.TRANS; 817 let SubtargetPredicate = ps.SubtargetPredicate; 818 let OtherPredicates = ps.OtherPredicates; 819 820 bits<8> vdst; 821 let Inst{8-0} = 0xfa; 822 let Inst{16-9} = op; 823 let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0); 824 let Inst{31-25} = 0x3f; 825} 826 827class VOP1_DPP16<bits<8> op, VOP1_DPP_Pseudo ps, int subtarget, VOPProfile p = ps.Pfl> : 828 VOP1_DPP<op, ps, p, 1>, 829 SIMCInstr <ps.PseudoInstr, subtarget> { 830 let AssemblerPredicate = HasDPP16; 831} 832 833class VOP1_DPP16_Gen<bits<8> op, VOP1_DPP_Pseudo ps, GFXGen Gen, VOPProfile p = ps.Pfl> : 834 VOP1_DPP16 <op, ps, Gen.Subtarget, p> { 835 let AssemblerPredicate = Gen.AssemblerPredicate; 836 let DecoderNamespace = Gen.DecoderNamespace; 837} 838 839class VOP1_DPP8<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> : 840 VOP_DPP8<ps.OpName, p> { 841 let hasSideEffects = ps.hasSideEffects; 842 let Defs = ps.Defs; 843 let SchedRW = ps.SchedRW; 844 let Uses = ps.Uses; 845 let SubtargetPredicate = ps.SubtargetPredicate; 846 let OtherPredicates = ps.OtherPredicates; 847 848 bits<8> vdst; 849 let Inst{8-0} = fi; 850 let Inst{16-9} = op; 851 let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0); 852 let Inst{31-25} = 0x3f; 853} 854 855class VOP1_DPP8_Gen<bits<8> op, VOP1_Pseudo ps, GFXGen Gen, VOPProfile p = ps.Pfl> : 856 VOP1_DPP8<op, ps, p> { 857 let AssemblerPredicate = Gen.AssemblerPredicate; 858 let DecoderNamespace = Gen.DecoderNamespace; 859} 860 861//===----------------------------------------------------------------------===// 862// GFX11, GFX12 863//===----------------------------------------------------------------------===// 864 865multiclass 
VOP1Only_Real<GFXGen Gen, bits<9> op> { 866 let IsSingle = 1 in 867 def Gen.Suffix : 868 VOP1_Real_Gen<!cast<VOP1_Pseudo>(NAME), Gen>, 869 VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>; 870} 871 872multiclass VOP1_Real_e32<GFXGen Gen, bits<9> op, string opName = NAME> { 873 defvar ps = !cast<VOP1_Pseudo>(opName#"_e32"); 874 def _e32#Gen.Suffix : 875 VOP1_Real_Gen<ps, Gen>, 876 VOP1e<op{7-0}, ps.Pfl>; 877} 878 879multiclass VOP1_Real_e32_with_name<GFXGen Gen, bits<9> op, string opName, 880 string asmName> { 881 defvar ps = !cast<VOP1_Pseudo>(opName#"_e32"); 882 let AsmString = asmName # ps.AsmOperands, 883 DecoderNamespace = Gen.DecoderNamespace # 884 !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in { 885 defm NAME : VOP1_Real_e32<Gen, op, opName>; 886 } 887} 888 889multiclass VOP1_Real_e64<GFXGen Gen, bits<9> op> { 890 def _e64#Gen.Suffix : 891 VOP3_Real_Gen<!cast<VOP3_Pseudo>(NAME#"_e64"), Gen>, 892 VOP3e_gfx11_gfx12<{0, 1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; 893} 894 895multiclass VOP1_Real_dpp<GFXGen Gen, bits<9> op, string opName = NAME> { 896 defvar ps = !cast<VOP1_Pseudo>(opName#"_e32"); 897 def _dpp#Gen.Suffix : VOP1_DPP16_Gen<op{7-0}, !cast<VOP1_DPP_Pseudo>(opName#"_dpp"), Gen>; 898} 899 900multiclass VOP1_Real_dpp_with_name<GFXGen Gen, bits<9> op, string opName, 901 string asmName> { 902 defvar ps = !cast<VOP1_Pseudo>(opName#"_e32"); 903 let AsmString = asmName # ps.Pfl.AsmDPP16, 904 DecoderNamespace = Gen.DecoderNamespace # 905 !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in { 906 defm NAME : VOP1_Real_dpp<Gen, op, opName>; 907 } 908} 909 910multiclass VOP1_Real_dpp8<GFXGen Gen, bits<9> op, string opName = NAME> { 911 defvar ps = !cast<VOP1_Pseudo>(opName#"_e32"); 912 def _dpp8#Gen.Suffix : VOP1_DPP8_Gen<op{7-0}, ps, Gen>; 913} 914 915multiclass VOP1_Real_dpp8_with_name<GFXGen Gen, bits<9> op, string opName, 916 string asmName> { 917 defvar ps = !cast<VOP1_Pseudo>(opName#"_e32"); 918 let AsmString = asmName # ps.Pfl.AsmDPP8, 919 DecoderNamespace = 
Gen.DecoderNamespace # 920 !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in { 921 defm NAME : VOP1_Real_dpp8<Gen, op, opName>; 922 } 923} 924 925multiclass VOP1_Realtriple_e64<GFXGen Gen, bits<9> op> : 926 VOP3_Realtriple<Gen, {0, 1, 1, op{6-0}}, /*isSingle=*/ 0, NAME>; 927 928multiclass VOP1_Realtriple_e64_with_name<GFXGen Gen, bits<9> op, string opName, 929 string asmName> { 930 defm NAME : VOP3_Realtriple_with_name<Gen, {0, 1, 1, op{6-0}}, opName, 931 asmName>; 932} 933 934multiclass VOP1_Real_FULL<GFXGen Gen, bits<9> op> : 935 VOP1_Real_e32<Gen, op>, VOP1_Realtriple_e64<Gen, op>, 936 VOP1_Real_dpp<Gen, op>, VOP1_Real_dpp8<Gen, op>; 937 938multiclass VOP1_Real_NO_VOP3_with_name_gfx11<bits<9> op, string opName, 939 string asmName> { 940 defm NAME : VOP1_Real_e32_with_name<GFX11Gen, op, opName, asmName>, 941 VOP1_Real_dpp_with_name<GFX11Gen, op, opName, asmName>, 942 VOP1_Real_dpp8_with_name<GFX11Gen, op, opName, asmName>; 943 defvar ps = !cast<VOP1_Pseudo>(opName#"_e32"); 944 def gfx11_alias : AMDGPUMnemonicAlias<ps.Mnemonic, asmName> { 945 let AssemblerPredicate = isGFX11Plus; 946 } 947} 948 949multiclass VOP1_Real_NO_VOP3_with_name_gfx12<bits<9> op, string opName, 950 string asmName> { 951 defm NAME : VOP1_Real_e32_with_name<GFX12Gen, op, opName, asmName>, 952 VOP1_Real_dpp_with_name<GFX12Gen, op, opName, asmName>, 953 VOP1_Real_dpp8_with_name<GFX12Gen, op, opName, asmName>; 954} 955 956multiclass VOP1_Real_FULL_with_name<GFXGen Gen, bits<9> op, string opName, 957 string asmName> : 958 VOP1_Real_e32_with_name<Gen, op, opName, asmName>, 959 VOP1_Real_dpp_with_name<Gen, op, opName, asmName>, 960 VOP1_Real_dpp8_with_name<Gen, op, opName, asmName>, 961 VOP1_Realtriple_e64_with_name<Gen, op, opName, asmName>; 962 963multiclass VOP1_Real_NO_DPP<GFXGen Gen, bits<9> op> : 964 VOP1_Real_e32<Gen, op>, VOP1_Real_e64<Gen, op>; 965 966multiclass VOP1_Real_FULL_t16_gfx11_gfx12<bits<9> op, string asmName, 967 string opName = NAME> : 968 VOP1_Real_FULL_with_name<GFX11Gen, op, 
opName, asmName>, 969 VOP1_Real_FULL_with_name<GFX12Gen, op, opName, asmName>; 970 971multiclass VOP1_Real_FULL_with_name_gfx11_gfx12<bits<9> op, string opName, 972 string asmName> : 973 VOP1_Real_FULL_with_name<GFX11Gen, op, opName, asmName>, 974 VOP1_Real_FULL_with_name<GFX12Gen, op, opName, asmName>; 975 976multiclass VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12< 977 bits<9> op, string asmName = !tolower(NAME), string opName = NAME> { 978 defm opName#"_t16" : 979 VOP1_Real_FULL_with_name_gfx11_gfx12<op, opName#"_t16", asmName>; 980 defm opName#"_fake16": 981 VOP1_Real_FULL_with_name_gfx11_gfx12<op, opName#"_fake16", asmName>; 982} 983 984multiclass VOP1Only_Real_gfx11_gfx12<bits<9> op> : 985 VOP1Only_Real<GFX11Gen, op>, VOP1Only_Real<GFX12Gen, op>; 986 987multiclass VOP1_Real_FULL_gfx11_gfx12<bits<9> op> : 988 VOP1_Real_FULL<GFX11Gen, op>, VOP1_Real_FULL<GFX12Gen, op>; 989 990multiclass VOP1_Real_NO_DPP_OP_SEL_with_name<GFXGen Gen, bits<9> op, 991 string opName, string asmName> : 992 VOP1_Real_e32_with_name<Gen, op, opName, asmName>, 993 VOP3_Real_with_name<Gen, {0, 1, 1, op{6-0}}, opName, asmName>; 994 995 996defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name<GFX12Gen, 0x06c, "V_CVT_F32_FP8_OP_SEL", "v_cvt_f32_fp8">; 997defm V_CVT_F32_BF8 : VOP1_Real_FULL_with_name<GFX12Gen, 0x06d, "V_CVT_F32_BF8_OP_SEL", "v_cvt_f32_bf8">; 998 999defm V_CVT_PK_F32_FP8_fake16 : VOP1_Real_e32_with_name<GFX12Gen, 0x06e, "V_CVT_PK_F32_FP8_fake16", "v_cvt_pk_f32_fp8">; 1000defm V_CVT_PK_F32_FP8_t16 : VOP1_Real_e32_with_name<GFX12Gen, 0x06e, "V_CVT_PK_F32_FP8_t16", "v_cvt_pk_f32_fp8">; 1001defm V_CVT_PK_F32_FP8_fake16 : VOP3_Real_with_name<GFX12Gen, 0x1ee, "V_CVT_PK_F32_FP8_fake16", "v_cvt_pk_f32_fp8">; 1002defm V_CVT_PK_F32_FP8_t16 : VOP3_Real_with_name<GFX12Gen, 0x1ee, "V_CVT_PK_F32_FP8_t16", "v_cvt_pk_f32_fp8">; 1003defm V_CVT_PK_F32_BF8_fake16 : VOP1_Real_e32_with_name<GFX12Gen, 0x06f, "V_CVT_PK_F32_BF8_fake16", "v_cvt_pk_f32_bf8">; 1004defm V_CVT_PK_F32_BF8_t16 : 
VOP1_Real_e32_with_name<GFX12Gen, 0x06f, "V_CVT_PK_F32_BF8_t16", "v_cvt_pk_f32_bf8">; 1005defm V_CVT_PK_F32_BF8_fake16 : VOP3_Real_with_name<GFX12Gen, 0x1ef, "V_CVT_PK_F32_BF8_fake16", "v_cvt_pk_f32_bf8">; 1006defm V_CVT_PK_F32_BF8_t16 : VOP3_Real_with_name<GFX12Gen, 0x1ef, "V_CVT_PK_F32_BF8_t16", "v_cvt_pk_f32_bf8">; 1007 1008defm V_CVT_NEAREST_I32_F32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x00c, 1009 "V_CVT_RPI_I32_F32", "v_cvt_nearest_i32_f32">; 1010defm V_CVT_FLOOR_I32_F32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x00d, 1011 "V_CVT_FLR_I32_F32", "v_cvt_floor_i32_f32">; 1012defm V_CLZ_I32_U32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x039, 1013 "V_FFBH_U32", "v_clz_i32_u32">; 1014defm V_CTZ_I32_B32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x03a, 1015 "V_FFBL_B32", "v_ctz_i32_b32">; 1016defm V_CLS_I32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x03b, 1017 "V_FFBH_I32", "v_cls_i32">; 1018defm V_SWAP_B16 : VOP1Only_Real_gfx11_gfx12<0x066>; 1019defm V_PERMLANE64_B32 : VOP1Only_Real_gfx11_gfx12<0x067>; 1020defm V_MOV_B16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x01c, "v_mov_b16">; 1021defm V_NOT_B16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x069>; 1022defm V_CVT_I32_I16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x06a>; 1023defm V_CVT_U32_U16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x06b>; 1024 1025defm V_CVT_F16_U16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x050>; 1026defm V_CVT_F16_I16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x051>; 1027defm V_CVT_U16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x052>; 1028defm V_CVT_I16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x053>; 1029defm V_RCP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">; 1030defm V_RCP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">; 1031defm V_SQRT_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x055, "v_sqrt_f16">; 1032defm V_SQRT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x055, "v_sqrt_f16">; 1033defm V_RSQ_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x056, 
"v_rsq_f16">; 1034defm V_RSQ_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x056, "v_rsq_f16">; 1035defm V_LOG_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16">; 1036defm V_LOG_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16">; 1037defm V_EXP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">; 1038defm V_EXP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">; 1039defm V_FREXP_MANT_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x059>; 1040defm V_FREXP_EXP_I16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x05a>; 1041defm V_FLOOR_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">; 1042defm V_FLOOR_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">; 1043defm V_CEIL_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">; 1044defm V_CEIL_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">; 1045defm V_TRUNC_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x05d>; 1046defm V_RNDNE_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x05e>; 1047defm V_FRACT_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x05f>; 1048defm V_SIN_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x060>; 1049defm V_COS_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x061>; 1050defm V_SAT_PK_U8_I16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x062>; 1051defm V_CVT_NORM_I16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x063>; 1052defm V_CVT_NORM_U16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x064>; 1053 1054defm V_CVT_F16_F32 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00a>; 1055defm V_CVT_F32_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00b>; 1056 1057//===----------------------------------------------------------------------===// 1058// GFX10. 
1059//===----------------------------------------------------------------------===// 1060 1061let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in { 1062 multiclass VOP1Only_Real_gfx10<bits<9> op> { 1063 def _gfx10 : 1064 VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.GFX10>, 1065 VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>; 1066 } 1067 multiclass VOP1_Real_e32_gfx10<bits<9> op> { 1068 def _e32_gfx10 : 1069 VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX10>, 1070 VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>; 1071 } 1072 multiclass VOP1_Real_e64_gfx10<bits<9> op> { 1073 def _e64_gfx10 : 1074 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>, 1075 VOP3e_gfx10<{0, 1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; 1076 } 1077 multiclass VOP1_Real_sdwa_gfx10<bits<9> op> { 1078 if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then 1079 def _sdwa_gfx10 : 1080 VOP_SDWA10_Real<!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>, 1081 VOP1_SDWA9Ae<op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>; 1082 } 1083 multiclass VOP1_Real_dpp_gfx10<bits<9> op> { 1084 if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then 1085 def _dpp_gfx10 : VOP1_DPP16<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX10>; 1086 } 1087 multiclass VOP1_Real_dpp8_gfx10<bits<9> op> { 1088 if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then 1089 def _dpp8_gfx10 : VOP1_DPP8<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")>; 1090 } 1091} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" 1092 1093multiclass VOP1_Real_gfx10<bits<9> op> : 1094 VOP1_Real_e32_gfx10<op>, VOP1_Real_e64_gfx10<op>, 1095 VOP1_Real_sdwa_gfx10<op>, VOP1_Real_dpp_gfx10<op>, 1096 VOP1_Real_dpp8_gfx10<op>; 1097 1098multiclass VOP1_Real_gfx10_FULL_gfx11_gfx12<bits<9> op> : 1099 VOP1_Real_gfx10<op>, 1100 VOP1_Real_FULL<GFX11Gen, op>, 1101 VOP1_Real_FULL<GFX12Gen, op>; 1102 1103multiclass VOP1_Real_gfx10_NO_DPP_gfx11_gfx12<bits<9> op> : 1104 
VOP1_Real_gfx10<op>, 1105 VOP1_Real_NO_DPP<GFX11Gen, op>, 1106 VOP1_Real_NO_DPP<GFX12Gen, op>; 1107 1108multiclass VOP1Only_Real_gfx10_gfx11_gfx12<bits<9> op> : 1109 VOP1Only_Real_gfx10<op>, 1110 VOP1Only_Real<GFX11Gen, op>, 1111 VOP1Only_Real<GFX12Gen, op>; 1112 1113defm V_PIPEFLUSH : VOP1_Real_gfx10_NO_DPP_gfx11_gfx12<0x01b>; 1114defm V_MOVRELSD_2_B32 : VOP1_Real_gfx10_FULL_gfx11_gfx12<0x048>; 1115defm V_CVT_F16_U16 : VOP1_Real_gfx10<0x050>; 1116defm V_CVT_F16_I16 : VOP1_Real_gfx10<0x051>; 1117defm V_CVT_U16_F16 : VOP1_Real_gfx10<0x052>; 1118defm V_CVT_I16_F16 : VOP1_Real_gfx10<0x053>; 1119defm V_RCP_F16 : VOP1_Real_gfx10<0x054>; 1120defm V_SQRT_F16 : VOP1_Real_gfx10<0x055>; 1121defm V_RSQ_F16 : VOP1_Real_gfx10<0x056>; 1122defm V_LOG_F16 : VOP1_Real_gfx10<0x057>; 1123defm V_EXP_F16 : VOP1_Real_gfx10<0x058>; 1124defm V_FREXP_MANT_F16 : VOP1_Real_gfx10<0x059>; 1125defm V_FREXP_EXP_I16_F16 : VOP1_Real_gfx10<0x05a>; 1126defm V_FLOOR_F16 : VOP1_Real_gfx10<0x05b>; 1127defm V_CEIL_F16 : VOP1_Real_gfx10<0x05c>; 1128defm V_TRUNC_F16 : VOP1_Real_gfx10<0x05d>; 1129defm V_RNDNE_F16 : VOP1_Real_gfx10<0x05e>; 1130defm V_FRACT_F16 : VOP1_Real_gfx10<0x05f>; 1131defm V_SIN_F16 : VOP1_Real_gfx10<0x060>; 1132defm V_COS_F16 : VOP1_Real_gfx10<0x061>; 1133defm V_SAT_PK_U8_I16 : VOP1_Real_gfx10<0x062>; 1134defm V_CVT_NORM_I16_F16 : VOP1_Real_gfx10<0x063>; 1135defm V_CVT_NORM_U16_F16 : VOP1_Real_gfx10<0x064>; 1136 1137defm V_SWAP_B32 : VOP1Only_Real_gfx10_gfx11_gfx12<0x065>; 1138defm V_SWAPREL_B32 : VOP1Only_Real_gfx10_gfx11_gfx12<0x068>; 1139 1140//===----------------------------------------------------------------------===// 1141// GFX7, GFX10, GFX11, GFX12 1142//===----------------------------------------------------------------------===// 1143 1144let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in { 1145 multiclass VOP1_Real_e32_gfx7<bits<9> op> { 1146 def _e32_gfx7 : 1147 VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>, 1148 VOP1e<op{7-0}, 
!cast<VOP1_Pseudo>(NAME#"_e32").Pfl>; 1149 } 1150 multiclass VOP1_Real_e64_gfx7<bits<9> op> { 1151 def _e64_gfx7 : 1152 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>, 1153 VOP3e_gfx6_gfx7<{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; 1154 } 1155} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" 1156 1157multiclass VOP1_Real_gfx7<bits<9> op> : 1158 VOP1_Real_e32_gfx7<op>, VOP1_Real_e64_gfx7<op>; 1159 1160multiclass VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<bits<9> op> : 1161 VOP1_Real_gfx7<op>, VOP1_Real_gfx10<op>, VOP1_Real_NO_DPP<GFX11Gen, op>, 1162 VOP1_Real_NO_DPP<GFX12Gen, op>; 1163 1164defm V_LOG_LEGACY_F32 : VOP1_Real_gfx7<0x045>; 1165defm V_EXP_LEGACY_F32 : VOP1_Real_gfx7<0x046>; 1166 1167defm V_TRUNC_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x017>; 1168defm V_CEIL_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x018>; 1169defm V_RNDNE_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x019>; 1170defm V_FLOOR_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x01a>; 1171 1172//===----------------------------------------------------------------------===// 1173// GFX6, GFX7, GFX10, GFX11, GFX12 1174//===----------------------------------------------------------------------===// 1175 1176let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in { 1177 multiclass VOP1_Real_e32_gfx6_gfx7<bits<9> op> { 1178 def _e32_gfx6_gfx7 : 1179 VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>, 1180 VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>; 1181 } 1182 multiclass VOP1_Real_e64_gfx6_gfx7<bits<9> op> { 1183 def _e64_gfx6_gfx7 : 1184 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>, 1185 VOP3e_gfx6_gfx7<{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; 1186 } 1187 multiclass VOP1Only_Real_gfx6_gfx7<bits<9> op> { 1188 def _gfx6_gfx7 : 1189 VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.SI>, 1190 VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>; 1191 } 1192} // End AssemblerPredicate = 
isGFX6GFX7, DecoderNamespace = "GFX6GFX7" 1193 1194multiclass VOP1_Real_gfx6_gfx7<bits<9> op> : 1195 VOP1_Real_e32_gfx6_gfx7<op>, VOP1_Real_e64_gfx6_gfx7<op>; 1196 1197multiclass VOP1_Real_gfx6_gfx7_gfx10<bits<9> op> : 1198 VOP1_Real_gfx6_gfx7<op>, VOP1_Real_gfx10<op>; 1199 1200multiclass VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<bits<9> op> : 1201 VOP1_Real_gfx6_gfx7_gfx10<op>, VOP1_Real_FULL<GFX11Gen, op>, 1202 VOP1_Real_FULL<GFX12Gen, op>; 1203 1204multiclass VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<bits<9> op> : 1205 VOP1_Real_gfx6_gfx7_gfx10<op>, VOP1_Real_NO_DPP<GFX11Gen, op>, 1206 VOP1_Real_NO_DPP<GFX12Gen, op>; 1207 1208multiclass VOP1Only_Real_gfx6_gfx7_gfx10_gfx11_gfx12<bits<9> op> : 1209 VOP1Only_Real_gfx6_gfx7<op>, VOP1Only_Real_gfx10_gfx11_gfx12<op>; 1210 1211defm V_LOG_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x026>; 1212defm V_RCP_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x028>; 1213defm V_RCP_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x029>; 1214defm V_RSQ_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x02c>; 1215defm V_RSQ_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x02d>; 1216defm V_RCP_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x030>; 1217defm V_RSQ_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x032>; 1218 1219defm V_NOP : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x000>; 1220defm V_MOV_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x001>; 1221defm V_READFIRSTLANE_B32 : VOP1Only_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x002>; 1222defm V_CVT_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x003>; 1223defm V_CVT_F64_I32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x004>; 1224defm V_CVT_F32_I32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x005>; 1225defm V_CVT_F32_U32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x006>; 1226defm V_CVT_U32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x007>; 1227defm V_CVT_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x008>; 1228defm V_CVT_F16_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00a>; 1229defm V_CVT_F32_F16 : VOP1_Real_gfx6_gfx7_gfx10<0x00b>; 1230defm 
V_CVT_RPI_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00c>; 1231defm V_CVT_FLR_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00d>; 1232defm V_CVT_OFF_F32_I4 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x00e>; 1233defm V_CVT_F32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x00f>; 1234defm V_CVT_F64_F32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x010>; 1235defm V_CVT_F32_UBYTE0 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x011>; 1236defm V_CVT_F32_UBYTE1 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x012>; 1237defm V_CVT_F32_UBYTE2 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x013>; 1238defm V_CVT_F32_UBYTE3 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x014>; 1239defm V_CVT_U32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x015>; 1240defm V_CVT_F64_U32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x016>; 1241defm V_FRACT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x020>; 1242defm V_TRUNC_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x021>; 1243defm V_CEIL_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x022>; 1244defm V_RNDNE_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x023>; 1245defm V_FLOOR_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x024>; 1246defm V_EXP_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x025>; 1247defm V_LOG_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x027>; 1248defm V_RCP_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x02a>; 1249defm V_RCP_IFLAG_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x02b>; 1250defm V_RSQ_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x02e>; 1251defm V_RCP_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x02f>; 1252defm V_RSQ_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x031>; 1253defm V_SQRT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x033>; 1254defm V_SQRT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x034>; 1255defm V_SIN_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x035>; 1256defm V_COS_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x036>; 
// Remaining entries of the shared-opcode table. The multiclass name lists the
// generations that receive a real instruction for the given opcode.
defm V_NOT_B32           : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x037>;
defm V_BFREV_B32         : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x038>;
defm V_FFBH_U32          : VOP1_Real_gfx6_gfx7_gfx10<0x039>;
defm V_FFBL_B32          : VOP1_Real_gfx6_gfx7_gfx10<0x03a>;
defm V_FFBH_I32          : VOP1_Real_gfx6_gfx7_gfx10<0x03b>;
defm V_FREXP_EXP_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x03c>;
defm V_FREXP_MANT_F64    : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x03d>;
defm V_FRACT_F64         : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x03e>;
defm V_FREXP_EXP_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x03f>;
defm V_FREXP_MANT_F32    : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x040>;
defm V_CLREXCP           : VOP1_Real_gfx6_gfx7_gfx10<0x041>;
defm V_MOVRELD_B32       : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x042>;
defm V_MOVRELS_B32       : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x043>;
defm V_MOVRELSD_B32      : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x044>;

//===----------------------------------------------------------------------===//
// GFX8, GFX9 (VI).
1274//===----------------------------------------------------------------------===// 1275 1276class VOP1_DPPe <bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile P = ps.Pfl> : 1277 VOP_DPPe <P> { 1278 bits<8> vdst; 1279 let Inst{8-0} = 0xfa; // dpp 1280 let Inst{16-9} = op; 1281 let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); 1282 let Inst{31-25} = 0x3f; //encoding 1283} 1284 1285let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in { 1286 multiclass VOP1Only_Real_vi <bits<10> op> { 1287 def _vi : 1288 VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.VI>, 1289 VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>; 1290 } 1291 1292 multiclass VOP1_Real_e32e64_vi <bits<10> op> { 1293 def _e32_vi : 1294 VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>, 1295 VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>; 1296 def _e64_vi : 1297 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>, 1298 VOP3e_vi <!add(0x140, op), !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; 1299 } 1300} 1301 1302multiclass VOP1_Real_vi <bits<10> op> { 1303 defm NAME : VOP1_Real_e32e64_vi <op>; 1304 1305 if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA then 1306 def _sdwa_vi : 1307 VOP_SDWA8_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>, 1308 VOP1_SDWAe <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>; 1309 1310 if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then 1311 def _sdwa_gfx9 : 1312 VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>, 1313 VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>; 1314 1315 if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then 1316 def _dpp_vi : 1317 VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>, 1318 VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>; 1319} 1320 1321defm V_NOP : VOP1_Real_vi <0x0>; 1322defm V_MOV_B32 : VOP1_Real_vi <0x1>; 1323defm V_READFIRSTLANE_B32 : VOP1Only_Real_vi <0x2>; 1324defm V_CVT_I32_F64 : VOP1_Real_vi <0x3>; 1325defm V_CVT_F64_I32 : VOP1_Real_vi <0x4>; 1326defm 
V_CVT_F32_I32 : VOP1_Real_vi <0x5>; 1327defm V_CVT_F32_U32 : VOP1_Real_vi <0x6>; 1328defm V_CVT_U32_F32 : VOP1_Real_vi <0x7>; 1329defm V_CVT_I32_F32 : VOP1_Real_vi <0x8>; 1330defm V_CVT_F16_F32 : VOP1_Real_vi <0xa>; 1331defm V_CVT_F32_F16 : VOP1_Real_vi <0xb>; 1332defm V_CVT_RPI_I32_F32 : VOP1_Real_vi <0xc>; 1333defm V_CVT_FLR_I32_F32 : VOP1_Real_vi <0xd>; 1334defm V_CVT_OFF_F32_I4 : VOP1_Real_vi <0xe>; 1335defm V_CVT_F32_F64 : VOP1_Real_vi <0xf>; 1336defm V_CVT_F64_F32 : VOP1_Real_vi <0x10>; 1337defm V_CVT_F32_UBYTE0 : VOP1_Real_vi <0x11>; 1338defm V_CVT_F32_UBYTE1 : VOP1_Real_vi <0x12>; 1339defm V_CVT_F32_UBYTE2 : VOP1_Real_vi <0x13>; 1340defm V_CVT_F32_UBYTE3 : VOP1_Real_vi <0x14>; 1341defm V_CVT_U32_F64 : VOP1_Real_vi <0x15>; 1342defm V_CVT_F64_U32 : VOP1_Real_vi <0x16>; 1343defm V_FRACT_F32 : VOP1_Real_vi <0x1b>; 1344defm V_TRUNC_F32 : VOP1_Real_vi <0x1c>; 1345defm V_CEIL_F32 : VOP1_Real_vi <0x1d>; 1346defm V_RNDNE_F32 : VOP1_Real_vi <0x1e>; 1347defm V_FLOOR_F32 : VOP1_Real_vi <0x1f>; 1348defm V_EXP_F32 : VOP1_Real_vi <0x20>; 1349defm V_LOG_F32 : VOP1_Real_vi <0x21>; 1350defm V_RCP_F32 : VOP1_Real_vi <0x22>; 1351defm V_RCP_IFLAG_F32 : VOP1_Real_vi <0x23>; 1352defm V_RSQ_F32 : VOP1_Real_vi <0x24>; 1353defm V_RCP_F64 : VOP1_Real_vi <0x25>; 1354defm V_RSQ_F64 : VOP1_Real_vi <0x26>; 1355defm V_SQRT_F32 : VOP1_Real_vi <0x27>; 1356defm V_SQRT_F64 : VOP1_Real_vi <0x28>; 1357defm V_SIN_F32 : VOP1_Real_vi <0x29>; 1358defm V_COS_F32 : VOP1_Real_vi <0x2a>; 1359defm V_NOT_B32 : VOP1_Real_vi <0x2b>; 1360defm V_BFREV_B32 : VOP1_Real_vi <0x2c>; 1361defm V_FFBH_U32 : VOP1_Real_vi <0x2d>; 1362defm V_FFBL_B32 : VOP1_Real_vi <0x2e>; 1363defm V_FFBH_I32 : VOP1_Real_vi <0x2f>; 1364defm V_FREXP_EXP_I32_F64 : VOP1_Real_vi <0x30>; 1365defm V_FREXP_MANT_F64 : VOP1_Real_vi <0x31>; 1366defm V_FRACT_F64 : VOP1_Real_vi <0x32>; 1367defm V_FREXP_EXP_I32_F32 : VOP1_Real_vi <0x33>; 1368defm V_FREXP_MANT_F32 : VOP1_Real_vi <0x34>; 1369defm V_CLREXCP : VOP1_Real_vi <0x35>; 1370defm 
V_MOVRELD_B32 : VOP1_Real_e32e64_vi <0x36>; 1371defm V_MOVRELS_B32 : VOP1_Real_e32e64_vi <0x37>; 1372defm V_MOVRELSD_B32 : VOP1_Real_e32e64_vi <0x38>; 1373defm V_TRUNC_F64 : VOP1_Real_vi <0x17>; 1374defm V_CEIL_F64 : VOP1_Real_vi <0x18>; 1375defm V_FLOOR_F64 : VOP1_Real_vi <0x1A>; 1376defm V_RNDNE_F64 : VOP1_Real_vi <0x19>; 1377defm V_LOG_LEGACY_F32 : VOP1_Real_vi <0x4c>; 1378defm V_EXP_LEGACY_F32 : VOP1_Real_vi <0x4b>; 1379defm V_CVT_F16_U16 : VOP1_Real_vi <0x39>; 1380defm V_CVT_F16_I16 : VOP1_Real_vi <0x3a>; 1381defm V_CVT_U16_F16 : VOP1_Real_vi <0x3b>; 1382defm V_CVT_I16_F16 : VOP1_Real_vi <0x3c>; 1383defm V_RCP_F16 : VOP1_Real_vi <0x3d>; 1384defm V_SQRT_F16 : VOP1_Real_vi <0x3e>; 1385defm V_RSQ_F16 : VOP1_Real_vi <0x3f>; 1386defm V_LOG_F16 : VOP1_Real_vi <0x40>; 1387defm V_EXP_F16 : VOP1_Real_vi <0x41>; 1388defm V_FREXP_MANT_F16 : VOP1_Real_vi <0x42>; 1389defm V_FREXP_EXP_I16_F16 : VOP1_Real_vi <0x43>; 1390defm V_FLOOR_F16 : VOP1_Real_vi <0x44>; 1391defm V_CEIL_F16 : VOP1_Real_vi <0x45>; 1392defm V_TRUNC_F16 : VOP1_Real_vi <0x46>; 1393defm V_RNDNE_F16 : VOP1_Real_vi <0x47>; 1394defm V_FRACT_F16 : VOP1_Real_vi <0x48>; 1395defm V_SIN_F16 : VOP1_Real_vi <0x49>; 1396defm V_COS_F16 : VOP1_Real_vi <0x4a>; 1397defm V_SWAP_B32 : VOP1Only_Real_vi <0x51>; 1398 1399defm V_SAT_PK_U8_I16 : VOP1_Real_vi<0x4f>; 1400defm V_CVT_NORM_I16_F16 : VOP1_Real_vi<0x4d>; 1401defm V_CVT_NORM_U16_F16 : VOP1_Real_vi<0x4e>; 1402 1403defm V_ACCVGPR_MOV_B32 : VOP1Only_Real_vi<0x52>; 1404 1405let VOP1 = 1, SubtargetPredicate = isGFX8GFX9, Uses = [EXEC, M0], Size = V_MOV_B32_e32.Size in { 1406 1407// Copy of v_mov_b32 with $vdst as a use operand for use with VGPR 1408// indexing mode. vdst can't be treated as a def for codegen purposes, 1409// and an implicit use and def of the super register should be added. 
1410def V_MOV_B32_indirect_write : VPseudoInstSI<(outs), 1411 (ins getVALUDstForVT<i32>.ret:$vdst, getVOPSrc0ForVT<i32, 0>.ret:$src0)>, 1412 PseudoInstExpansion<(V_MOV_B32_e32_vi getVALUDstForVT<i32>.ret:$vdst, 1413 getVOPSrc0ForVT<i32, 0>.ret:$src0)>; 1414 1415// Copy of v_mov_b32 for use with VGPR indexing mode. An implicit use of the 1416// super register should be added. 1417def V_MOV_B32_indirect_read : VPseudoInstSI< 1418 (outs getVALUDstForVT<i32>.ret:$vdst), 1419 (ins getVOPSrc0ForVT<i32, 0>.ret:$src0)>, 1420 PseudoInstExpansion<(V_MOV_B32_e32_vi getVALUDstForVT<i32>.ret:$vdst, 1421 getVOPSrc0ForVT<i32, 0>.ret:$src0)>; 1422 1423} // End VOP1 = 1, SubtargetPredicate = isGFX8GFX9, Uses = [M0] 1424 1425let OtherPredicates = [isGFX8Plus] in { 1426 1427def : GCNPat < 1428 (i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask, 1429 timm:$bank_mask, timm:$bound_ctrl)), 1430 (V_MOV_B32_dpp VGPR_32:$src, VGPR_32:$src, (as_i32timm $dpp_ctrl), 1431 (as_i32timm $row_mask), (as_i32timm $bank_mask), 1432 (as_i1timm $bound_ctrl)) 1433>; 1434 1435foreach vt = Reg32Types.types in { 1436def : GCNPat < 1437 (vt (int_amdgcn_update_dpp vt:$old, vt:$src, timm:$dpp_ctrl, 1438 timm:$row_mask, timm:$bank_mask, 1439 timm:$bound_ctrl)), 1440 (V_MOV_B32_dpp VGPR_32:$old, VGPR_32:$src, (as_i32timm $dpp_ctrl), 1441 (as_i32timm $row_mask), (as_i32timm $bank_mask), 1442 (as_i1timm $bound_ctrl)) 1443>; 1444} 1445 1446} // End OtherPredicates = [isGFX8Plus] 1447 1448foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in 1449let OtherPredicates = [isGFX8Plus, p] in { 1450def : GCNPat< 1451 (i32 (anyext i16:$src)), 1452 (COPY $src) 1453>; 1454 1455def : GCNPat< 1456 (i64 (anyext i16:$src)), 1457 (REG_SEQUENCE VReg_64, 1458 (i32 (COPY $src)), sub0, 1459 (V_MOV_B32_e32 (i32 0)), sub1) 1460>; 1461 1462def : GCNPat< 1463 (i16 (trunc i32:$src)), 1464 (COPY $src) 1465>; 1466 1467def : GCNPat < 1468 (i16 (trunc i64:$src)), 1469 (EXTRACT_SUBREG $src, sub0) 1470>; 1471 1472} // End 
OtherPredicates = [isGFX8Plus, p] 1473 1474let True16Predicate = UseFakeTrue16Insts in { 1475def : GCNPat< 1476 (i32 (DivergentUnaryFrag<anyext> i16:$src)), 1477 (COPY $src) 1478>; 1479} // End True16Predicate = UseFakeTrue16Insts 1480 1481let True16Predicate = UseRealTrue16Insts in { 1482def : GCNPat< 1483 (i32 (UniformUnaryFrag<anyext> (i16 SReg_32:$src))), 1484 (COPY $src) 1485>; 1486 1487def : GCNPat< 1488 (i32 (DivergentUnaryFrag<anyext> i16:$src)), 1489 (REG_SEQUENCE VGPR_32, $src, lo16, (i16 (IMPLICIT_DEF)), hi16) 1490>; 1491 1492def : GCNPat< 1493 (i64 (anyext i16:$src)), 1494 (REG_SEQUENCE VReg_64, $src, lo16, (i16 (IMPLICIT_DEF)), hi16, (i32 (IMPLICIT_DEF)), sub1) 1495>; 1496 1497def : GCNPat< 1498 (i16 (trunc i32:$src)), 1499 (EXTRACT_SUBREG $src, lo16) 1500>; 1501 1502def : GCNPat < 1503 (i16 (trunc i64:$src)), 1504 (EXTRACT_SUBREG $src, lo16) 1505>; 1506 1507} // End OtherPredicates = [UseRealTrue16Insts] 1508 1509//===----------------------------------------------------------------------===// 1510// GFX9 1511//===----------------------------------------------------------------------===// 1512 1513let DecoderNamespace = "GFX9" in { 1514 multiclass VOP1_Real_gfx9 <bits<10> op> { 1515 defm NAME : VOP1_Real_e32e64_vi <op>; 1516 1517 if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then 1518 def _sdwa_gfx9 : 1519 VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>, 1520 VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>; 1521 1522 if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then 1523 def _dpp_gfx9 : 1524 VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>, 1525 VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>; 1526 } 1527 1528 multiclass VOP1_Real_NoDstSel_SDWA_gfx9 <bits<10> op> { 1529 defm NAME : VOP1_Real_e32e64_vi <op>; 1530 1531 if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then 1532 def _sdwa_gfx9 : 1533 VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>, 1534 VOP1_SDWA9Ae <op{7-0}, 
!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl> { 1535 let Inst{42-40} = 6; 1536 } 1537 1538 if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then 1539 def _dpp_gfx9 : 1540 VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>, 1541 VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>; 1542 } 1543} 1544 1545/// Special case of VOP1 instructions, with a VOP3 form where op_sel 1546/// is used for DPP operands. 1547multiclass VOP1_OpSel_Real_e32e64_gfx9 <bits<10> op> { 1548 let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in { 1549 def _e32_gfx9 : 1550 VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX9>, 1551 VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>; 1552 1553 def _e64_gfx9 : 1554 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX9>, 1555 VOP3OpSelIsDPP_gfx9<!add(0x140, op), !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; 1556 } 1557} 1558 1559defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>; 1560 1561let AssemblerPredicate = isGFX940Plus in 1562defm V_MOV_B64 : VOP1_Real_gfx9 <0x38>; 1563 1564defm V_CVT_F32_BF16 : VOP1_Real_gfx9 <0x5b>; 1565 1566defm V_CVT_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x54>; 1567defm V_CVT_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x55>; 1568defm V_CVT_PK_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x56>; 1569defm V_CVT_PK_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x57>; 1570 1571defm V_PRNG_B32 : VOP1_Real_gfx9 <0x58>; 1572defm V_PERMLANE16_SWAP_B32 : VOP1_OpSel_Real_e32e64_gfx9<0x059>; 1573defm V_PERMLANE32_SWAP_B32 : VOP1_OpSel_Real_e32e64_gfx9<0x05a>; 1574 1575class MovDPP8Pattern<Predicate Pred, Instruction Inst, ValueType vt> : GCNPat < 1576 (vt (int_amdgcn_mov_dpp8 vt:$src, timm:$dpp8)), 1577 (Inst VGPR_32:$src, VGPR_32:$src, (as_i32timm $dpp8), (i32 DPP8Mode.FI_0))> { 1578 let OtherPredicates = [Pred]; 1579} 1580 1581foreach vt = Reg32Types.types in { 1582 def : MovDPP8Pattern<isGFX10Only, V_MOV_B32_dpp8_gfx10, vt>; 1583 def : MovDPP8Pattern<isGFX11Only, V_MOV_B32_dpp8_gfx11, 
vt>; 1584 def : MovDPP8Pattern<isGFX12Only, V_MOV_B32_dpp8_gfx12, vt>; 1585} 1586