//===-- VOP2Instructions.td - Vector Instruction Definitions --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VOP2 Classes
//===----------------------------------------------------------------------===//

// 32-bit VOP2 encoding. An operand field is encoded as 0 when the profile
// says the operand is absent (HasSrc0 / HasSrc1 / EmitDst).
class VOP2e <bits<6> op, VOPProfile P> : Enc32 {
  bits<8> vdst;
  bits<9> src0;
  bits<8> src1;

  let Inst{8-0}   = !if(P.HasSrc0, src0, 0);
  let Inst{16-9}  = !if(P.HasSrc1, src1, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; // encoding
}

// 64-bit encoding: the same low dword layout as VOP2e, followed by a 32-bit
// immediate in bits 63-32.
class VOP2_MADKe <bits<6> op, VOPProfile P> : Enc64 {
  bits<8> vdst;
  bits<9> src0;
  bits<8> src1;
  bits<32> imm;

  let Inst{8-0}   = !if(P.HasSrc0, src0, 0);
  let Inst{16-9}  = !if(P.HasSrc1, src1, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; // encoding
  let Inst{63-32} = imm;
}

// SDWA form: the src0 field holds the fixed value 0xf9; the remaining fields
// come from VOP_SDWAe.
class VOP2_SDWAe <bits<6> op, VOPProfile P> : VOP_SDWAe <P> {
  bits<8> vdst;
  bits<8> src1;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = !if(P.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; // encoding
}

// SDWA9 form: src1 is 9 bits wide; its low 8 bits go in the usual src1 field
// and bit 8 is placed in Inst{63}.
class VOP2_SDWA9Ae <bits<6> op, VOPProfile P> : VOP_SDWA9Ae <P> {
  bits<8> vdst;
  bits<9> src1;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = !if(P.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; // encoding
  let Inst{63}    = !if(P.HasSrc1, src1{8}, 0); // src1_sgpr
}

// Pseudo instruction for the 32-bit (default "_e32") VOP2 form, using the
// profile's Outs32 / Ins32 / Asm32.
class VOP2_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], string suffix = "_e32"> :
  VOP_Pseudo <opName, suffix, P, P.Outs32, P.Ins32, "", pattern> {

  let AsmOperands = P.Asm32;

  let Size = 4;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;

  // The MODE register is treated as read whenever the destination or src0
  // type is floating point.
  let ReadsModeReg = !or(P.DstVT.isFP, P.Src0VT.isFP);

  let mayRaiseFPException = ReadsModeReg;

  let VOP2 = 1;
  let VALU = 1;
  let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]);

  let AsmVariantName = AMDGPUAsmVariants.Default;
}

// Real (non-pseudo) instruction derived from a VOP2_Pseudo for one encoding
// family. Operand lists, asm string and the relevant flags are taken from the
// pseudo.
class VOP2_Real <VOP2_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemonic> :
  VOP_Real <ps>,
  InstSI <ps.OutOperandList, ps.InOperandList, real_name # ps.AsmOperands, []>,
  SIMCInstr <ps.PseudoInstr, EncodingFamily> {

  let VALU = 1;
  let VOP2 = 1;
  let isPseudo = 0;
  let isCodeGenOnly = 0;

  // copy relevant pseudo op flags
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let True16Predicate      = ps.True16Predicate;
  let OtherPredicates      = ps.OtherPredicates;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let AsmVariantName       = ps.AsmVariantName;
  let Constraints          = ps.Constraints;
  let DisableEncoding      = ps.DisableEncoding;
  let TSFlags              = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let Uses                 = ps.Uses;
  let Defs                 = ps.Defs;
  let SchedRW              = ps.SchedRW;
  let mayLoad              = ps.mayLoad;
  let mayStore             = ps.mayStore;
  let isConvergent         = ps.isConvergent;
}

// VOP2_Real parameterized by a GFXGen record. The decoder namespace gets a
// "_FAKE16" suffix unless the profile is a real true16 profile.
class VOP2_Real_Gen <VOP2_Pseudo ps, GFXGen Gen, string real_name = ps.Mnemonic> :
  VOP2_Real <ps, Gen.Subtarget, real_name> {
  let AssemblerPredicate = Gen.AssemblerPredicate;
  let True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate);
  let DecoderNamespace = Gen.DecoderNamespace#
                         !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
}

class VOP2_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_SDWA_Pseudo <OpName, P, pattern> {
  let AsmMatchConverter = "cvtSdwaVOP2";
}

class VOP2_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_DPP_Pseudo <OpName, P, pattern> {
}


// Selection pattern for the 64-bit form: with modifiers, src0 goes through
// VOP3Mods0 (with or without omod) and src1 through VOP3Mods; otherwise the
// node is matched on the plain operands.
class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
  list<dag> ret = !if(P.HasModifiers,
    [(set P.DstVT:$vdst,
      (node (P.Src0VT
              !if(P.HasOMod,
                  (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
                  (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))),
            (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
    [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]);
}

// Defines only the _e32 pseudo.
multiclass VOP2Inst_e32<string opName,
                        VOPProfile P,
                        SDPatternOperator node = null_frag,
                        string revOp = opName> {
  def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>,
             Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
}

// Same as VOP2Inst_e32, additionally mixing in VOPD_Component.
multiclass
    VOP2Inst_e32_VOPD<string opName, VOPProfile P, bits<5> VOPDOp,
                      string VOPDName, SDPatternOperator node = null_frag,
                      string revOp = opName> {
  defm NAME : VOP2Inst_e32<opName, P, node, revOp>,
              VOPD_Component<VOPDOp, VOPDName>;
}

// Defines the _e64 pseudo and, when the profile has HasExtVOP3DPP, the
// GFX11+ _e64_dpp pseudo.
multiclass VOP2Inst_e64<string opName,
                        VOPProfile P,
                        SDPatternOperator node = null_frag,
                        string revOp = opName> {
  def _e64 : VOP3InstBase <opName, P, node, 1>,
             Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;

  let SubtargetPredicate = isGFX11Plus in {
    if P.HasExtVOP3DPP then
      def _e64_dpp : VOP3_DPP_Pseudo <opName, P>;
  } // End SubtargetPredicate = isGFX11Plus
}

// Defines the _sdwa pseudo when the profile has HasExtSDWA.
multiclass VOP2Inst_sdwa<string opName,
                         VOPProfile P,
                         string revOp = opName> {
  if P.HasExtSDWA then
    def _sdwa : VOP2_SDWA_Pseudo <opName, P>,
                Commutable_REV<revOp#"_sdwa", !eq(revOp, opName)>;
}

// Full set of pseudos: _e32, _e64 (+ _e64_dpp), _sdwa and _dpp, each subject
// to the corresponding profile flag.
multiclass VOP2Inst<string opName,
                    VOPProfile P,
                    SDPatternOperator node = null_frag,
                    string revOp = opName> :
    VOP2Inst_e32<opName, P, node, revOp>,
    VOP2Inst_e64<opName, P, node, revOp>,
    VOP2Inst_sdwa<opName, P, revOp> {
  if P.HasExtDPP then
    def _dpp : VOP2_DPP_Pseudo <opName, P>;
}

// Instantiates VOP2Inst three times: the plain variant, a "_t16" variant on
// VOPProfile_True16<P> and a "_fake16" variant on VOPProfile_Fake16<P>, each
// under its own predicate.
multiclass VOP2Inst_t16<string opName,
                        VOPProfile P,
                        SDPatternOperator node = null_frag,
                        string revOp = opName> {
  let OtherPredicates = [Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in {
    defm NAME : VOP2Inst<opName, P, node, revOp>;
  }
  let SubtargetPredicate = UseRealTrue16Insts in {
    defm _t16 : VOP2Inst<opName#"_t16", VOPProfile_True16<P>, node, revOp#"_t16">;
  }
  let SubtargetPredicate = UseFakeTrue16Insts in {
    defm _fake16 : VOP2Inst<opName#"_fake16", VOPProfile_Fake16<P>, node, revOp#"_fake16">;
  }
}

// Creating a _t16_e32 pseudo when there is no corresponding real instruction on
// any subtarget is a problem. It makes getMCOpcodeGen return -1, which we
// assume means the instruction is already a real. The fix is to not create that
// _t16_e32 pseudo
multiclass VOP2Inst_e64_t16<string opName,
                            VOPProfile P,
                            SDPatternOperator node = null_frag,
                            string revOp = opName> {
  let OtherPredicates = [Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in {
    defm NAME : VOP2Inst<opName, P, node, revOp>;
  }
  let SubtargetPredicate = UseRealTrue16Insts in {
    defm _t16 : VOP2Inst_e64<opName#"_t16", VOPProfile_True16<P>, node, revOp#"_t16">;
  }
  let SubtargetPredicate = UseFakeTrue16Insts in {
    defm _fake16 : VOP2Inst_e64<opName#"_fake16", VOPProfile_Fake16<P>, node, revOp#"_fake16">;
  }
}

// VOP2Inst whose _e32 pseudo is also a VOPD component.
multiclass VOP2Inst_VOPD<string opName,
                         VOPProfile P,
                         bits<5> VOPDOp,
                         string VOPDName,
                         SDPatternOperator node = null_frag,
                         string revOp = opName> :
    VOP2Inst_e32_VOPD<opName, P, VOPDOp, VOPDName, node, revOp>,
    VOP2Inst_e64<opName, P, node, revOp>,
    VOP2Inst_sdwa<opName, P, revOp> {
  if P.HasExtDPP then
    def _dpp : VOP2_DPP_Pseudo <opName, P>;
}

// Variant for instructions that define VCC in the 32-bit / SDWA / DPP forms.
// VCC is additionally listed in Uses when useSGPRInput is set (by default,
// when the profile has three source arguments).
multiclass VOP2bInst <string opName,
                      VOPProfile P,
                      SDPatternOperator node = null_frag,
                      string revOp = opName,
                      bit useSGPRInput = !eq(P.NumSrcArgs, 3)> {
  let SchedRW = [Write32Bit, WriteSALU] in {
    let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
      def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>,
                 Commutable_REV<revOp#"_e32", !eq(revOp, opName)> {
        let usesCustomInserter = true;
      }

      if P.HasExtSDWA then
        def _sdwa : VOP2_SDWA_Pseudo <opName, P>,
                    Commutable_REV<revOp#"_sdwa", !eq(revOp, opName)> {
          let AsmMatchConverter = "cvtSdwaVOP2b";
        }
      if P.HasExtDPP then
        def _dpp : VOP2_DPP_Pseudo <opName, P>;
    } // End Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC]

    def _e64 : VOP3InstBase <opName, P, node, 1>,
               Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;

    let SubtargetPredicate = isGFX11Plus in {
      if P.HasExtVOP3DPP then
        def _e64_dpp : VOP3_DPP_Pseudo <opName, P>;
    } // End SubtargetPredicate = isGFX11Plus
  }
}

// Assembler alias whose asm string is the profile's Asm32 with "vcc"
// substituted by opnd.
class VOP2bInstAlias <VOP2_Pseudo ps, Instruction inst,
                      string OpName, string opnd> :
  InstAlias <OpName#" "#!subst("vcc", opnd, ps.Pfl.Asm32),
             (inst ps.Pfl.DstRC:$vdst, ps.Pfl.Src0RC32:$src0,
                   ps.Pfl.Src1RC32:$src1),
             1, inst.AsmVariantName>,
  PredicateControl {
}

// One alias per wave size: "vcc_lo" under isWave32, "vcc" under isWave64.
multiclass VOP2bInstAliases<VOP2_Pseudo ps, VOP2_Real inst, string OpName> {
  let WaveSizePredicate = isWave32 in {
    def : VOP2bInstAlias<ps, inst, OpName, "vcc_lo">;
  }
  let WaveSizePredicate = isWave64 in {
    def : VOP2bInstAlias<ps, inst, OpName, "vcc">;
  }
}

// Variant for instructions that may read VCC implicitly (when useSGPRInput is
// set) but do not define it. The _e32 pseudo is a VOPD component only when a
// non-empty VOPDName is given.
multiclass
    VOP2eInst_Base<string opName, VOPProfile P, bits<5> VOPDOp, string VOPDName,
                   SDPatternOperator node, string revOp, bit useSGPRInput> {

  let SchedRW = [Write32Bit] in {
    let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in {
      if !empty(VOPDName) then
        def _e32 : VOP2_Pseudo <opName, P>,
                   Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
      else
        def _e32 : VOP2_Pseudo <opName, P>,
                   Commutable_REV<revOp#"_e32", !eq(revOp, opName)>,
                   VOPD_Component<VOPDOp, VOPDName>;

      if P.HasExtSDWA then
        def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
          let AsmMatchConverter = "cvtSdwaVOP2e";
        }

      if P.HasExtDPP then
        def _dpp : VOP2_DPP_Pseudo <opName, P>;
    }

    def _e64 : VOP3InstBase <opName, P, node, 1>,
               Commutable_REV<revOp#"_e64", !eq(revOp, opName)> {
      let isReMaterializable = 1;
    }

    let SubtargetPredicate = isGFX11Plus in {
      if P.HasExtVOP3DPP then
        def _e64_dpp : VOP3_DPP_Pseudo <opName, P>;
    } // End SubtargetPredicate = isGFX11Plus
  }
}

// VOP2eInst_Base without a VOPD component (empty VOPDName).
multiclass
    VOP2eInst<string opName, VOPProfile P, SDPatternOperator node = null_frag,
              string revOp = opName, bit useSGPRInput = !eq(P.NumSrcArgs, 3)>
    : VOP2eInst_Base<opName, P, 0, "", node, revOp, useSGPRInput>;

// VOP2eInst_Base with a VOPD component.
multiclass
    VOP2eInst_VOPD<string opName, VOPProfile P, bits<5> VOPDOp, string VOPDName,
                   SDPatternOperator node = null_frag, string revOp = opName,
                   bit useSGPRInput = !eq(P.NumSrcArgs, 3)>
    : VOP2eInst_Base<opName, P, VOPDOp, VOPDName, node, revOp, useSGPRInput>;

// Alias whose asm string is Asm32 followed by ", " and opnd.
class VOP2eInstAlias <VOP2_Pseudo ps, Instruction inst, string opnd = ""> :
  InstAlias <ps.OpName#" "#ps.Pfl.Asm32#", "#opnd,
             (inst ps.Pfl.DstRC:$vdst, ps.Pfl.Src0RC32:$src0,
                   ps.Pfl.Src1RC32:$src1),
             1, inst.AsmVariantName>,
  PredicateControl;

class VOP2e64InstAlias <VOP3_Pseudo ps, Instruction inst> :
  InstAlias <ps.OpName#" "#ps.Pfl.Asm64,
             (inst ps.Pfl.DstRC:$vdst, VOPDstS64orS32:$sdst,
                   ps.Pfl.Src0RC32:$src0, ps.Pfl.Src1RC32:$src1, Clamp:$clamp),
             1, inst.AsmVariantName>,
  PredicateControl;

// One alias per wave size: "vcc_lo" under isWave32, "vcc" under isWave64.
multiclass VOP2eInstAliases<VOP2_Pseudo ps, VOP2_Real inst> {
  let WaveSizePredicate = isWave32 in {
    def : VOP2eInstAlias<ps, inst, "vcc_lo">;
  }
  let WaveSizePredicate = isWave64 in {
    def : VOP2eInstAlias<ps, inst, "vcc">;
  }
}

// Common base of the MADAK / MADMK profiles; all four types are vt.
class VOP_MADK_Base<ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
  string AsmVOPDXDeferred = ?;
}

// MADAK operand order: src0, src1, imm.
class VOP_MADAK <ValueType vt> : VOP_MADK_Base<vt> {
  field Operand ImmOpType = !if(!eq(vt.Size, 32), KImmFP32, KImmFP16);
  field dag Ins32 = !if(!eq(vt.Size, 32),
                        (ins VSrc_f32_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm),
                        (ins VSrc_f16_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm));
  field dag InsVOPDX = (ins VSrc_f32_Deferred:$src0X, VGPR_32:$vsrc1X, ImmOpType:$imm);
  // Note that both src0X and imm are deferred
  let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X, VGPR_32:$vsrc1X, ImmOpType:$immDeferred);
  field dag InsVOPDY = (ins VSrc_f32_Deferred:$src0Y, VGPR_32:$vsrc1Y, ImmOpType:$imm);

  field string Asm32 = "$vdst, $src0, $src1, $imm";
  field string AsmVOPDX = "$vdstX, $src0X, $vsrc1X, $imm";
  let AsmVOPDXDeferred = "$vdstX, $src0X, $vsrc1X, $immDeferred";
  field string AsmVOPDY = "$vdstY, $src0Y, $vsrc1Y, $imm";
  field bit HasExt = 0;
  let IsSingle = 1;
}

def VOP_MADAK_F16 : VOP_MADAK <f16>;
def VOP_MADAK_F16_t16 : VOP_MADAK <f16> {
  let IsTrue16 = 1;
  let IsRealTrue16 = 1;
  let DstRC = getVALUDstForVT<DstVT, 1/*IsTrue16*/, 0/*IsVOP3Encoding*/>.ret;
  let Ins32 = (ins VSrcT_f16_Lo128_Deferred:$src0, VGPRSrc_16_Lo128:$src1, ImmOpType:$imm);
}
def VOP_MADAK_F16_fake16 : VOP_MADAK <f16> {
  let IsTrue16 = 1;
  let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
  let Ins32 = (ins VSrcFake16_f16_Lo128_Deferred:$src0, VGPRSrc_32_Lo128:$src1, ImmOpType:$imm);
}
def VOP_MADAK_F32 : VOP_MADAK <f32>;

// MADMK operand order: src0, imm, src1.
class VOP_MADMK <ValueType vt> : VOP_MADK_Base<vt> {
  field Operand ImmOpType = !if(!eq(vt.Size, 32), KImmFP32, KImmFP16);
  field dag Ins32 = !if(!eq(vt.Size, 32),
                        (ins VSrc_f32_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1),
                        (ins VSrc_f16_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1));
  field dag InsVOPDX = (ins VSrc_f32_Deferred:$src0X, ImmOpType:$imm, VGPR_32:$vsrc1X);
  let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X, ImmOpType:$immDeferred, VGPR_32:$vsrc1X);
  field dag InsVOPDY = (ins VSrc_f32_Deferred:$src0Y, ImmOpType:$imm, VGPR_32:$vsrc1Y);

  field string Asm32 = "$vdst, $src0, $imm, $src1";
  field string AsmVOPDX = "$vdstX, $src0X, $imm, $vsrc1X";
  let AsmVOPDXDeferred = "$vdstX, $src0X, $immDeferred, $vsrc1X";
  field string AsmVOPDY = "$vdstY, $src0Y, $imm, $vsrc1Y";
  field bit HasExt = 0;
  let IsSingle = 1;
}

def VOP_MADMK_F16 : VOP_MADMK <f16>;
def VOP_MADMK_F16_t16 : VOP_MADMK <f16> {
  let IsTrue16 = 1;
  let IsRealTrue16 = 1;
  let DstRC = getVALUDstForVT<DstVT, 1/*IsTrue16*/, 0/*IsVOP3Encoding*/>.ret;
  let Ins32 = (ins VSrcT_f16_Lo128_Deferred:$src0, ImmOpType:$imm, VGPRSrc_16_Lo128:$src1);
}
def VOP_MADMK_F16_fake16 : VOP_MADMK <f16> {
  let IsTrue16 = 1;
  let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
  let Ins32 = (ins VSrcFake16_f16_Lo128_Deferred:$src0, ImmOpType:$imm, VGPRSrc_32_Lo128:$src1);
}
def VOP_MADMK_F32 : VOP_MADMK <f32>;

// FIXME: Remove src2_modifiers. It isn't used, so is wasting memory
// and processing time but it makes it easier to convert to mad.
// Profile for multiply-accumulate style instructions. The type list is
// [vt0, vt1, vt1, vt0]; src2 is carried as an explicit operand in every form
// but HasSrc2 / HasSrc2Mods are cleared at the end of the class.
class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, vt0]> {
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT<Src2VT>.ret:$src2);
  // Src2 must accept the same operand types as vdst, namely VGPRs only
  let Src2RC64 = getVOP3VRegSrcForVT<Src2VT, IsTrue16, !not(IsRealTrue16)>.ret;
  let Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, 3,
                       0, HasModifiers, HasModifiers, HasOMod,
                       Src0Mod, Src1Mod, Src2Mod>.ret;
  let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                    Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                    getVregSrcForVT<Src2VT>.ret:$src2, // stub argument
                    dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
                    DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins Dpp16FI:$fi));
  let InsVOP3Base = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, RegisterOperand<VGPR_32>, 3,
                                   0, HasModifiers, HasModifiers, HasOMod,
                                   Src0ModVOP3DPP, Src1ModVOP3DPP, Src2Mod, HasOpSel>.ret;
  // We need a dummy src2 tied to dst to track the use of that register for s_delay_alu
  let InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X, VGPRSrc_32:$src2X);
  let InsVOPDXDeferred =
    (ins !if(!eq(Src0VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0X,
         VGPR_32:$vsrc1X, VGPRSrc_32:$src2X);
  let InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y, VGPRSrc_32:$src2Y);
  // NOTE(review): the $src0Y operand class is selected on Src1VT, whereas the
  // X variant above selects on Src0VT. Both source slots are given vt1 by this
  // class, so the result is presumably the same -- confirm before changing.
  let InsVOPDYDeferred =
    (ins !if(!eq(Src1VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0Y,
         VGPR_32:$vsrc1Y, VGPRSrc_32:$src2Y);

  let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                     Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                     getVregSrcForVT<Src2VT>.ret:$src2, // stub argument
                     dpp8:$dpp8, Dpp8FI:$fi);
  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                     Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
                     getVregSrcForVT<Src2VT>.ret:$src2, // stub argument
                     Clamp:$clamp, omod:$omod,
                     dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel, src1_sel:$src1_sel);
  let Asm32 = getAsm32<1, 2, vt0>.ret;
  let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt0>.ret;
  let AsmDPP16 = getAsmDPP16<1, 2, HasModifiers, vt0>.ret;
  let AsmDPP8 = getAsmDPP8<1, 2, 0, vt0>.ret;
  let AsmSDWA = getAsmSDWA<1, 2, vt0>.ret;
  let AsmSDWA9 = getAsmSDWA9<1, 1, 2, vt0>.ret;
  let AsmVOP3Base =
      getAsmVOP3Base<2 /*NumSrcArgs*/, HasDst, HasClamp,
                     HasOpSel, HasOMod, IsVOP3P, HasModifiers,
                     HasModifiers, HasModifiers,
                     0 /*Src2HasMods*/, DstVT>.ret;
  let HasSrc2 = 0;
  let HasSrc2Mods = 0;

  let HasExt = 1;
  let HasExtDPP = 1;
  let HasExt32BitDPP = 1;
  let HasExtSDWA = 1;
  let HasExtSDWA9 = 0;
  let TieRegDPP = "$src2";
}

def VOP_MAC_F16 : VOP_MAC <f16>;

// Real true16 flavour: operand classes and modifier types are re-derived with
// IsTrue16 = 1 and IsFake16 = 0.
def VOP_MAC_F16_t16 : VOP_MAC <f16> {
  let IsTrue16 = 1;
  let IsRealTrue16 = 1;
  let HasOpSel = 1;
  let DstRC = VOPDstOperand_t16Lo128;
  let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
  let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret:$src2);
  let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
  let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
  let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
  let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0/*IsFake16*/>.ret;
  let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0/*IsFake16*/>.ret;
  let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0/*IsFake16*/>.ret;
  let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                    Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                    getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret:$src2, // stub argument
                    dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
                    DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
  let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                     Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                     getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret:$src2, // stub argument
                     dpp8:$dpp8, Dpp8FI:$fi);
  let DstRC64 = getVALUDstForVT<DstVT, 1/*IsTrue16*/, 1/*IsVOP3Encoding*/>.ret;
  let Src0RC64 = getVOP3SrcForVT<Src0VT, 1/*IsTrue16*/>.ret;
  let Src1RC64 = getVOP3SrcForVT<Src1VT, 1/*IsTrue16*/>.ret;
  let Src0VOP3DPP = VGPRSrc_16;
  let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0/*IsFake16*/>.ret;
  let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 0/*IsFake16*/>.ret;
  let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 0/*IsFake16*/>.ret;
  let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 0/*IsFake16*/>.ret;
  let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 0/*IsFake16*/>.ret;
  let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
  let Src1Mod = getSrcMod<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
  let Src2Mod = getSrcMod<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
}

// Fake16 flavour: same overrides as above, derived with IsFake16 = 1.
def VOP_MAC_F16_fake16 : VOP_MAC <f16> {
  let IsTrue16 = 1;
  let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
  let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
  let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret:$src2);
  let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
  let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
  let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
  let Src0ModDPP = getSrcModDPP_t16<Src0VT, 1/*IsFake16*/>.ret;
  let Src1ModDPP = getSrcModDPP_t16<Src1VT, 1/*IsFake16*/>.ret;
  let Src2ModDPP = getSrcModDPP_t16<Src2VT, 1/*IsFake16*/>.ret;
  let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                    Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                    getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret:$src2, // stub argument
                    dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
                    DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
  let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                     Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                     getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret:$src2, // stub argument
                     dpp8:$dpp8, Dpp8FI:$fi);
  let DstRC64 = getVALUDstForVT<DstVT>.ret;
  let Src0VOP3DPP = VGPRSrc_32;
  let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
  let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1/*IsFake16*/>.ret;
  let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 1/*IsFake16*/>.ret;
  let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 1/*IsFake16*/>.ret;
  let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 1/*IsFake16*/>.ret;
  let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
  let Src1Mod = getSrcMod<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
  let Src2Mod = getSrcMod<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
}

def VOP_MAC_F32 : VOP_MAC <f32>;
let HasExtDPP = 0, HasExt32BitDPP = 0 in
def VOP_MAC_LEGACY_F32 : VOP_MAC <f32>;
let HasExtSDWA = 0, HasExt32BitDPP = 0, HasExt64BitDPP = 1 in
def VOP_MAC_F64 : VOP_MAC <f64>;

// VOP_MAC with clamp, SDWA, op_sel and IsPacked switched off.
class VOP_DOT_ACC<ValueType vt0, ValueType vt1> : VOP_MAC<vt0, vt1> {
  let HasClamp = 0;
  let HasExtSDWA = 0;
  let HasOpSel = 0;
  let IsPacked = 0;
}

def VOP_DOT_ACC_F32_V2F16 : VOP_DOT_ACC<f32, v2f16> {
  let Src0ModDPP = FPVRegInputMods;
  let Src1ModDPP = FPVRegInputMods;
  let HasClamp = 1;
}

def VOP_DOT_ACC_F32_V2BF16 : VOP_DOT_ACC<f32, v2bf16> {
  let Src0ModDPP = FPVRegInputMods;
  let Src1ModDPP = FPVRegInputMods;
  let HasClamp = 1;
}

def VOP_DOT_ACC_I32_I32 : VOP_DOT_ACC<i32, i32> {
  let HasExtVOP3DPP = 0;
  let HasSrc0Mods = 1;
  let HasSrc1Mods = 1;
  let HasClamp = 1;

  let Src0Mod = Int32InputMods;
  let Src1Mod = Int32InputMods;
  let Ins64 = getIns64<Src0RC64, Src1RC64, getVregSrcForVT<Src2VT>.ret,
                       3 /*NumSrcArgs*/, HasClamp, 1 /*HasModifiers*/,
                       1 /*HasSrc2Mods*/, HasOMod,
                       Src0Mod, Src1Mod, Src2Mod>.ret;
  let Asm64 = "$vdst, $src0, $src1$clamp";
}

// Write out to vcc or arbitrary SGPR.
def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], /*EnableClamp=*/1> {
  let Asm32 = "$vdst, vcc, $src0, $src1";
  let AsmVOP3Base = "$vdst, $sdst, $src0, $src1$clamp";
  let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmDPP = "$vdst, vcc, $src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
  let AsmDPP8 = "$vdst, vcc, $src0, $src1 $dpp8$fi";
  let AsmDPP16 = AsmDPP#"$fi";
  let InsDPP = (ins DstRCDPP:$old,
                    Src0DPP:$src0,
                    Src1DPP:$src1,
                    dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
                    DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins Dpp16FI:$fi));
  let InsDPP8 = (ins DstRCDPP:$old,
                     Src0DPP:$src0,
                     Src1DPP:$src1,
                     dpp8:$dpp8, Dpp8FI:$fi);
  let Outs32 = (outs DstRC:$vdst);
  let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
  let OutsVOP3DPP = Outs64;
  let OutsVOP3DPP8 = Outs64;
}

// Write out to vcc or arbitrary SGPR and read in from vcc or
// arbitrary SGPR.
// Profile whose 32-bit, SDWA and DPP asm strings spell "vcc" literally for
// both the extra output and the extra input; the 64-bit form names them as
// $sdst and $src2.
def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], /*EnableClamp=*/1> {
  let HasSrc2Mods = 0;
  let Asm32 = "$vdst, vcc, $src0, $src1, vcc";
  let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmDPP = "$vdst, vcc, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
  let AsmDPP8 = "$vdst, vcc, $src0, $src1, vcc $dpp8$fi";
  let AsmDPP16 = AsmDPP#"$fi";
  let Outs32 = (outs DstRC:$vdst);
  let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
  let AsmVOP3Base = "$vdst, $sdst, $src0, $src1, $src2$clamp";
  let OutsVOP3DPP = Outs64;
  let OutsVOP3DPP8 = Outs64;

  // Suppress src2 implied by type since the 32-bit encoding uses an
  // implicit VCC use.
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);

  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                     Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
                     Clamp:$clamp,
                     dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel, src1_sel:$src1_sel);

  let InsDPP = (ins DstRCDPP:$old,
                    Src0DPP:$src0,
                    Src1DPP:$src1,
                    dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
                    DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins Dpp16FI:$fi));
  let InsDPP8 = (ins DstRCDPP:$old,
                     Src0DPP:$src0,
                     Src1DPP:$src1,
                     dpp8:$dpp8, Dpp8FI:$fi);

  let HasExt = 1;
  let HasExtDPP = 1;
  let HasExt32BitDPP = 1;
  let HasExtSDWA = 1;
  let HasExtSDWA9 = 1;
}

// Read in from vcc or arbitrary SGPR.
// Profile whose SDWA and DPP asm strings spell a trailing "vcc" input
// literally, while Asm32 omits it; the VOP3 form names it $src2. Source
// modifiers are always the floating-point kind.
class VOP2e_SGPR<list<ValueType> ArgVT> : VOPProfile<ArgVT> {
  let Asm32 = "$vdst, $src0, $src1";
  let AsmSDWA = "$vdst, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmSDWA9 = "$vdst, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmDPP = "$vdst, $src0_modifiers, $src1_modifiers, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
  let AsmDPP8 = "$vdst, $src0, $src1, vcc $dpp8$fi";
  let AsmDPP16 = AsmDPP#"$fi";
  let AsmVOP3Base = "$vdst, $src0_modifiers, $src1_modifiers, $src2";

  let Outs32 = (outs DstRC:$vdst);
  let Outs64 = (outs DstRC64:$vdst);

  // Suppress src2 implied by type since the 32-bit encoding uses an
  // implicit VCC use.
  let Ins32 = (ins VSrc_f32:$src0, Src1RC32:$src1);

  let HasModifiers = 1;

  // Select FP modifiers for VOP3
  let Src0Mod = !if(!eq(Src0VT.Size, 16), FP16InputMods, FP32InputMods);
  let Src1Mod = Src0Mod;

  let HasSrc0IntMods = 0;
  let HasSrc1IntMods = 0;
  let HasSrc0FloatMods = 1;
  let HasSrc1FloatMods = 1;
  let InsSDWA = (ins FP32SDWAInputMods:$src0_modifiers, SDWASrc_f32:$src0,
                     FP32SDWAInputMods:$src1_modifiers, SDWASrc_f32:$src1,
                     Clamp:$clamp,
                     dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel, src1_sel:$src1_sel);

  let InsDPP = (ins DstRCDPP:$old,
                    FPVRegInputMods:$src0_modifiers, Src0DPP:$src0,
                    FPVRegInputMods:$src1_modifiers, Src1DPP:$src1,
                    dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
                    DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins Dpp16FI:$fi));
  let InsDPP8 = (ins DstRCDPP:$old,
                     FPVRegInputMods:$src0_modifiers, Src0DPP:$src0,
                     FPVRegInputMods:$src1_modifiers, Src1DPP:$src1,
                     dpp8:$dpp8, Dpp8FI:$fi);

  let Src0ModVOP3DPP = FPVRegInputMods;
  let Src1ModVOP3DPP = FP32VCSrcInputMods;

  let HasExt = 1;
  let HasExtDPP = 1;
  let HasExt32BitDPP = 1;
  let HasExtSDWA = 1;
  let HasExtSDWA9 = 1;
}

def VOP2e_I32_I32_I32_I1 : VOP2e_SGPR<[i32, i32, i32, i1]>;
def VOP2e_I16_I16_I16_I1 : VOP2e_SGPR<[i16, i16, i16, i1]>;
// V_CNDMASK_B16 is VOP3 only
def VOP2e_I16_I16_I16_I1_fake16 : VOP2e_SGPR<[i16, i16, i16, i1]> {
  let IsTrue16 = 1;
  let DstRC64 = getVALUDstForVT<DstVT>.ret;

  let Src0Mod = getSrc0Mod<f16, DstVT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
  let Src1Mod = getSrcMod<f16, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;

  let Src0VOP3DPP = VGPRSrc_32;
  let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
  let Src0ModVOP3DPP = getSrc0ModVOP3DPP<f16, DstVT, 1/*IsFake16*/>.ret;
  let Src1ModVOP3DPP = getSrcModVOP3DPP<f16, 1/*IsFake16*/>.ret;
}

// Readlane profile: SReg_32 destination; the 64-bit operand lists and asm
// string are the same as the 32-bit ones. All extended forms are disabled.
def VOP_READLANE : VOPProfile<[i32, i32, i32, untyped]> {
  let Outs32 = (outs SReg_32:$vdst);
  let Outs64 = Outs32;
  let Ins32 = (ins VRegOrLdsSrc_32:$src0, SCSrc_b32:$src1);
  let Ins64 = Ins32;
  let Asm32 = " $vdst, $src0, $src1";
  let Asm64 = Asm32;

  let HasExt = 0;
  let HasExtDPP = 0;
  let HasExt32BitDPP = 0;
  let HasExt64BitDPP = 0;
  let HasExtSDWA = 0;
  let HasExtSDWA9 = 0;
}

// Writelane profile: VGPR_32 destination plus an extra $vdst_in input that is
// not printed in the asm string. All extended forms are disabled.
def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> {
  let Outs32 = (outs VGPR_32:$vdst);
  let Outs64 = Outs32;
  let Ins32 = (ins SCSrc_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in);
  let Ins64 = Ins32;
  let Asm32 = " $vdst, $src0, $src1";
  let Asm64 = Asm32;
  let HasSrc2 = 0;
  let HasSrc2Mods = 0;

  let HasExt = 0;
  let HasExtDPP = 0;
  let HasExt32BitDPP = 0;
  let HasExt64BitDPP = 0;
  let HasExtSDWA = 0;
  let HasExtSDWA9 = 0;
}

//===----------------------------------------------------------------------===//
// VOP2 Instructions
//===----------------------------------------------------------------------===//

let SubtargetPredicate = isGFX11Plus in
defm V_CNDMASK_B16 : VOP2eInst <"v_cndmask_b16", VOP2e_I16_I16_I16_I1_fake16>;
defm V_CNDMASK_B32 : VOP2eInst_VOPD <"v_cndmask_b32", VOP2e_I32_I32_I32_I1, 0x9, "v_cndmask_b32">;
let SubtargetPredicate = HasMadMacF32Insts, isReMaterializable = 1 in
def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>;

let isCommutable = 1 in {
let isReMaterializable = 1 in {
defm V_ADD_F32 : VOP2Inst_VOPD <"v_add_f32", VOP_F32_F32_F32, 0x4, "v_add_f32", any_fadd>;
defm V_SUB_F32 : VOP2Inst_VOPD <"v_sub_f32", VOP_F32_F32_F32, 0x5, "v_sub_f32", any_fsub>;
defm V_SUBREV_F32 : VOP2Inst_VOPD <"v_subrev_f32", VOP_F32_F32_F32, 0x6, "v_subrev_f32", null_frag, "v_sub_f32">;
defm V_MUL_LEGACY_F32 : VOP2Inst_VOPD <"v_mul_legacy_f32", VOP_F32_F32_F32, 0x7, "v_mul_dx9_zero_f32", AMDGPUfmul_legacy>;
defm V_MUL_F32 : VOP2Inst_VOPD <"v_mul_f32", VOP_F32_F32_F32, 0x3, "v_mul_f32", any_fmul>;
defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32_ARITH, AMDGPUmul_i24>;
defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_I32_I32_I32, AMDGPUmulhi_i24>;
defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32_ARITH, AMDGPUmul_u24>;
defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_I32_I32_I32, AMDGPUmulhi_u24>;
defm V_MIN_F32 : VOP2Inst_VOPD <"v_min_f32", VOP_F32_F32_F32, 0xb, "v_min_f32", fminnum_like>;
defm V_MAX_F32 : VOP2Inst_VOPD <"v_max_f32", VOP_F32_F32_F32, 0xa, "v_max_f32", fmaxnum_like>;
defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smin>;
defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smax>;
defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umin>;
defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umax>;
defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, clshr_rev_32, "v_lshr_b32">;
defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, cashr_rev_32, "v_ashr_i32">;
defm V_LSHLREV_B32 : VOP2Inst_VOPD <"v_lshlrev_b32", VOP_I32_I32_I32, 0x11, "v_lshlrev_b32", clshl_rev_32, "v_lshl_b32">;
defm V_AND_B32 : VOP2Inst_VOPD <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, 0x12, "v_and_b32", and>;
defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, or>;
defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, xor>;
} // End isReMaterializable = 1

let mayRaiseFPException = 0 in {
let OtherPredicates = [HasMadMacF32Insts] in {
let Constraints = "$vdst = $src2", DisableEncoding="$src2",
    isConvertibleToThreeAddress = 1 in {
defm V_MAC_F32 : VOP2Inst <"v_mac_f32", VOP_MAC_F32>;

let SubtargetPredicate = isGFX6GFX7GFX10 in
defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_MAC_LEGACY_F32>;
} // End Constraints = "$vdst = $src2", DisableEncoding="$src2",
  //     isConvertibleToThreeAddress = 1

let isReMaterializable = 1 in
def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, []>;
} // End OtherPredicates = [HasMadMacF32Insts]
} // End mayRaiseFPException = 0

// No patterns so that the scalar instructions are always selected.
// The scalar versions will be replaced with vector when needed later.
defm V_SUB_CO_U32 : VOP2bInst <"v_sub_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_co_u32">;
defm V_SUBREV_CO_U32 : VOP2bInst <"v_subrev_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_co_u32">;
defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32">;
defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32">;


let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1 in {
  defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32">;
  defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32">;
}

let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1, isAdd = 1 in {
  defm V_ADD_U32 : VOP2Inst_VOPD <"v_add_u32", VOP_I32_I32_I32_ARITH, 0x10, "v_add_nc_u32", null_frag, "v_add_u32">;
}

let isAdd = 1 in {
  defm V_ADD_CO_U32 : VOP2bInst <"v_add_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_add_co_u32">;
  defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_addc_u32">;
}

} // End isCommutable = 1

// These are special and do not read the exec mask.
// Uses is overridden with an empty register list for both lane instructions.
let isConvergent = 1, Uses = []<Register> in {
def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE, []>;
// The write form ties $vdst to $vdst_in and keeps $vdst_in out of the encoding.
let IsNeverUniform = 1, Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE, []>;
} // End IsNeverUniform, $vdst = $vdst_in, DisableEncoding $vdst_in
} // End isConvergent = 1

// Select the readlane/writelane intrinsics for every type in Reg32Types.types.
// For writelane the intrinsic's third operand ($src2) is passed in the
// instruction's third (VGPR_32) input position.
foreach vt = Reg32Types.types in {
  def : GCNPat<(vt (int_amdgcn_readlane vt:$src0, i32:$src1)),
        (V_READLANE_B32 VRegOrLdsSrc_32:$src0, SCSrc_b32:$src1)
  >;

  def : GCNPat<(vt (int_amdgcn_writelane vt:$src0, i32:$src1, vt:$src2)),
        (V_WRITELANE_B32 SCSrc_b32:$src0, SCSrc_b32:$src1, VGPR_32:$src2)
  >;
}

let isReMaterializable = 1 in {
defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_I32_I32_I32>;
defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_I32_I32_I32, add_ctpop>;
let IsNeverUniform = 1 in {
defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_lo>;
defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_hi>;
} // End IsNeverUniform = 1
defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, any_fldexp>;

// These two conversions are explicitly marked as not reading the mode
// register and not raising FP exceptions.
let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_i16_f32>;
defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_u16_f32>;
}

defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_V2F16_F32_F32, AMDGPUpkrtz_f16_f32>;
defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_V2I16_I32_I32, AMDGPUpk_u16_u32>;
defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_V2I16_I32_I32, AMDGPUpk_i16_i32>;


let SubtargetPredicate = isGFX6GFX7 in {
defm V_MIN_LEGACY_F32 : VOP2Inst <"v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy>;
defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy>;
} // End SubtargetPredicate = isGFX6GFX7

// Non-reversed shift forms, restricted to isGFX6GFX7.
let isCommutable = 1 in {
let SubtargetPredicate = isGFX6GFX7 in {
defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, csrl_32>;
defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, csra_32>;
defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, cshl_32>;
} // End SubtargetPredicate = isGFX6GFX7
} // End isCommutable = 1
} // End isReMaterializable = 1

defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_NO_EXT<VOP_I32_F32_I32>>; // TODO: set "Uses = dst"

// Pattern selecting Inst for the divergent form of the binary operator Op,
// using the source value types from Inst's profile. When Inst is not the
// IsOrig member of its Commutable_REV pair, the two sources are swapped.
class DivergentBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
  GCNPat<
      (DivergentBinFrag<Op> Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1),
      !if(!cast<Commutable_REV>(Inst).IsOrig,
        (Inst $src0, $src1),
        (Inst $src1, $src0)
      )
  >;

// Same as DivergentBinOp, but the output passes an extra trailing 0 operand.
// NOTE(review): presumably that operand is the clamp bit -- confirm against
// the instruction profiles.
class DivergentClampingBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
  GCNPat<
      (DivergentBinFrag<Op> Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1),
      !if(!cast<Commutable_REV>(Inst).IsOrig,
        (Inst $src0, $src1, 0),
        (Inst $src1, $src0, 0)
      )
  >;

def : DivergentBinOp<csrl_32, V_LSHRREV_B32_e64>;
def : DivergentBinOp<csra_32, V_ASHRREV_I32_e64>;
def : DivergentBinOp<cshl_32, V_LSHLREV_B32_e64>;

let SubtargetPredicate = HasAddNoCarryInsts in {
  def : DivergentClampingBinOp<add, V_ADD_U32_e64>;
  def : DivergentClampingBinOp<sub, V_SUB_U32_e64>;
}

let SubtargetPredicate = isGFX6GFX7GFX8GFX9, Predicates = [isGFX6GFX7GFX8GFX9] in {
def : DivergentClampingBinOp<add, V_ADD_CO_U32_e64>;
def : DivergentClampingBinOp<sub, V_SUB_CO_U32_e64>;
}

def : DivergentBinOp<adde, V_ADDC_U32_e32>;
def : DivergentBinOp<sube, V_SUBB_U32_e32>;

// Divergent 64-bit binary op implemented as two applications of a 32-bit
// Inst: one on the sub0 halves and one on the sub1 halves of the sources,
// recombined into a VReg_64 with REG_SEQUENCE.
class divergent_i64_BinOp <SDPatternOperator Op, Instruction Inst> :
  GCNPat<
    (DivergentBinFrag<Op> i64:$src0, i64:$src1),
    (REG_SEQUENCE VReg_64,
      (Inst
        (i32 (EXTRACT_SUBREG $src0, sub0)),
        (i32 (EXTRACT_SUBREG $src1, sub0))
      ), sub0,
      (Inst
        (i32 (EXTRACT_SUBREG $src0, sub1)),
        (i32 (EXTRACT_SUBREG $src1, sub1))
      ), sub1
    )
  >;

def : divergent_i64_BinOp <and, V_AND_B32_e64>;
def : divergent_i64_BinOp <or, V_OR_B32_e64>;
def : divergent_i64_BinOp <xor, V_XOR_B32_e64>;

// mul24 w/ 64 bit output.
// InstLo produces the sub0 half and InstHi the sub1 half, both fed the same
// two i32 sources.
class mul24_64_Pat<SDPatternOperator Op, Instruction InstLo, Instruction InstHi> : GCNPat<
  (i64 (Op i32:$src0, i32:$src1)),
  (REG_SEQUENCE VReg_64,
    (InstLo $src0, $src1), sub0,
    (InstHi $src0, $src1), sub1)
>;

def : mul24_64_Pat<AMDGPUmul_i24, V_MUL_I32_I24_e64, V_MUL_HI_I32_I24_e64>;
def : mul24_64_Pat<AMDGPUmul_u24, V_MUL_U32_U24_e64, V_MUL_HI_U32_U24_e64>;

//===----------------------------------------------------------------------===//
// 16-Bit Operand Instructions
//===----------------------------------------------------------------------===//

// The ldexp.f16 intrinsic expects an integer src1 operand, though the hardware
// encoding treats src1 as an f16
def LDEXP_F16_VOPProfile : VOPProfile <[f16, f16, f16, untyped]> {
  let Src1Mod = Int32InputMods;
  let Src1ModDPP = IntVRegInputMods;
  let Src1ModVOP3DPP = IntVRegInputMods;
  // SDWA sext is the only modifier allowed.
  let HasSrc1IntMods = 1;
  let HasSrc1FloatMods = 0;
  let Src1ModSDWA = Int16SDWAInputMods;
}
// True16 flavour: src1 modifier operand classes are instantiated with
// IsFake16 = 0.
def LDEXP_F16_VOPProfile_True16 : VOPProfile_True16<VOP_F16_F16_F16> {
  let Src1Mod = IntT16InputMods<0/*IsFake16*/>;
  let Src1ModDPP = IntT16_Lo128VRegInputMods<0/*IsFake16*/>;
  let Src1ModVOP3DPP = IntT16VCSrcInputMods<0/*IsFake16*/>;
}
// Fake16 flavour: Src1Mod is Int32InputMods; the DPP modifier operand classes
// are instantiated with IsFake16 = 1.
def LDEXP_F16_VOPProfile_Fake16 : VOPProfile_Fake16<VOP_F16_F16_F16> {
  let Src1Mod = Int32InputMods;
  let Src1ModDPP = IntT16_Lo128VRegInputMods<1/*IsFake16*/>;
  let Src1ModVOP3DPP = IntT16VCSrcInputMods<1/*IsFake16*/>;
}

let isReMaterializable = 1 in {
let FPDPRounding = 1 in {
  // Three variants of v_ldexp_f16, each under a different predicate. None
  // passes a selection operator here; separate LDEXP_F16 patterns follow the
  // instruction definitions.
  let OtherPredicates = [Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in
    defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", LDEXP_F16_VOPProfile>;
  let SubtargetPredicate = UseRealTrue16Insts in
    defm V_LDEXP_F16_t16 : VOP2Inst <"v_ldexp_f16_t16", LDEXP_F16_VOPProfile_True16>;
  let SubtargetPredicate = UseFakeTrue16Insts in
    defm V_LDEXP_F16_fake16 : VOP2Inst <"v_ldexp_f16_fake16", LDEXP_F16_VOPProfile_Fake16, null_frag, "v_ldexp_f16_fake16">;
} // End FPDPRounding = 1
defm V_LSHLREV_B16 : VOP2Inst_e64_t16 <"v_lshlrev_b16", VOP_I16_I16_I16, clshl_rev_16>;
defm V_LSHRREV_B16 : VOP2Inst_e64_t16 <"v_lshrrev_b16", VOP_I16_I16_I16, clshr_rev_16>;
defm V_ASHRREV_I16 : VOP2Inst_e64_t16 <"v_ashrrev_i16", VOP_I16_I16_I16, cashr_rev_16>;
let isCommutable = 1 in {
let FPDPRounding = 1 in {
defm V_ADD_F16 : VOP2Inst_t16 <"v_add_f16", VOP_F16_F16_F16, any_fadd>;
defm V_SUB_F16 : VOP2Inst_t16 <"v_sub_f16", VOP_F16_F16_F16, any_fsub>;
defm V_SUBREV_F16 : VOP2Inst_t16 <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub_f16">;
defm V_MUL_F16 : VOP2Inst_t16 <"v_mul_f16", VOP_F16_F16_F16, any_fmul>;
} // End FPDPRounding = 1
defm V_MUL_LO_U16 : VOP2Inst_e64_t16 <"v_mul_lo_u16", VOP_I16_I16_I16, mul>;
defm V_MAX_F16 : VOP2Inst_t16 <"v_max_f16", VOP_F16_F16_F16, fmaxnum_like>;
defm V_MIN_F16 : VOP2Inst_t16 <"v_min_f16", VOP_F16_F16_F16, fminnum_like>;
defm V_MAX_U16 : VOP2Inst_e64_t16 <"v_max_u16", VOP_I16_I16_I16, umax>;
defm V_MAX_I16 : VOP2Inst_e64_t16 <"v_max_i16", VOP_I16_I16_I16, smax>;
defm V_MIN_U16 : VOP2Inst_e64_t16 <"v_min_u16", VOP_I16_I16_I16, umin>;
defm V_MIN_I16 : VOP2Inst_e64_t16 <"v_min_i16", VOP_I16_I16_I16, smin>;
} // End isCommutable = 1
} // End isReMaterializable = 1

// Selects `inst` for `op`, matching src0 with VOP3Mods0 (modifiers, clamp,
// omod) and src1 with VOP3Mods0 as an i16 value. The output passes the source
// modifiers, sources, clamp and omod through, in that order.
class LDEXP_F16_Pat <SDPatternOperator op, VOP_Pseudo inst, VOPProfile P = inst.Pfl> : GCNPat <
  (P.DstVT (op (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
               (i16 (VOP3Mods0 P.Src1VT:$src1, i32:$src1_modifiers)))),
  (inst $src0_modifiers, $src0,
        $src1_modifiers, $src1,
        $clamp, /* clamp */
        $omod /* omod */)
>;

let OtherPredicates = [NotHasTrue16BitInsts] in
def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_e64>;

// Same match as LDEXP_F16_Pat, but the output carries one more trailing
// operand (op_sel), fixed to 0.
class LDEXP_F16_t16_Pat <SDPatternOperator op, VOP_Pseudo inst, VOPProfile P = inst.Pfl> : GCNPat <
  (P.DstVT (op (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
               (i16 (VOP3Mods0 P.Src1VT:$src1, i32:$src1_modifiers)))),
  (inst $src0_modifiers, $src0,
        $src1_modifiers, $src1,
        $clamp, /* clamp */
        $omod, /* omod */
        0) /* op_sel */
>;

let OtherPredicates = [UseRealTrue16Insts] in
def : LDEXP_F16_t16_Pat<any_fldexp, V_LDEXP_F16_t16_e64>;

let OtherPredicates = [UseFakeTrue16Insts] in
def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_fake16_e64>;

// 16-bit logical ops, each defined in a _t16 and a _fake16 flavour.
let SubtargetPredicate = isGFX11Plus in {
  let isCommutable = 1 in {
    defm V_AND_B16_t16 : VOP2Inst_e64 <"v_and_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, and>;
    defm V_AND_B16_fake16 : VOP2Inst_e64 <"v_and_b16_fake16", VOPProfile_Fake16<VOP_I16_I16_I16>, and>;
    defm V_OR_B16_t16 : VOP2Inst_e64 <"v_or_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, or>;
    defm V_OR_B16_fake16 : VOP2Inst_e64 <"v_or_b16_fake16", VOPProfile_Fake16<VOP_I16_I16_I16>, or>;
    defm V_XOR_B16_t16 : VOP2Inst_e64 <"v_xor_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, xor>;
    defm V_XOR_B16_fake16 : VOP2Inst_e64 <"v_xor_b16_fake16", VOPProfile_Fake16<VOP_I16_I16_I16>, xor>;
  } // End isCommutable = 1
} // End SubtargetPredicate = isGFX11Plus

// FMAMK/FMAAK f16 pseudos. The empty string passed as the last VOP2_Pseudo
// argument replaces the default "_e32" suffix. Only the FMAAK forms are
// commutable.
let FPDPRounding = 1, isReMaterializable = 1, FixedSize = 1 in {
let SubtargetPredicate = isGFX10Plus, True16Predicate = NotHasTrue16BitInsts in {
  def V_FMAMK_F16 : VOP2_Pseudo <"v_fmamk_f16", VOP_MADMK_F16, [], "">;
}
let True16Predicate = UseRealTrue16Insts in {
  def V_FMAMK_F16_t16 : VOP2_Pseudo <"v_fmamk_f16_t16", VOP_MADMK_F16_t16, [], "">;
}
let True16Predicate = UseFakeTrue16Insts in {
  def V_FMAMK_F16_fake16 : VOP2_Pseudo <"v_fmamk_f16_fake16", VOP_MADMK_F16_fake16, [], "">;
}

let isCommutable = 1 in {
let SubtargetPredicate = isGFX10Plus, True16Predicate = NotHasTrue16BitInsts in {
  def V_FMAAK_F16 : VOP2_Pseudo <"v_fmaak_f16", VOP_MADAK_F16, [], "">;
}
let True16Predicate = UseRealTrue16Insts in {
  def V_FMAAK_F16_t16 : VOP2_Pseudo <"v_fmaak_f16_t16", VOP_MADAK_F16_t16, [], "">;
}
let True16Predicate = UseFakeTrue16Insts in {
  def V_FMAAK_F16_fake16 : VOP2_Pseudo <"v_fmaak_f16_fake16", VOP_MADAK_F16_fake16, [], "">;
}
} // End isCommutable = 1
} // End FPDPRounding = 1, isReMaterializable = 1, FixedSize = 1

// FMAC f16: $vdst is tied to $src2 and $src2 is dropped from the encoding.
let Constraints = "$vdst = $src2",
    DisableEncoding="$src2",
    isConvertibleToThreeAddress = 1,
    isCommutable = 1 in {
let SubtargetPredicate = isGFX10Plus in {
let True16Predicate = NotHasTrue16BitInsts in {
  defm V_FMAC_F16 : VOP2Inst <"v_fmac_f16", VOP_MAC_F16>;
}
let True16Predicate = UseRealTrue16Insts in {
  defm V_FMAC_F16_t16 : VOP2Inst <"v_fmac_f16_t16", VOP_MAC_F16_t16>;
}
let True16Predicate = UseFakeTrue16Insts in {
  defm V_FMAC_F16_fake16 : VOP2Inst <"v_fmac_f16_fake16", VOP_MAC_F16_fake16>;
}
} // End SubtargetPredicate = isGFX10Plus
} // End FMAC Constraints

let SubtargetPredicate = Has16BitInsts in {
let isReMaterializable = 1 in {
let FPDPRounding = 1 in {
  def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">;
} // End FPDPRounding = 1
let isCommutable = 1 in {
let mayRaiseFPException = 0 in {
  def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">;
}
// This inner SubtargetPredicate replaces the enclosing Has16BitInsts one for
// these three instructions.
let SubtargetPredicate = isGFX8GFX9 in {
  defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16_ARITH, add>;
  defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16_ARITH, sub>;
  defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16_ARITH, null_frag, "v_sub_u16">;
}
} // End isCommutable = 1
} // End isReMaterializable = 1

// FIXME: Missing FPDPRounding
let Constraints = "$vdst = $src2", DisableEncoding="$src2",
    isConvertibleToThreeAddress = 1, isCommutable = 1 in {
defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_MAC_F16>;
}
} // End SubtargetPredicate = Has16BitInsts


let SubtargetPredicate = HasDLInsts in {

let isReMaterializable = 1 in
defm V_XNOR_B32 : VOP2Inst <"v_xnor_b32", VOP_I32_I32_I32, xnor>;

// Divergent not(xor(a, b)) with a single-use xor -> v_xnor_b32.
def : GCNPat<
  (i32 (DivergentUnaryFrag<not> (xor_oneuse i32:$src0, i32:$src1))),
  (i32 (V_XNOR_B32_e64 $src0, $src1))
>;

// Divergent xor(not(a), b) -> v_xnor_b32.
def : GCNPat<
  (i32 (DivergentBinFrag<xor_oneuse> (not i32:$src0), i32:$src1)),
  (i32 (V_XNOR_B32_e64 $src0, $src1))
>;

// 64-bit versions of the two patterns above: one v_xnor_b32 per 32-bit half,
// recombined with REG_SEQUENCE.
def : GCNPat<
  (i64 (DivergentUnaryFrag<not> (xor_oneuse i64:$src0, i64:$src1))),
  (REG_SEQUENCE VReg_64, (i32 (V_XNOR_B32_e64
                            (i32 (EXTRACT_SUBREG $src0, sub0)),
                            (i32 (EXTRACT_SUBREG $src1, sub0)))), sub0,
                     (i32 (V_XNOR_B32_e64
                            (i32 (EXTRACT_SUBREG $src0, sub1)),
                            (i32 (EXTRACT_SUBREG $src1, sub1)))), sub1)
>;

def : GCNPat<
  (i64 (DivergentBinFrag<xor_oneuse> (not i64:$src0), i64:$src1)),
  (REG_SEQUENCE VReg_64, (i32 (V_XNOR_B32_e64
                            (i32 (EXTRACT_SUBREG $src0, sub0)),
                            (i32 (EXTRACT_SUBREG $src1, sub0)))), sub0,
                     (i32 (V_XNOR_B32_e64
                            (i32 (EXTRACT_SUBREG $src0, sub1)),
                            (i32 (EXTRACT_SUBREG $src1, sub1)))), sub1)
>;

let Constraints = "$vdst = $src2",
    DisableEncoding = "$src2",
    isConvertibleToThreeAddress = 1,
    isCommutable = 1 in
defm V_FMAC_F32 : VOP2Inst_VOPD <"v_fmac_f32", VOP_MAC_F32, 0x0, "v_fmac_f32">;
} // End SubtargetPredicate = HasDLInsts

let SubtargetPredicate = HasFmaLegacy32 in {

let Constraints = "$vdst = $src2",
    DisableEncoding = "$src2",
    isConvertibleToThreeAddress = 1,
    isCommutable = 1 in
defm V_FMAC_LEGACY_F32 : VOP2Inst <"v_fmac_legacy_f32", VOP_MAC_LEGACY_F32>;

} // End SubtargetPredicate = HasFmaLegacy32

let SubtargetPredicate = HasFmacF64Inst,
    Constraints = "$vdst = $src2",
    DisableEncoding="$src2",
    isConvertibleToThreeAddress = 1,
    isCommutable = 1,
    SchedRW = [WriteDoubleAdd] in
defm V_FMAC_F64 : VOP2Inst <"v_fmac_f64", VOP_MAC_F64>;

// Accumulating dot-product instructions: $vdst is tied to $src2, and each
// definition is gated on its own HasDot*Insts predicate.
let Constraints = "$vdst = $src2",
    DisableEncoding="$src2",
    isConvertibleToThreeAddress = 1,
    isCommutable = 1,
    IsDOT = 1 in {
  let SubtargetPredicate = HasDot5Insts in
    defm V_DOT2C_F32_F16 : VOP2Inst_VOPD<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16, 0xc, "v_dot2acc_f32_f16">;
  let SubtargetPredicate = HasDot6Insts in
    defm V_DOT4C_I32_I8 : VOP2Inst<"v_dot4c_i32_i8", VOP_DOT_ACC_I32_I32>;

  let SubtargetPredicate = HasDot4Insts in
    defm V_DOT2C_I32_I16 : VOP2Inst<"v_dot2c_i32_i16", VOP_DOT_ACC_I32_I32>;
  let SubtargetPredicate = HasDot3Insts in
    defm V_DOT8C_I32_I4 : VOP2Inst<"v_dot8c_i32_i4", VOP_DOT_ACC_I32_I32>;

  let SubtargetPredicate = HasDot13Insts in
    defm V_DOT2C_F32_BF16 : VOP2Inst_VOPD<"v_dot2c_f32_bf16", VOP_DOT_ACC_F32_V2BF16, 0xd, "v_dot2acc_f32_bf16">;
}

// Select the _e32 dot-accumulate instructions for the dot operations when the
// clamp operand is DSTCLAMP.NONE. Each pattern uses the same HasDot*Insts
// predicate as the instruction it selects.
let AddedComplexity = 30 in {
  def : GCNPat<
    (f32 (AMDGPUfdot2 v2f16:$src0, v2f16:$src1, f32:$src2, (i1 DSTCLAMP.NONE))),
    (f32 (V_DOT2C_F32_F16_e32 $src0, $src1, $src2))
  > {
    let SubtargetPredicate = HasDot5Insts;
  }
  def : GCNPat<
    (f32 (int_amdgcn_fdot2_f32_bf16 v2bf16:$src0, v2bf16:$src1, f32:$src2, (i1 DSTCLAMP.NONE))),
    (f32 (V_DOT2C_F32_BF16_e32 $src0, $src1, $src2))
  > {
    let SubtargetPredicate = HasDot13Insts;
  }
  def : GCNPat<
    (i32 (int_amdgcn_sdot4 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
    (i32 (V_DOT4C_I32_I8_e32 $src0, $src1, $src2))
  > {
    let SubtargetPredicate = HasDot6Insts;
  }
  def : GCNPat<
    (i32 (int_amdgcn_sdot2 v2i16:$src0, v2i16:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
    (i32 (V_DOT2C_I32_I16_e32 $src0, $src1, $src2))
  > {
    let SubtargetPredicate = HasDot4Insts;
  }
  def : GCNPat<
    (i32 (int_amdgcn_sdot8 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
    (i32 (V_DOT8C_I32_I4_e32 $src0, $src1, $src2))
  > {
    let SubtargetPredicate = HasDot3Insts;
  }
} // End AddedComplexity = 30

// FMAMK/FMAAK f32 pseudos; each also derives from VOPD_Component. Only the
// FMAAK form is commutable.
let SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1, FixedSize = 1 in {
def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">, VOPD_Component<0x2, "v_fmamk_f32">;

let isCommutable = 1 in
def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">, VOPD_Component<0x1, "v_fmaak_f32">;
} // End SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1, FixedSize = 1

let SubtargetPredicate = HasPkFmacF16Inst in {
defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>;
} // End SubtargetPredicate = HasPkFmacF16Inst

// Note: 16-bit instructions produce a 0 result in the high 16-bits
// on GFX8 and GFX9 and preserve high 16 bits on GFX10+
// Emits two patterns that fold a zero-extension of the 16-bit operation `op`
// into `inst`: one producing an i32, and one producing an i64 whose sub1 half
// is a V_MOV_B32 of constant 0.
multiclass Arithmetic_i16_0Hi_Pats <SDPatternOperator op, Instruction inst> {
  // zext to i32: the instruction result is used directly.
  def : GCNPat<
    (i32 (zext (op i16:$src0, i16:$src1))),
    (inst VSrc_b16:$src0, VSrc_b16:$src1)
  >;

  // zext to i64: instruction result in sub0, explicit zero in sub1.
  def : GCNPat<
    (i64 (zext (op i16:$src0, i16:$src1))),
    (REG_SEQUENCE VReg_64,
      (inst $src0, $src1), sub0,
      (V_MOV_B32_e32 (i32 0)), sub1)
  >;
}

// Extends an i1 to i16 with V_CNDMASK_B32_e64 choosing between the constants
// 0 (src0) and 1 (src1), with $src as the final operand; both modifier
// operands are 0.
class ZExt_i16_i1_Pat <SDNode ext> : GCNPat <
  (i16 (ext i1:$src)),
  (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
                     /*src1mod*/(i32 0), /*src1*/(i32 1),
                     $src)
>;

// i16 and v2i16 bitwise logic is selected to the 32-bit VALU instructions.
foreach vt = [i16, v2i16] in {
  def : GCNPat <
    (and vt:$src0, vt:$src1),
    (V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
  >;

  def : GCNPat <
    (or vt:$src0, vt:$src1),
    (V_OR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
  >;

  def : GCNPat <
    (xor vt:$src0, vt:$src1),
    (V_XOR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
  >;
}

let Predicates = [Has16BitInsts, isGFX8GFX9] in {

  // Undo sub x, c -> add x, -c canonicalization since c is more likely
  // an inline immediate than -c.
  // TODO: Also do for 64-bit.
  def : GCNPat<
    (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
    (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1)
  >;

  // Same, with the result zero-extended to i32.
  def : GCNPat<
    (i32 (zext (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)))),
    (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1)
  >;

  // Zero-extension folding for the 16-bit arithmetic, min/max and shift ops.
  defm : Arithmetic_i16_0Hi_Pats<add, V_ADD_U16_e64>;
  defm : Arithmetic_i16_0Hi_Pats<mul, V_MUL_LO_U16_e64>;
  defm : Arithmetic_i16_0Hi_Pats<sub, V_SUB_U16_e64>;
  defm : Arithmetic_i16_0Hi_Pats<smin, V_MIN_I16_e64>;
  defm : Arithmetic_i16_0Hi_Pats<smax, V_MAX_I16_e64>;
  defm : Arithmetic_i16_0Hi_Pats<umin, V_MIN_U16_e64>;
  defm : Arithmetic_i16_0Hi_Pats<umax, V_MAX_U16_e64>;
  defm : Arithmetic_i16_0Hi_Pats<clshl_rev_16, V_LSHLREV_B16_e64>;
  defm : Arithmetic_i16_0Hi_Pats<clshr_rev_16, V_LSHRREV_B16_e64>;
  defm : Arithmetic_i16_0Hi_Pats<cashr_rev_16, V_ASHRREV_I16_e64>;

} // End Predicates = [Has16BitInsts, isGFX8GFX9]

let Predicates = [Has16BitInsts] in {

  // zext first, then anyext: both map an i1 to the constants 0 / 1.
  foreach ext = [zext, anyext] in
    def : ZExt_i16_i1_Pat<ext>;

  // sext of an i1 selects between the constants 0 and -1.
  def : GCNPat <
    (i16 (sext i1:$src)),
    (V_CNDMASK_B32_e64 (i32 0/*src0mod*/), (i32 0/*src0*/),
                       (i32 0/*src1mod*/), (i32 -1/*src1*/), $src)
  >;

} // End Predicates = [Has16BitInsts]


// Set clamp bit for saturation.
let SubtargetPredicate = HasIntClamp in {
  def : VOPBinOpClampPat<uaddsat, V_ADD_CO_U32_e64, i32>;
  def : VOPBinOpClampPat<usubsat, V_SUB_CO_U32_e64, i32>;
}

// AddedComplexity = 1: prefer over form with carry-out.
let SubtargetPredicate = HasAddNoCarryInsts,
    OtherPredicates = [HasIntClamp],
    AddedComplexity = 1 in {
  def : VOPBinOpClampPat<uaddsat, V_ADD_U32_e64, i32>;
  def : VOPBinOpClampPat<usubsat, V_SUB_U32_e64, i32>;
}

let SubtargetPredicate = Has16BitInsts, OtherPredicates = [HasIntClamp] in {
  def : VOPBinOpClampPat<uaddsat, V_ADD_U16_e64, i16>;
  def : VOPBinOpClampPat<usubsat, V_SUB_U16_e64, i16>;
}

// 64-bit VOP2 pseudos gated on isGFX12Plus.
let SubtargetPredicate = isGFX12Plus, isReMaterializable = 1 in {
  // Commutable f64 arithmetic with FPDPRounding set.
  let SchedRW = [WriteDoubleAdd], isCommutable = 1, FPDPRounding = 1 in {
    defm V_ADD_F64_pseudo : VOP2Inst <"v_add_f64_pseudo", VOP_F64_F64_F64, any_fadd>;
    defm V_MUL_F64_pseudo : VOP2Inst <"v_mul_f64_pseudo", VOP_F64_F64_F64, fmul>;
  }
  // Commutable f64 min/max; FPDPRounding is not set for these.
  let SchedRW = [WriteDoubleAdd], isCommutable = 1 in {
    defm V_MIN_NUM_F64 : VOP2Inst <"v_min_num_f64", VOP_F64_F64_F64, fminnum_like>;
    defm V_MAX_NUM_F64 : VOP2Inst <"v_max_num_f64", VOP_F64_F64_F64, fmaxnum_like>;
  }
  // 64-bit reversed shift; not marked commutable.
  let SchedRW = [Write64Bit] in
    defm V_LSHLREV_B64_pseudo : VOP2Inst <"v_lshlrev_b64_pseudo", VOP_I64_I32_I64, clshl_rev_64>;
} // End SubtargetPredicate = isGFX12Plus, isReMaterializable = 1

//===----------------------------------------------------------------------===//
// DPP Encodings
//===----------------------------------------------------------------------===//

// VOP2 layout for a DPP instruction: bits 8-0 hold the constant 0xfa, and the
// remaining low-dword fields (src1, vdst, op, bit 31 = 0) use the same bit
// positions as VOP2e. Side-effect, Defs/Uses and scheduling info are copied
// from the pseudo `ps`.
class VOP2_DPP<bits<6> op, VOP2_DPP_Pseudo ps,
               string opName = ps.OpName, VOPProfile p = ps.Pfl,
               bit IsDPP16 = 0> :
    VOP_DPP<opName, p, IsDPP16> {
  bits<8> vdst;
  bits<8> src1;

  // Properties inherited from the pseudo.
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let Uses = ps.Uses;
  let SchedRW = ps.SchedRW;

  // Encoding, most significant field first.
  let Inst{31} = 0x0;
  let Inst{30-25} = op;
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{16-9} = !if(p.HasSrc1, src1{7-0}, 0);
  let Inst{8-0} = 0xfa;
}

// DPP16 flavour of VOP2_DPP (IsDPP16 = 1). Requires HasDPP16 in the assembler
// and takes its subtarget / other predicates from the pseudo.
class Base_VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps,
                      string opName = ps.OpName, VOPProfile p = ps.Pfl> :
    VOP2_DPP<op, ps, opName, p, 1> {
  let SubtargetPredicate = ps.SubtargetPredicate;
  let OtherPredicates = ps.OtherPredicates;
  let AssemblerPredicate = HasDPP16;
}

// Base_VOP2_DPP16 combined with the SIMCInstr entry for `subtarget`.
class VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps, int subtarget,
                 string opName = ps.OpName, VOPProfile p = ps.Pfl> :
    Base_VOP2_DPP16<op, ps, opName, p>,
    SIMCInstr <ps.PseudoInstr, subtarget>;

// VOP2_DPP16 for one GFXGen: assembler predicate and decoder namespace come
// from Gen. A "_FAKE16" suffix is appended to the namespace unless the
// profile is real-True16.
class VOP2_DPP16_Gen<bits<6> op, VOP2_DPP_Pseudo ps, GFXGen Gen,
                     string opName = ps.OpName, VOPProfile p = ps.Pfl> :
    VOP2_DPP16<op, ps, Gen.Subtarget, opName, p> {
  let AssemblerPredicate = Gen.AssemblerPredicate;
  let True16Predicate =
      !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate);
  let DecoderNamespace =
      Gen.DecoderNamespace # !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
}

// VOP2 layout for a DPP8 instruction: bits 8-0 hold `fi`; the remaining
// fields use the same positions as in VOP2_DPP. Properties and predicates are
// copied from the (non-DPP) pseudo `ps`.
class VOP2_DPP8<bits<6> op, VOP2_Pseudo ps,
                VOPProfile p = ps.Pfl> :
    VOP_DPP8<ps.OpName, p> {
  bits<8> vdst;
  bits<8> src1;

  // Properties inherited from the pseudo.
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let Uses = ps.Uses;
  let SchedRW = ps.SchedRW;
  let SubtargetPredicate = ps.SubtargetPredicate;
  let OtherPredicates = ps.OtherPredicates;

  // Encoding, most significant field first.
  let Inst{31} = 0x0;
  let Inst{30-25} = op;
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{16-9} = !if(p.HasSrc1, src1{7-0}, 0);
  let Inst{8-0} = fi;
}

// VOP2_DPP8 for one GFXGen; predicate and namespace handling mirrors
// VOP2_DPP16_Gen.
class VOP2_DPP8_Gen<bits<6> op, VOP2_Pseudo ps, GFXGen Gen,
                    VOPProfile p = ps.Pfl> :
    VOP2_DPP8<op, ps, p> {
  let AssemblerPredicate = Gen.AssemblerPredicate;
  let True16Predicate =
      !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate);
  let DecoderNamespace =
      Gen.DecoderNamespace # !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
}

//===----------------------------------------------------------------------===//
// GFX11, GFX12
//===----------------------------------------------------------------------===//

//===------------------------------- VOP2 -------------------------------===//

// Real MADK-encoded instruction for the pseudo named NAME.
multiclass VOP2Only_Real_MADK<GFXGen Gen, bits<6> op> {
  defvar pseudo = !cast<VOP2_Pseudo>(NAME);
  def Gen.Suffix :
    VOP2_Real_Gen<pseudo, Gen>,
    VOP2_MADKe<op{5-0}, pseudo.Pfl>;
}

// As VOP2Only_Real_MADK, but the pseudo is looked up by `opName` and the asm
// string is rebuilt from `asmName` plus the pseudo's operand string.
multiclass VOP2Only_Real_MADK_with_name<GFXGen Gen, bits<6> op, string asmName,
                                        string opName = NAME> {
  defvar pseudo = !cast<VOP2_Pseudo>(opName);
  def Gen.Suffix :
      VOP2_Real_Gen<pseudo, Gen>,
      VOP2_MADKe<op{5-0}, pseudo.Pfl> {
    VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName);
    let AsmString = asmName # ps.AsmOperands;
  }
}

// Real _e32 instruction for the pseudo NAME#"_e32".
multiclass VOP2_Real_e32<GFXGen Gen, bits<6> op> {
  defvar pseudo32 = !cast<VOP2_Pseudo>(NAME#"_e32");
  def _e32#Gen.Suffix :
    VOP2_Real_Gen<pseudo32, Gen>,
    VOP2e<op{5-0}, pseudo32.Pfl>;
}

// VOP2_Real_e32 with IsSingle set on the generated record.
multiclass VOP2Only_Real_e32<GFXGen Gen, bits<6> op> {
  let IsSingle = 1 in
    defm NAME: VOP2_Real_e32<Gen, op>;
}

// Real _e64 instruction for NAME#"_e64"; the VOP3 opcode is the VOP2 opcode
// prefixed with the bits {0, 1, 0, 0}.
multiclass VOP2_Real_e64<GFXGen Gen, bits<6> op> {
  defvar pseudo64 = !cast<VOP3_Pseudo>(NAME#"_e64");
  def _e64#Gen.Suffix :
    VOP3_Real_Gen<pseudo64, Gen>,
    VOP3e_gfx11_gfx12<{0, 1, 0, 0, op{5-0}}, pseudo64.Pfl>;
}

// Real DPP16 instruction, emitted only when the _e32 profile has HasExtDPP.
multiclass VOP2_Real_dpp<GFXGen Gen, bits<6> op> {
  defvar pseudo32 = !cast<VOP2_Pseudo>(NAME#"_e32");
  if pseudo32.Pfl.HasExtDPP then
    def _dpp#Gen.Suffix :
      VOP2_DPP16_Gen<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), Gen>;
}

// Real DPP8 instruction, emitted only when the _e32 profile has HasExtDPP.
multiclass VOP2_Real_dpp8<GFXGen Gen, bits<6> op> {
  defvar pseudo32 = !cast<VOP2_Pseudo>(NAME#"_e32");
  if pseudo32.Pfl.HasExtDPP then
    def _dpp8#Gen.Suffix : VOP2_DPP8_Gen<op, pseudo32, Gen>;
}

//===------------------------- VOP2 (with name) -------------------------===//

// _e32 real whose pseudo is looked up by `opName` and whose mnemonic is
// `asmName`; `single` is forwarded to IsSingle.
multiclass VOP2_Real_e32_with_name<GFXGen Gen, bits<6> op, string opName,
                                   string asmName, bit single = 0> {
  defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
  def _e32#Gen.Suffix :
    VOP2_Real_Gen<ps, Gen, asmName>,
    VOP2e<op{5-0}, ps.Pfl> {
      let IsSingle = single;
      let AsmString = asmName # ps.AsmOperands;
    }
}

// _e64 counterpart of VOP2_Real_e32_with_name.
multiclass VOP2_Real_e64_with_name<GFXGen Gen, bits<6> op, string opName,
                                   string asmName> {
  defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
  def _e64#Gen.Suffix :
    VOP3_Real_Gen<ps, Gen>,
    VOP3e_gfx11_gfx12<{0, 1, 0, 0, op{5-0}}, ps.Pfl> {
      let AsmString = asmName # ps.AsmOperands;
    }
}

// DPP16 real with an overridden mnemonic; only emitted when HasExtDPP.
multiclass VOP2_Real_dpp_with_name<GFXGen Gen, bits<6> op, string opName,
                                   string asmName> {
  defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
  if ps.Pfl.HasExtDPP then
    def _dpp#Gen.Suffix :
      VOP2_DPP16_Gen<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), Gen> {
        let AsmString = asmName # ps.Pfl.AsmDPP16;
      }
}

// DPP8 real with an overridden mnemonic; only emitted when HasExtDPP.
multiclass VOP2_Real_dpp8_with_name<GFXGen Gen, bits<6> op, string opName,
                                    string asmName> {
  defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
  if ps.Pfl.HasExtDPP then
    def _dpp8#Gen.Suffix : VOP2_DPP8_Gen<op, ps, Gen> {
      let AsmString = asmName # ps.Pfl.AsmDPP8;
    }
}

//===------------------------------ VOP2be ------------------------------===//

// _e32 real whose asm string has every ", vcc" removed from the pseudo's
// operand string.
multiclass VOP2be_Real_e32<GFXGen Gen, bits<6> op, string opName, string asmName> {
  defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
  def _e32#Gen.Suffix :
    VOP2_Real_Gen<ps, Gen>,
    VOP2e<op{5-0}, ps.Pfl> {
      let AsmString = asmName # !subst(", vcc", "", ps.AsmOperands);
    }
}

// DPP16 reals, all guarded by HasExtDPP on the _e32 profile:
//   _dpp      - asm string with ", vcc" removed;
//   _dpp_w32  - asm-parser-only, isWave32, "vcc" replaced by "vcc_lo";
//   _dpp_w64  - asm-parser-only, isWave64, operand string unchanged.
multiclass VOP2be_Real_dpp<GFXGen Gen, bits<6> op, string opName, string asmName> {
  defvar ps32 = !cast<VOP2_Pseudo>(opName#"_e32");
  if ps32.Pfl.HasExtDPP then {
    def _dpp#Gen.Suffix :
      VOP2_DPP16_Gen<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), Gen, asmName> {
        string AsmDPP = ps32.Pfl.AsmDPP16;
        let AsmString = asmName # !subst(", vcc", "", AsmDPP);
      }
    def _dpp_w32#Gen.Suffix :
      Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
        string AsmDPP = ps32.Pfl.AsmDPP16;
        let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP);
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave32;
        let AssemblerPredicate = Gen.AssemblerPredicate;
        let DecoderNamespace = Gen.DecoderNamespace;
      }
    def _dpp_w64#Gen.Suffix :
      Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
        string AsmDPP = ps32.Pfl.AsmDPP16;
        let AsmString = asmName # AsmDPP;
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave64;
        let AssemblerPredicate = Gen.AssemblerPredicate;
        let DecoderNamespace = Gen.DecoderNamespace;
      }
  }
}

// DPP8 reals; same three-way split as VOP2be_Real_dpp, using AsmDPP8.
multiclass VOP2be_Real_dpp8<GFXGen Gen, bits<6> op, string opName, string asmName> {
  defvar ps32 = !cast<VOP2_Pseudo>(opName#"_e32");
  if ps32.Pfl.HasExtDPP then {
    def _dpp8#Gen.Suffix :
      VOP2_DPP8_Gen<op, ps32, Gen> {
        string AsmDPP8 = ps32.Pfl.AsmDPP8;
        let AsmString = asmName # !subst(", vcc", "", AsmDPP8);
      }
    def _dpp8_w32#Gen.Suffix :
      VOP2_DPP8<op, ps32> {
        string AsmDPP8 = ps32.Pfl.AsmDPP8;
        let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8);
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave32;
        let AssemblerPredicate = Gen.AssemblerPredicate;
        let DecoderNamespace = Gen.DecoderNamespace;
      }
    def _dpp8_w64#Gen.Suffix :
      VOP2_DPP8<op, ps32> {
        string AsmDPP8 = ps32.Pfl.AsmDPP8;
        let AsmString = asmName # AsmDPP8;
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave64;
        let AssemblerPredicate = Gen.AssemblerPredicate;
        let DecoderNamespace = Gen.DecoderNamespace;
      }
  }
}

// We don't want to override separate decoderNamespaces within these
multiclass VOP2_Realtriple_e64<GFXGen Gen, bits<6> op> :
  VOP3_Realtriple<Gen, {0, 1, 0, 0, op{5-0}}, /*isSingle=*/ 0, NAME>;

// VOP3 real triple with an explicit pseudo name and mnemonic.
multiclass VOP2_Realtriple_e64_with_name<GFXGen Gen, bits<6> op, string opName,
                                         string asmName> {
  defm NAME : VOP3_Realtriple_with_name<Gen, {0, 1, 0, 0, op{5-0}}, opName, asmName> ;
}

// Full set of reals for a VOP2be instruction: e32, the VOP3be triple, DPP16
// and DPP8.
multiclass VOP2be_Real<GFXGen Gen, bits<6> op, string opName, string asmName> :
  VOP2be_Real_e32<Gen, op, opName, asmName>,
  VOP3be_Realtriple<Gen, {0, 1, 0, 0, op{5-0}}, /*isSingle=*/ 0, opName, asmName>,
  VOP2be_Real_dpp<Gen, op, opName, asmName>,
  VOP2be_Real_dpp8<Gen, op, opName, asmName>;

// Only for CNDMASK
multiclass VOP2e_Real<GFXGen Gen, bits<6> op, string opName, string asmName> :
  VOP2_Real_e32<Gen, op>,
  VOP2_Realtriple_e64<Gen, op>,
  VOP2be_Real_dpp<Gen, op, opName, asmName>,
  VOP2be_Real_dpp8<Gen, op, opName, asmName>;

// e32 (IsSingle), DPP16 and DPP8 reals; no VOP3 form.
multiclass VOP2Only_Real<GFXGen Gen, bits<6> op> :
  VOP2Only_Real_e32<Gen, op>,
  VOP2_Real_dpp<Gen, op>,
  VOP2_Real_dpp8<Gen, op>;

// VOP3 triple plus e32, DPP16 and DPP8 reals.
multiclass VOP2_Real_FULL<GFXGen Gen, bits<6> op> :
  VOP2_Realtriple_e64<Gen, op>,
  VOP2_Real_e32<Gen, op>,
  VOP2_Real_dpp<Gen, op>,
  VOP2_Real_dpp8<Gen, op>;

// Renamed e32/DPP16/DPP8 reals (no VOP3 form), plus a mnemonic alias record
// built from the pseudo's Mnemonic and asmName, predicated on Gen's assembler
// predicate.
multiclass VOP2_Real_NO_VOP3_with_name<GFXGen Gen, bits<6> op, string opName,
                                       string asmName, bit isSingle = 0> {
  defm NAME : VOP2_Real_e32_with_name<Gen, op, opName, asmName, isSingle>,
              VOP2_Real_dpp_with_name<Gen, op, opName, asmName>,
              VOP2_Real_dpp8_with_name<Gen, op, opName, asmName>;
  def Gen.Suffix#"_alias" :
    AMDGPUMnemonicAlias<!cast<VOP2_Pseudo>(opName#"_e32").Mnemonic, asmName> {
      let AssemblerPredicate = Gen.AssemblerPredicate;
    }
}

// Renamed VOP3 triple combined with VOP2_Real_NO_VOP3_with_name.
multiclass VOP2_Real_FULL_with_name<GFXGen Gen, bits<6> op, string opName,
                                    string asmName> :
  VOP2_Realtriple_e64_with_name<Gen, op, opName, asmName>,
  VOP2_Real_NO_VOP3_with_name<Gen, op, opName, asmName>;

// Renamed e32 and e64 reals without any DPP forms, plus a mnemonic alias
// record built from the pseudo's Mnemonic and asmName.
multiclass VOP2_Real_NO_DPP_with_name<GFXGen Gen, bits<6> op, string opName,
                                      string asmName> {
  defm NAME : VOP2_Real_e32_with_name<Gen, op, opName, asmName>,
              VOP2_Real_e64_with_name<Gen, op, opName, asmName>;
  def Gen.Suffix#"_alias" :
    AMDGPUMnemonicAlias<!cast<VOP2_Pseudo>(opName#"_e32").Mnemonic, asmName> {
      let AssemblerPredicate = Gen.AssemblerPredicate;
    }
}

// e32 and e64 reals under the instruction's own name, plus a mnemonic alias
// record built from `alias` and NAME.
multiclass VOP2_Real_NO_DPP_with_alias<GFXGen Gen, bits<6> op, string alias> {
  defm NAME : VOP2_Real_e32<Gen, op>,
              VOP2_Real_e64<Gen, op>;
  def Gen.Suffix#"_alias" : AMDGPUMnemonicAlias<alias, NAME> {
    let AssemblerPredicate = Gen.AssemblerPredicate;
  }
}

//===----------------------------------------------------------------------===//
// GFX12.
//===----------------------------------------------------------------------===//

// The multiclasses below bind the generation-generic VOP2 real multiclasses
// to GFX12Gen so the defm lists that follow only need to supply an opcode
// (and, where the mnemonic differs from the pseudo, the names).

multiclass VOP2be_Real_gfx12<bits<6> op, string opName, string asmName> :
  VOP2be_Real<GFX12Gen, op, opName, asmName>;

// Only for CNDMASK
multiclass VOP2e_Real_gfx12<bits<6> op, string opName, string asmName> :
  VOP2e_Real<GFX12Gen, op, opName, asmName>;

multiclass VOP2_Real_FULL_with_name_gfx12<bits<6> op, string opName,
                                          string asmName> :
  VOP2_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;

// Full GFX12 real set for `opName` printed as `asmName`, plus one additional
// mnemonic alias record built from (`alias`, `asmName`) that is only active
// under the isGFX12Only assembler predicate.
multiclass VOP2_Real_FULL_t16_gfx12<bits<6> op, string opName,
                                    string asmName, string alias> {
  defm NAME : VOP2_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;
  def _gfx12_2nd_alias : AMDGPUMnemonicAlias<alias, asmName> {
    let AssemblerPredicate = isGFX12Only;
  }
}

// Instantiates VOP2_Real_FULL_t16_gfx12 twice, once for the "_t16" pseudo and
// once for the "_fake16" pseudo, sharing the opcode, asm name and alias.
multiclass VOP2_Real_FULL_t16_and_fake16_gfx12<bits<6> op, string opName,
                                               string asmName, string alias> {
  defm _t16: VOP2_Real_FULL_t16_gfx12<op, opName#"_t16", asmName, alias>;
  defm _fake16: VOP2_Real_FULL_t16_gfx12<op, opName#"_fake16", asmName, alias>;
}

multiclass VOP2_Real_NO_DPP_with_name_gfx12<bits<6> op, string opName,
                                            string asmName> :
  VOP2_Real_NO_DPP_with_name<GFX12Gen, op, opName, asmName>;

multiclass VOP2_Real_NO_DPP_with_alias_gfx12<bits<6> op, string alias> :
  VOP2_Real_NO_DPP_with_alias<GFX12Gen, op, alias>;

// 64-bit operations: e32/e64 encodings only (the NO_DPP multiclasses).
defm V_ADD_F64     : VOP2_Real_NO_DPP_with_name_gfx12<0x002, "V_ADD_F64_pseudo", "v_add_f64">;
defm V_MUL_F64     : VOP2_Real_NO_DPP_with_name_gfx12<0x006, "V_MUL_F64_pseudo", "v_mul_f64">;
defm V_LSHLREV_B64 : VOP2_Real_NO_DPP_with_name_gfx12<0x01f, "V_LSHLREV_B64_pseudo", "v_lshlrev_b64">;
defm V_MIN_NUM_F64 : VOP2_Real_NO_DPP_with_alias_gfx12<0x00d, "v_min_f64">;
defm V_MAX_NUM_F64 : VOP2_Real_NO_DPP_with_alias_gfx12<0x00e, "v_max_f64">;

defm V_CNDMASK_B32 : VOP2e_Real_gfx12<0x001, "V_CNDMASK_B32", "v_cndmask_b32">;
defm V_ADD_CO_CI_U32 :
  VOP2be_Real_gfx12<0x020, "V_ADDC_U32", "v_add_co_ci_u32">;
defm V_SUB_CO_CI_U32 :
  VOP2be_Real_gfx12<0x021, "V_SUBB_U32", "v_sub_co_ci_u32">;
defm V_SUBREV_CO_CI_U32 :
  VOP2be_Real_gfx12<0x022, "V_SUBBREV_U32", "v_subrev_co_ci_u32">;

// min/max are emitted under the *_num_* mnemonics here; the f16 forms also
// pass the older mnemonic as the extra alias string.
defm V_MIN_NUM_F32 : VOP2_Real_FULL_with_name_gfx12<0x015, "V_MIN_F32", "v_min_num_f32">;
defm V_MAX_NUM_F32 : VOP2_Real_FULL_with_name_gfx12<0x016, "V_MAX_F32", "v_max_num_f32">;
defm V_MIN_NUM_F16 : VOP2_Real_FULL_t16_and_fake16_gfx12<0x030, "V_MIN_F16", "v_min_num_f16", "v_min_f16">;
defm V_MAX_NUM_F16 : VOP2_Real_FULL_t16_and_fake16_gfx12<0x031, "V_MAX_F16", "v_max_num_f16", "v_max_f16">;

// Instruction aliases from the pseudo e32 forms to the GFX12 real e32 forms.
let SubtargetPredicate = isGFX12Plus in {
  defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx12>;

  defm : VOP2bInstAliases<
    V_ADDC_U32_e32, V_ADD_CO_CI_U32_e32_gfx12, "v_add_co_ci_u32">;
  defm : VOP2bInstAliases<
    V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx12, "v_sub_co_ci_u32">;
  defm : VOP2bInstAliases<
    V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx12, "v_subrev_co_ci_u32">;
} // End SubtargetPredicate = isGFX12Plus

//===----------------------------------------------------------------------===//
// GFX11.
//===----------------------------------------------------------------------===//

// GFX11 bindings of the generation-generic VOP2 real multiclasses, followed
// by helpers that instantiate the same opcode for both GFX11Gen and GFX12Gen.

multiclass VOP2be_Real_gfx11<bits<6> op, string opName, string asmName> :
  VOP2be_Real<GFX11Gen, op, opName, asmName>;

// Only for CNDMASK
multiclass VOP2e_Real_gfx11<bits<6> op, string opName, string asmName> :
  VOP2e_Real<GFX11Gen, op, opName, asmName>;

// e32 + DPP16 + DPP8 reals (no e64/VOP3 form) for `opName` printed as
// `asmName`, plus a mnemonic alias record built from the pseudo's own
// mnemonic and `asmName`, active only under isGFX11Only.
multiclass VOP2_Real_NO_VOP3_with_name_gfx11<bits<6> op, string opName,
                                             string asmName, bit isSingle = 0> {
  defm NAME : VOP2_Real_e32_with_name<GFX11Gen, op, opName, asmName, isSingle>,
              VOP2_Real_dpp_with_name<GFX11Gen, op, opName, asmName>,
              VOP2_Real_dpp8_with_name<GFX11Gen, op, opName, asmName>;
  defvar e32Pseudo = !cast<VOP2_Pseudo>(opName#"_e32");
  def _gfx11_alias : AMDGPUMnemonicAlias<e32Pseudo.Mnemonic, asmName> {
    let AssemblerPredicate = isGFX11Only;
  }
}

multiclass VOP2_Real_FULL_t16_gfx11<bits<6> op, string asmName, string opName = NAME> :
  VOP2_Real_FULL_with_name<GFX11Gen, op, opName, asmName>;

// NOTE(review): unlike the *_t16_and_fake16_gfx11_gfx12 multiclasses below,
// the inner defm names here are built from opName rather than from the bare
// "_t16"/"_fake16" suffixes - confirm this is intended before reusing it.
multiclass VOP2_Real_FULL_t16_and_fake16_gfx11<bits<6> op, string asmName, string opName = NAME> {
  defm opName#"_t16": VOP2_Real_FULL_t16_gfx11<op, asmName, opName#"_t16">;
  defm opName#"_fake16": VOP2_Real_FULL_t16_gfx11<op, asmName, opName#"_fake16">;
}

multiclass VOP2_Real_NO_DPP_with_name_gfx11<bits<6> op, string opName,
                                            string asmName> :
  VOP2_Real_NO_DPP_with_name<GFX11Gen, op, opName, asmName>;

//===------------------ shared GFX11 + GFX12 instantiation ----------------===//

multiclass VOP2_Real_FULL_gfx11_gfx12<bits<6> op> :
  VOP2_Real_FULL<GFX11Gen, op>, VOP2_Real_FULL<GFX12Gen, op>;

multiclass VOP2_Real_FULL_with_name_gfx11_gfx12<bits<6> op, string opName,
                                                string asmName> :
  VOP2_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
  VOP2_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;

multiclass VOP2_Real_e32_gfx11_gfx12<bits<6> op> :
  VOP2Only_Real<GFX11Gen, op>, VOP2Only_Real<GFX12Gen, op>;

multiclass VOP3Only_Realtriple_gfx11_gfx12<bits<10> op> :
  VOP3Only_Realtriple<GFX11Gen, op>, VOP3Only_Realtriple<GFX12Gen, op>;

multiclass VOP3Only_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName, string OpName = NAME> :
  VOP3_Realtriple_t16_gfx11<op, asmName, OpName, "", /*IsSingle*/1>,
  VOP3_Realtriple_t16_gfx12<op, asmName, OpName, "", /*IsSingle*/1>;

multiclass VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<bits<10> op, string asmName, string OpName = NAME> {
  defm _t16: VOP3Only_Realtriple_t16_gfx11_gfx12<op, asmName, OpName#"_t16">;
  defm _fake16: VOP3Only_Realtriple_t16_gfx11_gfx12<op, asmName, OpName#"_fake16">;
}

multiclass VOP3beOnly_Realtriple_gfx11_gfx12<bits<10> op> :
  VOP3beOnly_Realtriple<GFX11Gen, op>, VOP3beOnly_Realtriple<GFX12Gen, op>;

multiclass VOP2Only_Real_MADK_t16_gfx11_gfx12<bits<6> op, string asmName,
                                              string opName = NAME> :
  VOP2Only_Real_MADK_with_name<GFX11Gen, op, asmName, opName>,
  VOP2Only_Real_MADK_with_name<GFX12Gen, op, asmName, opName>;

multiclass VOP2Only_Real_MADK_t16_and_fake16_gfx11_gfx12<bits<6> op, string asmName,
                                                         string opName = NAME> {
  defm _t16: VOP2Only_Real_MADK_t16_gfx11_gfx12<op, asmName, opName#"_t16">;
  defm _fake16: VOP2Only_Real_MADK_t16_gfx11_gfx12<op, asmName, opName#"_fake16">;
}

multiclass VOP2_Real_FULL_t16_gfx11_gfx12<bits<6> op, string asmName,
                                          string opName = NAME> :
  VOP2_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
  VOP2_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;

multiclass VOP2_Real_FULL_t16_and_fake16_gfx11_gfx12<bits<6> op, string asmName,
                                                     string opName = NAME> {
  defm _t16: VOP2_Real_FULL_t16_gfx11_gfx12<op, asmName, opName#"_t16">;
  defm _fake16: VOP2_Real_FULL_t16_gfx11_gfx12<op, asmName, opName#"_fake16">;
}

multiclass VOP2_Real_FULL_gfx11<bits<6> op> :
  VOP2_Real_FULL<GFX11Gen, op>;

//===--------------------------- instantiations ---------------------------===//

defm V_CNDMASK_B32 : VOP2e_Real_gfx11<0x001, "V_CNDMASK_B32",
  "v_cndmask_b32">;
defm V_DOT2ACC_F32_F16 : VOP2_Real_NO_VOP3_with_name_gfx11<0x002,
  "V_DOT2C_F32_F16", "v_dot2acc_f32_f16", 1>;
defm V_FMAC_DX9_ZERO_F32 : VOP2_Real_NO_DPP_with_name_gfx11<0x006,
  "V_FMAC_LEGACY_F32", "v_fmac_dx9_zero_f32">;
defm V_MUL_DX9_ZERO_F32 : VOP2_Real_FULL_with_name_gfx11_gfx12<0x007,
  "V_MUL_LEGACY_F32", "v_mul_dx9_zero_f32">;
defm V_LSHLREV_B32 : VOP2_Real_FULL_gfx11_gfx12<0x018>;
defm V_LSHRREV_B32 : VOP2_Real_FULL_gfx11_gfx12<0x019>;
defm V_ASHRREV_I32 : VOP2_Real_FULL_gfx11_gfx12<0x01a>;
defm V_ADD_CO_CI_U32 :
  VOP2be_Real_gfx11<0x020, "V_ADDC_U32", "v_add_co_ci_u32">;
defm V_SUB_CO_CI_U32 :
  VOP2be_Real_gfx11<0x021, "V_SUBB_U32", "v_sub_co_ci_u32">;
defm V_SUBREV_CO_CI_U32 :
  VOP2be_Real_gfx11<0x022, "V_SUBBREV_U32", "v_subrev_co_ci_u32">;

defm V_CVT_PK_RTZ_F16_F32 : VOP2_Real_FULL_with_name_gfx11_gfx12<0x02f,
  "V_CVT_PKRTZ_F16_F32", "v_cvt_pk_rtz_f16_f32">;
defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx11_gfx12<0x03c>;

// 16-bit operations: each opcode is instantiated for the _t16 and the
// _fake16 pseudo. v_max_f16 / v_min_f16 are GFX11-only in this list.
defm V_ADD_F16_t16       : VOP2_Real_FULL_t16_gfx11_gfx12<0x032, "v_add_f16">;
defm V_ADD_F16_fake16    : VOP2_Real_FULL_t16_gfx11_gfx12<0x032, "v_add_f16">;
defm V_SUB_F16_t16       : VOP2_Real_FULL_t16_gfx11_gfx12<0x033, "v_sub_f16">;
defm V_SUB_F16_fake16    : VOP2_Real_FULL_t16_gfx11_gfx12<0x033, "v_sub_f16">;
defm V_SUBREV_F16_t16    : VOP2_Real_FULL_t16_gfx11_gfx12<0x034, "v_subrev_f16">;
defm V_SUBREV_F16_fake16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x034, "v_subrev_f16">;
defm V_MUL_F16_t16       : VOP2_Real_FULL_t16_gfx11_gfx12<0x035, "v_mul_f16">;
defm V_MUL_F16_fake16    : VOP2_Real_FULL_t16_gfx11_gfx12<0x035, "v_mul_f16">;
defm V_FMAC_F16          : VOP2_Real_FULL_t16_and_fake16_gfx11_gfx12<0x036, "v_fmac_f16">;
defm V_LDEXP_F16_t16     : VOP2_Real_FULL_t16_gfx11_gfx12<0x03b, "v_ldexp_f16">;
defm V_LDEXP_F16_fake16  : VOP2_Real_FULL_t16_gfx11_gfx12<0x03b, "v_ldexp_f16">;
defm V_MAX_F16_t16       : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">;
defm V_MAX_F16_fake16    : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">;
defm V_MIN_F16_t16       : VOP2_Real_FULL_t16_gfx11<0x03a, "v_min_f16">;
defm V_MIN_F16_fake16    : VOP2_Real_FULL_t16_gfx11<0x03a, "v_min_f16">;
defm V_FMAMK_F16         : VOP2Only_Real_MADK_t16_and_fake16_gfx11_gfx12<0x037, "v_fmamk_f16">;
defm V_FMAAK_F16         : VOP2Only_Real_MADK_t16_and_fake16_gfx11_gfx12<0x038, "v_fmaak_f16">;

// VOP3 only.
defm V_CNDMASK_B16         : VOP3Only_Realtriple_gfx11_gfx12<0x25d>;
defm V_LDEXP_F32           : VOP3Only_Realtriple_gfx11_gfx12<0x31c>;
defm V_BFM_B32             : VOP3Only_Realtriple_gfx11_gfx12<0x31d>;
defm V_BCNT_U32_B32        : VOP3Only_Realtriple_gfx11_gfx12<0x31e>;
defm V_MBCNT_LO_U32_B32    : VOP3Only_Realtriple_gfx11_gfx12<0x31f>;
defm V_MBCNT_HI_U32_B32    : VOP3Only_Realtriple_gfx11_gfx12<0x320>;
defm V_CVT_PK_NORM_I16_F32 : VOP3Only_Realtriple_with_name_gfx11_gfx12<0x321, "V_CVT_PKNORM_I16_F32", "v_cvt_pk_norm_i16_f32">;
defm V_CVT_PK_NORM_U16_F32 : VOP3Only_Realtriple_with_name_gfx11_gfx12<0x322, "V_CVT_PKNORM_U16_F32", "v_cvt_pk_norm_u16_f32">;
defm V_CVT_PK_U16_U32      : VOP3Only_Realtriple_gfx11_gfx12<0x323>;
defm V_CVT_PK_I16_I32      : VOP3Only_Realtriple_gfx11_gfx12<0x324>;
defm V_ADD_CO_U32          : VOP3beOnly_Realtriple_gfx11_gfx12<0x300>;
defm V_SUB_CO_U32          : VOP3beOnly_Realtriple_gfx11_gfx12<0x301>;
defm V_SUBREV_CO_U32       : VOP3beOnly_Realtriple_gfx11_gfx12<0x302>;

// Instruction aliases from the pseudo e32 forms to the GFX11 real e32 forms.
let SubtargetPredicate = isGFX11Only in {
  defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx11>;

  defm : VOP2bInstAliases<
    V_ADDC_U32_e32, V_ADD_CO_CI_U32_e32_gfx11, "v_add_co_ci_u32">;
  defm : VOP2bInstAliases<
    V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx11, "v_sub_co_ci_u32">;
  defm : VOP2bInstAliases<
    V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx11, "v_subrev_co_ci_u32">;
} // End SubtargetPredicate = isGFX11Only

//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//

// Every real defined inside this `let` inherits the GFX10-only assembler
// predicate and the "GFX10" decoder namespace unless a def overrides them.
let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
  //===------------------------------- VOP2 -------------------------------===//
  // These multiclasses look the pseudo up by the defm NAME.
  multiclass VOP2Only_Real_MADK_gfx10<bits<6> op> {
    def _gfx10 :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.GFX10>,
      VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
  }
  multiclass VOP2Only_Real_MADK_gfx10_with_name<bits<6> op, string opName,
                                                string asmName> {
    def _gfx10 :
        VOP2_Real<!cast<VOP2_Pseudo>(opName), SIEncodingFamily.GFX10>,
        VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(opName).Pfl> {
      VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName);
      let AsmString = asmName # ps.AsmOperands;
    }
  }
  multiclass VOP2_Real_e32_gfx10<bits<6> op> {
    def _e32_gfx10 :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX10>,
      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
  }
  // The e64 form reuses the 6-bit VOP2 opcode in the low bits of the 10-bit
  // VOP3 opcode, prefixed with the bits {0, 1, 0, 0}.
  multiclass VOP2_Real_e64_gfx10<bits<6> op> {
    def _e64_gfx10 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
      VOP3e_gfx10<{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
  // SDWA / DPP / DPP8 reals are emitted only when the e32 pseudo's profile
  // advertises the corresponding extension.
  multiclass VOP2_Real_sdwa_gfx10<bits<6> op> {
    if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
    def _sdwa_gfx10 :
      VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
  }
  multiclass VOP2_Real_dpp_gfx10<bits<6> op> {
    if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then
    def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX10>;
  }
  multiclass VOP2_Real_dpp8_gfx10<bits<6> op> {
    if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then
    def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(NAME#"_e32")>;
  }

  //===------------------------- VOP2 (with name) -------------------------===//
  // Same shapes as above, but the pseudo is looked up through `opName` and
  // the asm string is rebuilt from `asmName`.
  multiclass VOP2_Real_e32_gfx10_with_name<bits<6> op, string opName,
                                           string asmName> {
    def _e32_gfx10 :
        VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.GFX10>,
        VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl> {
      VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
      let AsmString = asmName # ps.AsmOperands;
    }
  }
  multiclass VOP2_Real_e64_gfx10_with_name<bits<6> op, string opName,
                                           string asmName> {
    def _e64_gfx10 :
        VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>,
        VOP3e_gfx10<{0, 1, 0, 0, op{5-0}},
                    !cast<VOP3_Pseudo>(opName#"_e64").Pfl> {
      VOP3_Pseudo ps = !cast<VOP3_Pseudo>(opName#"_e64");
      let AsmString = asmName # ps.AsmOperands;
    }
  }
  multiclass VOP2_Real_sdwa_gfx10_with_name<bits<6> op, string opName,
                                            string asmName> {
    if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9 then
    def _sdwa_gfx10 :
        VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
        VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
      VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
      let AsmString = asmName # ps.AsmOperands;
    }
  }
  multiclass VOP2_Real_dpp_gfx10_with_name<bits<6> op, string opName,
                                           string asmName> {
    if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
    def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX10> {
      VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
      let AsmString = asmName # ps.Pfl.AsmDPP16;
    }
  }
  multiclass VOP2_Real_dpp8_gfx10_with_name<bits<6> op, string opName,
                                            string asmName> {
    if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
    def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
      VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
      let AsmString = asmName # ps.Pfl.AsmDPP8;
    }
  }

  //===------------------------------ VOP2be ------------------------------===//
  // The primary real's asm string has the ", vcc" text removed from the
  // operand string.
  multiclass VOP2be_Real_e32_gfx10<bits<6> op, string opName, string asmName> {
    def _e32_gfx10 :
        VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.GFX10>,
        VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl> {
      VOP2_Pseudo Ps = !cast<VOP2_Pseudo>(opName#"_e32");
      let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands);
    }
  }
  multiclass VOP2be_Real_e64_gfx10<bits<6> op, string opName, string asmName> {
    def _e64_gfx10 :
        VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>,
        VOP3be_gfx10<{0, 1, 0, 0, op{5-0}},
                     !cast<VOP3_Pseudo>(opName#"_e64").Pfl> {
      VOP3_Pseudo Ps = !cast<VOP3_Pseudo>(opName#"_e64");
      let AsmString = asmName # Ps.AsmOperands;
    }
  }
  // Three SDWA reals share one HasExtSDWA9 guard:
  //   _sdwa      - ", vcc" stripped from the operand string
  //   _sdwa_w32  - "vcc" spelled "vcc_lo"; parser-only, wave32
  //   _sdwa_w64  - operand string unchanged; parser-only, wave64
  multiclass VOP2be_Real_sdwa_gfx10<bits<6> op, string opName, string asmName> {
    defvar e32Pseudo = !cast<VOP2_Pseudo>(opName#"_e32");
    if e32Pseudo.Pfl.HasExtSDWA9 then {
      def _sdwa_gfx10 :
          VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
          VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
        VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
        let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands);
      }
      def _sdwa_w32_gfx10 :
          Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
          VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
        VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
        let AsmString = asmName # !subst("vcc", "vcc_lo", Ps.AsmOperands);
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave32;
      }
      def _sdwa_w64_gfx10 :
          Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
          VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
        VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
        let AsmString = asmName # Ps.AsmOperands;
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave64;
      }
    }
  }
  // DPP16 counterpart of the SDWA trio above, guarded by HasExt32BitDPP.
  multiclass VOP2be_Real_dpp_gfx10<bits<6> op, string opName, string asmName> {
    defvar e32Pseudo = !cast<VOP2_Pseudo>(opName#"_e32");
    if e32Pseudo.Pfl.HasExt32BitDPP then {
      def _dpp_gfx10 :
          VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX10, asmName> {
        string AsmDPP = e32Pseudo.Pfl.AsmDPP16;
        let AsmString = asmName # !subst(", vcc", "", AsmDPP);
      }
      def _dpp_w32_gfx10 :
          Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
        string AsmDPP = e32Pseudo.Pfl.AsmDPP16;
        let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP);
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave32;
      }
      def _dpp_w64_gfx10 :
          Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
        string AsmDPP = e32Pseudo.Pfl.AsmDPP16;
        let AsmString = asmName # AsmDPP;
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave64;
      }
    }
  }
  // DPP8 counterpart, also guarded by HasExt32BitDPP.
  multiclass VOP2be_Real_dpp8_gfx10<bits<6> op, string opName, string asmName> {
    defvar e32Pseudo = !cast<VOP2_Pseudo>(opName#"_e32");
    if e32Pseudo.Pfl.HasExt32BitDPP then {
      def _dpp8_gfx10 :
          VOP2_DPP8<op, e32Pseudo> {
        string AsmDPP8 = e32Pseudo.Pfl.AsmDPP8;
        let AsmString = asmName # !subst(", vcc", "", AsmDPP8);
      }
      def _dpp8_w32_gfx10 :
          VOP2_DPP8<op, e32Pseudo> {
        string AsmDPP8 = e32Pseudo.Pfl.AsmDPP8;
        let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8);
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave32;
      }
      def _dpp8_w64_gfx10 :
          VOP2_DPP8<op, e32Pseudo> {
        string AsmDPP8 = e32Pseudo.Pfl.AsmDPP8;
        let AsmString = asmName # AsmDPP8;
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave64;
      }
    }
  }

  //===----------------------------- VOP3Only -----------------------------===//
  multiclass VOP3Only_Real_gfx10<bits<10> op> {
    def _e64_gfx10 :
        VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
        VOP3e_gfx10<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
      let IsSingle = 1;
    }
  }

  //===---------------------------- VOP3beOnly ----------------------------===//
  multiclass VOP3beOnly_Real_gfx10<bits<10> op> {
    def _e64_gfx10 :
        VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
        VOP3be_gfx10<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
      let IsSingle = 1;
    }
  }
} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"

// Combinations of the per-encoding multiclasses above, optionally chained
// with the GFX11 / GFX12 generic reals for opcodes shared across generations.

multiclass VOP2Only_Real_MADK_gfx10_gfx11<bits<6> op> :
  VOP2Only_Real_MADK_gfx10<op>, VOP2Only_Real_MADK<GFX11Gen, op>;

multiclass VOP2Only_Real_MADK_gfx10_gfx11_gfx12<bits<6> op> :
  VOP2Only_Real_MADK_gfx10_gfx11<op>, VOP2Only_Real_MADK<GFX12Gen, op>;

multiclass VOP2be_Real_gfx10<bits<6> op, string opName, string asmName> :
  VOP2be_Real_e32_gfx10<op, opName, asmName>,
  VOP2be_Real_e64_gfx10<op, opName, asmName>,
  VOP2be_Real_sdwa_gfx10<op, opName, asmName>,
  VOP2be_Real_dpp_gfx10<op, opName, asmName>,
  VOP2be_Real_dpp8_gfx10<op, opName, asmName>;

multiclass VOP2e_Real_gfx10<bits<6> op, string opName, string asmName> :
  VOP2_Real_e32_gfx10<op>,
  VOP2_Real_e64_gfx10<op>,
  VOP2be_Real_sdwa_gfx10<op, opName, asmName>,
  VOP2be_Real_dpp_gfx10<op, opName, asmName>,
  VOP2be_Real_dpp8_gfx10<op, opName, asmName>;

multiclass VOP2_Real_gfx10<bits<6> op> :
  VOP2_Real_e32_gfx10<op>, VOP2_Real_e64_gfx10<op>,
  VOP2_Real_sdwa_gfx10<op>, VOP2_Real_dpp_gfx10<op>, VOP2_Real_dpp8_gfx10<op>;

multiclass VOP2_Real_gfx10_gfx11<bits<6> op> :
  VOP2_Real_gfx10<op>, VOP2_Real_FULL<GFX11Gen, op>;

multiclass VOP2_Real_gfx10_gfx11_gfx12<bits<6> op> :
  VOP2_Real_gfx10_gfx11<op>, VOP2_Real_FULL<GFX12Gen, op>;

multiclass VOP2_Real_with_name_gfx10<bits<6> op, string opName,
                                     string asmName> :
  VOP2_Real_e32_gfx10_with_name<op, opName, asmName>,
  VOP2_Real_e64_gfx10_with_name<op, opName, asmName>,
  VOP2_Real_sdwa_gfx10_with_name<op, opName, asmName>,
  VOP2_Real_dpp_gfx10_with_name<op, opName, asmName>,
  VOP2_Real_dpp8_gfx10_with_name<op, opName, asmName>;

multiclass VOP2_Real_with_name_gfx10_gfx11_gfx12<bits<6> op, string opName,
                                                 string asmName> :
  VOP2_Real_with_name_gfx10<op, opName, asmName>,
  VOP2_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
  VOP2_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;

// NB: Same opcode as v_mac_legacy_f32
let DecoderNamespace = "GFX10_B" in
defm V_FMAC_LEGACY_F32 : VOP2_Real_gfx10<0x006>;

defm V_XNOR_B32   : VOP2_Real_gfx10_gfx11_gfx12<0x01e>;
defm V_FMAC_F32   : VOP2_Real_gfx10_gfx11_gfx12<0x02b>;
defm V_FMAMK_F32  : VOP2Only_Real_MADK_gfx10_gfx11_gfx12<0x02c>;
defm V_FMAAK_F32  : VOP2Only_Real_MADK_gfx10_gfx11_gfx12<0x02d>;
defm V_ADD_F16    : VOP2_Real_gfx10<0x032>;
defm V_SUB_F16    : VOP2_Real_gfx10<0x033>;
defm V_SUBREV_F16 : VOP2_Real_gfx10<0x034>;
defm V_MUL_F16    : VOP2_Real_gfx10<0x035>;
defm V_FMAC_F16   : VOP2_Real_gfx10<0x036>;
defm V_FMAMK_F16  : VOP2Only_Real_MADK_gfx10<0x037>;
defm V_FMAAK_F16  : VOP2Only_Real_MADK_gfx10<0x038>;
defm V_MAX_F16    : VOP2_Real_gfx10<0x039>;
defm V_MIN_F16    : VOP2_Real_gfx10<0x03a>;
defm V_LDEXP_F16  : VOP2_Real_gfx10<0x03b>;

let IsSingle = 1 in {
  defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx10<0x03c>;
}

// VOP2 no carry-in, carry-out.
defm V_ADD_NC_U32 :
  VOP2_Real_with_name_gfx10_gfx11_gfx12<0x025, "V_ADD_U32", "v_add_nc_u32">;
defm V_SUB_NC_U32 :
  VOP2_Real_with_name_gfx10_gfx11_gfx12<0x026, "V_SUB_U32", "v_sub_nc_u32">;
defm V_SUBREV_NC_U32 :
  VOP2_Real_with_name_gfx10_gfx11_gfx12<0x027, "V_SUBREV_U32", "v_subrev_nc_u32">;

// VOP2 carry-in, carry-out.
defm V_ADD_CO_CI_U32 :
  VOP2be_Real_gfx10<0x028, "V_ADDC_U32", "v_add_co_ci_u32">;
defm V_SUB_CO_CI_U32 :
  VOP2be_Real_gfx10<0x029, "V_SUBB_U32", "v_sub_co_ci_u32">;
defm V_SUBREV_CO_CI_U32 :
  VOP2be_Real_gfx10<0x02a, "V_SUBBREV_U32", "v_subrev_co_ci_u32">;

defm V_CNDMASK_B32 :
  VOP2e_Real_gfx10<0x001, "V_CNDMASK_B32", "v_cndmask_b32">;

// VOP3 only.
defm V_BFM_B32            : VOP3Only_Real_gfx10<0x363>;
defm V_BCNT_U32_B32       : VOP3Only_Real_gfx10<0x364>;
defm V_MBCNT_LO_U32_B32   : VOP3Only_Real_gfx10<0x365>;
defm V_MBCNT_HI_U32_B32   : VOP3Only_Real_gfx10<0x366>;
defm V_LDEXP_F32          : VOP3Only_Real_gfx10<0x362>;
defm V_CVT_PKNORM_I16_F32 : VOP3Only_Real_gfx10<0x368>;
defm V_CVT_PKNORM_U16_F32 : VOP3Only_Real_gfx10<0x369>;
defm V_CVT_PK_U16_U32     : VOP3Only_Real_gfx10<0x36a>;
defm V_CVT_PK_I16_I32     : VOP3Only_Real_gfx10<0x36b>;

// VOP3 carry-out.
defm V_ADD_CO_U32    : VOP3beOnly_Real_gfx10<0x30f>;
defm V_SUB_CO_U32    : VOP3beOnly_Real_gfx10<0x310>;
defm V_SUBREV_CO_U32 : VOP3beOnly_Real_gfx10<0x319>;

// Instruction aliases from the pseudo e32 forms to the GFX10 real e32 forms.
let SubtargetPredicate = isGFX10Only in {
  defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx10>;

  defm : VOP2bInstAliases<
    V_ADDC_U32_e32, V_ADD_CO_CI_U32_e32_gfx10, "v_add_co_ci_u32">;
  defm : VOP2bInstAliases<
    V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx10, "v_sub_co_ci_u32">;
  defm : VOP2bInstAliases<
    V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx10, "v_subrev_co_ci_u32">;
} // End SubtargetPredicate = isGFX10Only

//===----------------------------------------------------------------------===//
// GFX6, GFX7, GFX10, GFX11
//===----------------------------------------------------------------------===//

// VOP2 encoding for the DPP form: the src0 field (bits 8-0) carries the fixed
// value 0xfa, src1/vdst are emitted only when the profile has them, and bit
// 31 is zero.
class VOP2_DPPe <bits<6> op, VOP2_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
  VOP_DPPe <P> {
  bits<8> vdst;
  bits<8> src1;
  let Inst{8-0}   = 0xfa; //dpp
  let Inst{16-9}  = !if(P.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; //encoding
}

// Reals defined inside this `let` carry the GFX6/GFX7 assembler predicate
// and decoder namespace; all of them use the SI encoding family.
let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
  multiclass VOP2_Lane_Real_gfx6_gfx7<bits<6> op> {
    def _gfx6_gfx7 :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
  }
  multiclass VOP2Only_Real_MADK_gfx6_gfx7<bits<6> op> {
    def _gfx6_gfx7 :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
      VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
  }
  multiclass VOP2_Real_e32_gfx6_gfx7<bits<6> op, string opName = NAME> {
    def _e32_gfx6_gfx7 :
      VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.SI>,
      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl>;
  }
  // The e64 forms place the 6-bit VOP2 opcode in the low bits of the VOP3
  // opcode, prefixed with the bits {1, 0, 0}.
  multiclass VOP2_Real_e64_gfx6_gfx7<bits<6> op, string opName = NAME> {
    def _e64_gfx6_gfx7 :
      VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.SI>,
      VOP3e_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(opName#"_e64").Pfl>;
  }
  multiclass VOP2be_Real_e64_gfx6_gfx7<bits<6> op, string opName = NAME> {
    def _e64_gfx6_gfx7 :
      VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.SI>,
      VOP3be_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(opName#"_e64").Pfl>;
  }
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"

// Combinations chaining the GFX6/GFX7 reals with later generations for
// opcodes that keep the same number.

multiclass VOP2Only_Real_MADK_gfx6_gfx7_gfx10<bits<6> op> :
  VOP2Only_Real_MADK_gfx6_gfx7<op>, VOP2Only_Real_MADK_gfx10<op>;

multiclass VOP2_Real_gfx6_gfx7<bits<6> op> :
  VOP2_Real_e32_gfx6_gfx7<op>, VOP2_Real_e64_gfx6_gfx7<op>;

multiclass VOP2_Real_gfx6_gfx7_gfx10<bits<6> op> :
  VOP2_Real_gfx6_gfx7<op>, VOP2_Real_gfx10<op>;

multiclass VOP2_Real_gfx6_gfx7_gfx10_gfx11<bits<6> op> :
  VOP2_Real_gfx6_gfx7_gfx10<op>, VOP2_Real_FULL<GFX11Gen, op>;

multiclass VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<bits<6> op> :
  VOP2_Real_gfx6_gfx7_gfx10_gfx11<op>, VOP2_Real_FULL<GFX12Gen, op>;

multiclass VOP2be_Real_gfx6_gfx7<bits<6> op> :
  VOP2_Real_e32_gfx6_gfx7<op>, VOP2be_Real_e64_gfx6_gfx7<op>;

// e32 + e64 (VOP3be) reals for `opName`, each with its asm string rebuilt as
// `asmName` followed by the matching pseudo's operand string.
multiclass VOP2be_Real_gfx6_gfx7_with_name<bits<6> op,
  string opName, string asmName>  {
  defvar e32Pseudo = !cast<VOP2_Pseudo>(opName#"_e32");
  defvar e64Pseudo = !cast<VOP3_Pseudo>(opName#"_e64");

  let AsmString = asmName # e32Pseudo.AsmOperands in {
    defm "" : VOP2_Real_e32_gfx6_gfx7<op, opName>;
  }

  let AsmString = asmName # e64Pseudo.AsmOperands in {
    defm "" : VOP2be_Real_e64_gfx6_gfx7<op, opName>;
  }
}

defm V_CNDMASK_B32        : VOP2_Real_gfx6_gfx7<0x000>;
defm V_MIN_LEGACY_F32     : VOP2_Real_gfx6_gfx7<0x00d>;
defm V_MAX_LEGACY_F32     : VOP2_Real_gfx6_gfx7<0x00e>;
defm V_LSHR_B32           : VOP2_Real_gfx6_gfx7<0x015>;
defm V_ASHR_I32           : VOP2_Real_gfx6_gfx7<0x017>;
defm V_LSHL_B32           : VOP2_Real_gfx6_gfx7<0x019>;
defm V_BFM_B32            : VOP2_Real_gfx6_gfx7<0x01e>;
defm V_BCNT_U32_B32       : VOP2_Real_gfx6_gfx7<0x022>;
defm V_MBCNT_LO_U32_B32   : VOP2_Real_gfx6_gfx7<0x023>;
defm V_MBCNT_HI_U32_B32   : VOP2_Real_gfx6_gfx7<0x024>;
defm V_LDEXP_F32          : VOP2_Real_gfx6_gfx7<0x02b>;
defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_gfx6_gfx7<0x02c>;
defm V_CVT_PKNORM_I16_F32 : VOP2_Real_gfx6_gfx7<0x02d>;
defm V_CVT_PKNORM_U16_F32 : VOP2_Real_gfx6_gfx7<0x02e>;
defm V_CVT_PK_U16_U32     : VOP2_Real_gfx6_gfx7<0x030>;
defm V_CVT_PK_I16_I32     : VOP2_Real_gfx6_gfx7<0x031>;

// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 were renamed to *_U32 in
// VI, but the VI instructions behave the same as the SI versions.
defm V_ADD_I32     : VOP2be_Real_gfx6_gfx7_with_name<0x025, "V_ADD_CO_U32", "v_add_i32">;
defm V_SUB_I32     : VOP2be_Real_gfx6_gfx7_with_name<0x026, "V_SUB_CO_U32", "v_sub_i32">;
defm V_SUBREV_I32  : VOP2be_Real_gfx6_gfx7_with_name<0x027, "V_SUBREV_CO_U32", "v_subrev_i32">;
defm V_ADDC_U32    : VOP2be_Real_gfx6_gfx7<0x028>;
defm V_SUBB_U32    : VOP2be_Real_gfx6_gfx7<0x029>;
defm V_SUBBREV_U32 : VOP2be_Real_gfx6_gfx7<0x02a>;

defm V_READLANE_B32 : VOP2_Lane_Real_gfx6_gfx7<0x001>;

// V_WRITELANE_B32 overrides the input operand list for its real.
let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in {
  defm V_WRITELANE_B32 : VOP2_Lane_Real_gfx6_gfx7<0x002>;
} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in)

// Instruction aliases from the pseudo forms to the GFX6/GFX7 reals.
let SubtargetPredicate = isGFX6GFX7 in {
  defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx6_gfx7>;
  defm : VOP2eInstAliases<V_ADD_CO_U32_e32, V_ADD_I32_e32_gfx6_gfx7>;
  defm : VOP2eInstAliases<V_SUB_CO_U32_e32, V_SUB_I32_e32_gfx6_gfx7>;
  defm : VOP2eInstAliases<V_SUBREV_CO_U32_e32, V_SUBREV_I32_e32_gfx6_gfx7>;

  def : VOP2e64InstAlias<V_ADD_CO_U32_e64, V_ADD_I32_e64_gfx6_gfx7>;
  def : VOP2e64InstAlias<V_SUB_CO_U32_e64, V_SUB_I32_e64_gfx6_gfx7>;
  def : VOP2e64InstAlias<V_SUBREV_CO_U32_e64, V_SUBREV_I32_e64_gfx6_gfx7>;
} // End SubtargetPredicate = isGFX6GFX7

// Opcodes shared between GFX6/GFX7 and later generations; the multiclass
// suffix lists the generations each one is instantiated for.
defm V_ADD_F32           : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x003>;
defm V_SUB_F32           : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x004>;
defm V_SUBREV_F32        : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x005>;
defm V_MAC_LEGACY_F32    : VOP2_Real_gfx6_gfx7_gfx10<0x006>;
defm V_MUL_LEGACY_F32    : VOP2_Real_gfx6_gfx7_gfx10<0x007>;
defm V_MUL_F32           : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x008>;
defm V_MUL_I32_I24       : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x009>;
defm V_MUL_HI_I32_I24    : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00a>;
defm V_MUL_U32_U24       : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00b>;
defm V_MUL_HI_U32_U24    : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00c>;
defm V_MIN_F32           : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x00f>;
defm V_MAX_F32           : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x010>;
defm V_MIN_I32           : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x011>;
defm V_MAX_I32           : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x012>;
defm V_MIN_U32           : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x013>;
defm V_MAX_U32           : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x014>;
defm V_LSHRREV_B32       : VOP2_Real_gfx6_gfx7_gfx10<0x016>;
defm V_ASHRREV_I32       : VOP2_Real_gfx6_gfx7_gfx10<0x018>;
defm V_LSHLREV_B32       : VOP2_Real_gfx6_gfx7_gfx10<0x01a>;
defm V_AND_B32           : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x01b>;
defm V_OR_B32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x01c>;
defm V_XOR_B32           : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x01d>;
defm V_MAC_F32           : VOP2_Real_gfx6_gfx7_gfx10<0x01f>;
defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x02f>;
defm V_MADMK_F32         : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x020>;
defm V_MADAK_F32         : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x021>;

//===----------------------------------------------------------------------===//
// GFX8,
GFX9 (VI). 2330//===----------------------------------------------------------------------===// 2331 2332let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in { 2333 2334multiclass VOP2_Real_MADK_vi <bits<6> op> { 2335 def _vi : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.VI>, 2336 VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>; 2337} 2338 2339multiclass VOP2_Real_MADK_gfx940 <bits<6> op> { 2340 def _gfx940 : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.GFX940>, 2341 VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl> { 2342 let DecoderNamespace = "GFX9"; 2343 } 2344} 2345 2346multiclass VOP2_Real_e32_vi <bits<6> op> { 2347 def _e32_vi : 2348 VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>, 2349 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>; 2350} 2351 2352multiclass VOP2_Real_e64_vi <bits<10> op> { 2353 def _e64_vi : 2354 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>, 2355 VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; 2356} 2357 2358multiclass VOP2_Real_e64only_vi <bits<10> op> { 2359 def _e64_vi : 2360 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>, 2361 VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> { 2362 let IsSingle = 1; 2363 } 2364} 2365 2366multiclass Base_VOP2_Real_e32e64_vi <bits<6> op> : 2367 VOP2_Real_e32_vi<op>, 2368 VOP2_Real_e64_vi<{0, 1, 0, 0, op{5-0}}>; 2369 2370} // End AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" 2371 2372multiclass VOP2_SDWA8_Real <bits<6> op> { 2373 if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA then 2374 def _sdwa_vi : 2375 VOP_SDWA8_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>, 2376 VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>; 2377} 2378 2379multiclass VOP2_SDWA9_Real <bits<6> op> { 2380 if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then 2381 def _sdwa_gfx9 : 2382 VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>, 2383 VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>; 
2384} 2385 2386let AssemblerPredicate = isGFX8Only, DecoderNamespace = "GFX8" in { 2387 2388multiclass VOP2be_Real_e32e64_vi_only <bits<6> op, string OpName, string AsmName> { 2389 def _e32_vi : 2390 VOP2_Real<!cast<VOP2_Pseudo>(OpName#"_e32"), SIEncodingFamily.VI>, 2391 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(OpName#"_e32").Pfl> { 2392 VOP2_Pseudo ps = !cast<VOP2_Pseudo>(OpName#"_e32"); 2393 let AsmString = AsmName # ps.AsmOperands; 2394 } 2395 def _e64_vi : 2396 VOP3_Real<!cast<VOP3_Pseudo>(OpName#"_e64"), SIEncodingFamily.VI>, 2397 VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(OpName#"_e64").Pfl> { 2398 VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName#"_e64"); 2399 let AsmString = AsmName # ps.AsmOperands; 2400 } 2401 if !cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtSDWA then 2402 def _sdwa_vi : 2403 VOP_SDWA8_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>, 2404 VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> { 2405 VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa"); 2406 let AsmString = AsmName # ps.AsmOperands; 2407 } 2408 if !cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP then 2409 def _dpp_vi : 2410 VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.VI>, 2411 VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> { 2412 VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp"); 2413 let AsmString = AsmName # ps.AsmOperands; 2414 } 2415} 2416 2417} // End AssemblerPredicate = isGFX8Only, DecoderNamespace = "GFX8" 2418 2419let DecoderNamespace = "GFX9" in { 2420 2421multiclass VOP2be_Real_e32e64_gfx9 <bits<6> op, string OpName, string AsmName> { 2422 def _e32_gfx9 : 2423 VOP2_Real<!cast<VOP2_Pseudo>(OpName#"_e32"), SIEncodingFamily.GFX9>, 2424 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(OpName#"_e32").Pfl> { 2425 VOP2_Pseudo ps = !cast<VOP2_Pseudo>(OpName#"_e32"); 2426 let AsmString = AsmName # ps.AsmOperands; 2427 } 2428 def _e64_gfx9 : 2429 VOP3_Real<!cast<VOP3_Pseudo>(OpName#"_e64"), SIEncodingFamily.GFX9>, 2430 VOP3be_vi <{0, 
1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(OpName#"_e64").Pfl> { 2431 VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName#"_e64"); 2432 let AsmString = AsmName # ps.AsmOperands; 2433 } 2434 if !cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtSDWA9 then 2435 def _sdwa_gfx9 : 2436 VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>, 2437 VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> { 2438 VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa"); 2439 let AsmString = AsmName # ps.AsmOperands; 2440 } 2441 if !cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP then 2442 def _dpp_gfx9 : 2443 VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.GFX9>, 2444 VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> { 2445 VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp"); 2446 let AsmString = AsmName # ps.AsmOperands; 2447 } 2448} 2449 2450multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> { 2451 def _e32_gfx9 : 2452 VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX9>, 2453 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>; 2454 def _e64_gfx9 : 2455 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX9>, 2456 VOP3e_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; 2457 if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then 2458 def _sdwa_gfx9 : 2459 VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>, 2460 VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> { 2461 } 2462 if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then 2463 def _dpp_gfx9 : 2464 VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>, 2465 VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>; 2466} 2467 2468} // End DecoderNamespace = "GFX9" 2469 2470multiclass VOP2_Real_e32e64_vi <bits<6> op> : 2471 Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA8_Real<op>, VOP2_SDWA9_Real<op> { 2472 2473 if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then 2474 def _dpp_vi : 2475 
VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>, 2476 VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>; 2477} 2478 2479defm V_CNDMASK_B32 : VOP2_Real_e32e64_vi <0x0>; 2480defm V_ADD_F32 : VOP2_Real_e32e64_vi <0x1>; 2481defm V_SUB_F32 : VOP2_Real_e32e64_vi <0x2>; 2482defm V_SUBREV_F32 : VOP2_Real_e32e64_vi <0x3>; 2483let OtherPredicates = [isGCN3ExcludingGFX90A] in 2484defm V_MUL_LEGACY_F32 : VOP2_Real_e32e64_vi <0x4>; 2485defm V_MUL_F32 : VOP2_Real_e32e64_vi <0x5>; 2486defm V_MUL_I32_I24 : VOP2_Real_e32e64_vi <0x6>; 2487defm V_MUL_HI_I32_I24 : VOP2_Real_e32e64_vi <0x7>; 2488defm V_MUL_U32_U24 : VOP2_Real_e32e64_vi <0x8>; 2489defm V_MUL_HI_U32_U24 : VOP2_Real_e32e64_vi <0x9>; 2490defm V_MIN_F32 : VOP2_Real_e32e64_vi <0xa>; 2491defm V_MAX_F32 : VOP2_Real_e32e64_vi <0xb>; 2492defm V_MIN_I32 : VOP2_Real_e32e64_vi <0xc>; 2493defm V_MAX_I32 : VOP2_Real_e32e64_vi <0xd>; 2494defm V_MIN_U32 : VOP2_Real_e32e64_vi <0xe>; 2495defm V_MAX_U32 : VOP2_Real_e32e64_vi <0xf>; 2496defm V_LSHRREV_B32 : VOP2_Real_e32e64_vi <0x10>; 2497defm V_ASHRREV_I32 : VOP2_Real_e32e64_vi <0x11>; 2498defm V_LSHLREV_B32 : VOP2_Real_e32e64_vi <0x12>; 2499defm V_AND_B32 : VOP2_Real_e32e64_vi <0x13>; 2500defm V_OR_B32 : VOP2_Real_e32e64_vi <0x14>; 2501defm V_XOR_B32 : VOP2_Real_e32e64_vi <0x15>; 2502defm V_MAC_F32 : VOP2_Real_e32e64_vi <0x16>; 2503defm V_MADMK_F32 : VOP2_Real_MADK_vi <0x17>; 2504defm V_MADAK_F32 : VOP2_Real_MADK_vi <0x18>; 2505 2506defm V_ADD_U32 : VOP2be_Real_e32e64_vi_only <0x19, "V_ADD_CO_U32", "v_add_u32">; 2507defm V_SUB_U32 : VOP2be_Real_e32e64_vi_only <0x1a, "V_SUB_CO_U32", "v_sub_u32">; 2508defm V_SUBREV_U32 : VOP2be_Real_e32e64_vi_only <0x1b, "V_SUBREV_CO_U32", "v_subrev_u32">; 2509defm V_ADDC_U32 : VOP2be_Real_e32e64_vi_only <0x1c, "V_ADDC_U32", "v_addc_u32">; 2510defm V_SUBB_U32 : VOP2be_Real_e32e64_vi_only <0x1d, "V_SUBB_U32", "v_subb_u32">; 2511defm V_SUBBREV_U32 : VOP2be_Real_e32e64_vi_only <0x1e, "V_SUBBREV_U32", "v_subbrev_u32">; 2512 2513let 
AssemblerPredicate = isGFX9Only in { 2514defm V_ADD_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x19, "V_ADD_CO_U32", "v_add_co_u32">; 2515defm V_SUB_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1a, "V_SUB_CO_U32", "v_sub_co_u32">; 2516defm V_SUBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1b, "V_SUBREV_CO_U32", "v_subrev_co_u32">; 2517defm V_ADDC_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1c, "V_ADDC_U32", "v_addc_co_u32">; 2518defm V_SUBB_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1d, "V_SUBB_U32", "v_subb_co_u32">; 2519defm V_SUBBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1e, "V_SUBBREV_U32", "v_subbrev_co_u32">; 2520 2521defm V_ADD_U32 : VOP2_Real_e32e64_gfx9 <0x34>; 2522defm V_SUB_U32 : VOP2_Real_e32e64_gfx9 <0x35>; 2523defm V_SUBREV_U32 : VOP2_Real_e32e64_gfx9 <0x36>; 2524} // End AssemblerPredicate = isGFX9Only 2525 2526defm V_BFM_B32 : VOP2_Real_e64only_vi <0x293>; 2527defm V_BCNT_U32_B32 : VOP2_Real_e64only_vi <0x28b>; 2528defm V_MBCNT_LO_U32_B32 : VOP2_Real_e64only_vi <0x28c>; 2529defm V_MBCNT_HI_U32_B32 : VOP2_Real_e64only_vi <0x28d>; 2530defm V_LDEXP_F32 : VOP2_Real_e64only_vi <0x288>; 2531defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e64only_vi <0x1f0>; 2532defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e64only_vi <0x294>; 2533defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e64only_vi <0x295>; 2534defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_e64only_vi <0x296>; 2535defm V_CVT_PK_U16_U32 : VOP2_Real_e64only_vi <0x297>; 2536defm V_CVT_PK_I16_I32 : VOP2_Real_e64only_vi <0x298>; 2537 2538defm V_ADD_F16 : VOP2_Real_e32e64_vi <0x1f>; 2539defm V_SUB_F16 : VOP2_Real_e32e64_vi <0x20>; 2540defm V_SUBREV_F16 : VOP2_Real_e32e64_vi <0x21>; 2541defm V_MUL_F16 : VOP2_Real_e32e64_vi <0x22>; 2542defm V_MAC_F16 : VOP2_Real_e32e64_vi <0x23>; 2543defm V_MADMK_F16 : VOP2_Real_MADK_vi <0x24>; 2544defm V_MADAK_F16 : VOP2_Real_MADK_vi <0x25>; 2545defm V_ADD_U16 : VOP2_Real_e32e64_vi <0x26>; 2546defm V_SUB_U16 : VOP2_Real_e32e64_vi <0x27>; 2547defm V_SUBREV_U16 : VOP2_Real_e32e64_vi <0x28>; 2548defm V_MUL_LO_U16 : VOP2_Real_e32e64_vi <0x29>; 2549defm 
V_LSHLREV_B16 : VOP2_Real_e32e64_vi <0x2a>; 2550defm V_LSHRREV_B16 : VOP2_Real_e32e64_vi <0x2b>; 2551defm V_ASHRREV_I16 : VOP2_Real_e32e64_vi <0x2c>; 2552defm V_MAX_F16 : VOP2_Real_e32e64_vi <0x2d>; 2553defm V_MIN_F16 : VOP2_Real_e32e64_vi <0x2e>; 2554defm V_MAX_U16 : VOP2_Real_e32e64_vi <0x2f>; 2555defm V_MAX_I16 : VOP2_Real_e32e64_vi <0x30>; 2556defm V_MIN_U16 : VOP2_Real_e32e64_vi <0x31>; 2557defm V_MIN_I16 : VOP2_Real_e32e64_vi <0x32>; 2558defm V_LDEXP_F16 : VOP2_Real_e32e64_vi <0x33>; 2559 2560let SubtargetPredicate = isGFX8GFX9 in { 2561 2562// Aliases to simplify matching of floating-point instructions that 2563// are VOP2 on SI and VOP3 on VI. 2564class SI2_VI3Alias <string name, VOP3_Real inst> : InstAlias < 2565 name#" $dst, $src0, $src1", 2566 !if(inst.Pfl.HasOMod, 2567 (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0), 2568 (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0)) 2569>, PredicateControl { 2570 let UseInstAsmMatchConverter = 0; 2571 let AsmVariantName = AMDGPUAsmVariants.VOP3; 2572} 2573 2574def : SI2_VI3Alias <"v_ldexp_f32", V_LDEXP_F32_e64_vi>; 2575def : SI2_VI3Alias <"v_cvt_pkaccum_u8_f32", V_CVT_PKACCUM_U8_F32_e64_vi>; 2576def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>; 2577def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>; 2578def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>; 2579 2580defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_vi>; 2581 2582} // End SubtargetPredicate = isGFX8GFX9 2583 2584let SubtargetPredicate = isGFX9Only in { 2585 2586defm : VOP2bInstAliases<V_ADD_U32_e32, V_ADD_CO_U32_e32_gfx9, "v_add_co_u32">; 2587defm : VOP2bInstAliases<V_ADDC_U32_e32, V_ADDC_CO_U32_e32_gfx9, "v_addc_co_u32">; 2588defm : VOP2bInstAliases<V_SUB_U32_e32, V_SUB_CO_U32_e32_gfx9, "v_sub_co_u32">; 2589defm : VOP2bInstAliases<V_SUBB_U32_e32, V_SUBB_CO_U32_e32_gfx9, "v_subb_co_u32">; 2590defm : VOP2bInstAliases<V_SUBREV_U32_e32, 
V_SUBREV_CO_U32_e32_gfx9, "v_subrev_co_u32">; 2591defm : VOP2bInstAliases<V_SUBBREV_U32_e32, V_SUBBREV_CO_U32_e32_gfx9, "v_subbrev_co_u32">; 2592 2593} // End SubtargetPredicate = isGFX9Only 2594 2595let SubtargetPredicate = HasDLInsts in { 2596 2597defm V_FMAC_F32 : VOP2_Real_e32e64_vi <0x3b>; 2598defm V_XNOR_B32 : VOP2_Real_e32e64_vi <0x3d>; 2599 2600} // End SubtargetPredicate = HasDLInsts 2601 2602let DecoderNamespace = "GFX90A" in { 2603 multiclass VOP2_Real_e32_gfx90a <bits<6> op> { 2604 def _e32_gfx90a : 2605 VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX90A>, 2606 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>; 2607 } 2608 2609 multiclass VOP2_Real_e64_gfx90a <bits<10> op> { 2610 def _e64_gfx90a : 2611 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX90A>, 2612 VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; 2613 } 2614 2615 multiclass Base_VOP2_Real_e32e64_gfx90a <bits<6> op> : 2616 VOP2_Real_e32_gfx90a<op>, 2617 VOP2_Real_e64_gfx90a<{0, 1, 0, 0, op{5-0}}>; 2618 2619 multiclass VOP2_Real_e32e64_gfx90a <bits<6> op> : 2620 Base_VOP2_Real_e32e64_gfx90a<op> { 2621 2622 if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then 2623 def _dpp_gfx90a : 2624 VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX90A>, 2625 VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> { 2626 let DecoderNamespace = "GFX9"; 2627 } 2628 } 2629} // End AssemblerPredicate = isGFX90APlus, DecoderNamespace = "GFX90A" 2630 2631let SubtargetPredicate = HasFmacF64Inst in { 2632 defm V_FMAC_F64 : VOP2_Real_e32e64_gfx90a <0x4>; 2633} // End SubtargetPredicate = HasFmacF64Inst 2634 2635let IsSingle = 1 in { 2636 defm V_MUL_LEGACY_F32 : VOP2_Real_e64_gfx90a <0x2a1>; 2637} 2638 2639let SubtargetPredicate = HasFmaakFmamkF32Insts in { 2640defm V_FMAMK_F32 : VOP2_Real_MADK_gfx940 <0x17>; 2641defm V_FMAAK_F32 : VOP2_Real_MADK_gfx940 <0x18>; 2642} 2643 2644multiclass VOP2_Real_DOT_ACC_gfx9<bits<6> op> : Base_VOP2_Real_e32e64_vi<op> { 2645 let 
SubtargetPredicate = isGFX9Only in 2646 def _dpp_vi : VOP2_DPP<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>; 2647} 2648 2649multiclass VOP2_Real_DOT_ACC_gfx10<bits<6> op> : 2650 VOP2_Real_e32_gfx10<op>, 2651 VOP2_Real_dpp_gfx10<op>, 2652 VOP2_Real_dpp8_gfx10<op>; 2653 2654multiclass VOP2Only_Real_DOT_ACC_gfx10<bits<6> op> : VOP2_Real_dpp_gfx10<op>, 2655 VOP2_Real_dpp8_gfx10<op> { 2656 let IsSingle = 1 in 2657 defm NAME : VOP2_Real_e32_gfx10<op>; 2658} 2659 2660let OtherPredicates = [HasDot5Insts] in { 2661 defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx9<0x37>; 2662 // NB: Opcode conflicts with V_DOT8C_I32_I4 2663 // This opcode exists in gfx 10.1* only 2664 defm V_DOT2C_F32_F16 : VOP2Only_Real_DOT_ACC_gfx10<0x02>; 2665} 2666 2667let OtherPredicates = [HasDot6Insts] in { 2668 defm V_DOT4C_I32_I8 : VOP2_Real_DOT_ACC_gfx9<0x39>; 2669 defm V_DOT4C_I32_I8 : VOP2Only_Real_DOT_ACC_gfx10<0x0d>; 2670} 2671 2672let OtherPredicates = [HasDot4Insts] in { 2673 defm V_DOT2C_I32_I16 : VOP2_Real_DOT_ACC_gfx9<0x38>; 2674} 2675let OtherPredicates = [HasDot3Insts] in { 2676 defm V_DOT8C_I32_I4 : VOP2_Real_DOT_ACC_gfx9<0x3a>; 2677} 2678 2679let SubtargetPredicate = HasPkFmacF16Inst in { 2680defm V_PK_FMAC_F16 : VOP2_Real_e32_vi<0x3c>; 2681} // End SubtargetPredicate = HasPkFmacF16Inst 2682 2683let SubtargetPredicate = HasDot3Insts in { 2684 // NB: Opcode conflicts with V_DOT2C_F32_F16 2685 let DecoderNamespace = "GFX10_B" in 2686 defm V_DOT8C_I32_I4 : VOP2_Real_DOT_ACC_gfx10<0x02>; 2687} 2688 2689let OtherPredicates = [HasDot13Insts] in { 2690 let DecoderNamespace = "GFX950" in 2691 defm V_DOT2C_F32_BF16 : VOP2_Real_DOT_ACC_gfx9<0x16>; 2692} 2693