1//===-- VOP2Instructions.td - Vector Instruction Definitions --------------===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9//===----------------------------------------------------------------------===// 10// VOP2 Classes 11//===----------------------------------------------------------------------===// 12 13class VOP2e <bits<6> op, VOPProfile P> : Enc32 { 14 bits<8> vdst; 15 bits<9> src0; 16 bits<8> src1; 17 18 let Inst{8-0} = !if(P.HasSrc0, src0, 0); 19 let Inst{16-9} = !if(P.HasSrc1, src1, 0); 20 let Inst{24-17} = !if(P.EmitDst, vdst, 0); 21 let Inst{30-25} = op; 22 let Inst{31} = 0x0; //encoding 23} 24 25class VOP2_MADKe <bits<6> op, VOPProfile P> : Enc64 { 26 bits<8> vdst; 27 bits<9> src0; 28 bits<8> src1; 29 bits<32> imm; 30 31 let Inst{8-0} = !if(P.HasSrc0, src0, 0); 32 let Inst{16-9} = !if(P.HasSrc1, src1, 0); 33 let Inst{24-17} = !if(P.EmitDst, vdst, 0); 34 let Inst{30-25} = op; 35 let Inst{31} = 0x0; // encoding 36 let Inst{63-32} = imm; 37} 38 39class VOP2_SDWAe <bits<6> op, VOPProfile P> : VOP_SDWAe <P> { 40 bits<8> vdst; 41 bits<8> src1; 42 43 let Inst{8-0} = 0xf9; // sdwa 44 let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0); 45 let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); 46 let Inst{30-25} = op; 47 let Inst{31} = 0x0; // encoding 48} 49 50class VOP2_SDWA9Ae <bits<6> op, VOPProfile P> : VOP_SDWA9Ae <P> { 51 bits<8> vdst; 52 bits<9> src1; 53 54 let Inst{8-0} = 0xf9; // sdwa 55 let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0); 56 let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); 57 let Inst{30-25} = op; 58 let Inst{31} = 0x0; // encoding 59 let Inst{63} = !if(P.HasSrc1, src1{8}, 0); // src1_sgpr 60} 61 62class VOP2_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], string suffix = "_e32"> : 63 VOP_Pseudo <opName, suffix, P, P.Outs32, P.Ins32, "", pattern> { 64 65 let AsmOperands = P.Asm32; 66 67 let Size = 4; 68 let mayLoad = 0; 69 let mayStore = 0; 70 let hasSideEffects = 0; 71 72 let ReadsModeReg = !or(P.DstVT.isFP, P.Src0VT.isFP); 73 74 let mayRaiseFPException = ReadsModeReg; 75 76 let VOP2 = 1; 77 let VALU = 1; 78 let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]); 79 80 let AsmVariantName = AMDGPUAsmVariants.Default; 81} 82 83class VOP2_Real <VOP2_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemonic> : 84 VOP_Real <ps>, 85 InstSI <ps.OutOperandList, ps.InOperandList, real_name # ps.AsmOperands, []>, 86 SIMCInstr <ps.PseudoInstr, EncodingFamily> { 87 88 let VALU = 1; 89 let VOP2 = 1; 90 let isPseudo = 0; 91 let isCodeGenOnly = 0; 92 93 let Constraints = ps.Constraints; 94 let DisableEncoding = ps.DisableEncoding; 95 96 // copy relevant pseudo op flags 97 let SubtargetPredicate = ps.SubtargetPredicate; 98 let True16Predicate = ps.True16Predicate; 99 let OtherPredicates = ps.OtherPredicates; 100 let AsmMatchConverter = ps.AsmMatchConverter; 101 let AsmVariantName = ps.AsmVariantName; 102 let Constraints = ps.Constraints; 103 let DisableEncoding = ps.DisableEncoding; 104 let TSFlags = ps.TSFlags; 105 let UseNamedOperandTable = ps.UseNamedOperandTable; 106 let Uses = ps.Uses; 107 let Defs = ps.Defs; 108 let SchedRW = ps.SchedRW; 109 let mayLoad = ps.mayLoad; 110 let mayStore = ps.mayStore; 111 let isConvergent = ps.isConvergent; 112} 113 114class VOP2_Real_Gen <VOP2_Pseudo ps, GFXGen Gen, string real_name = ps.Mnemonic> : 115 VOP2_Real <ps, Gen.Subtarget, real_name> { 116 let AssemblerPredicate = Gen.AssemblerPredicate; 117 let True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate); 118 let DecoderNamespace = Gen.DecoderNamespace# 119 !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"); 120} 121 122class VOP2_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> : 123 VOP_SDWA_Pseudo <OpName, P, pattern> { 124 let AsmMatchConverter = "cvtSdwaVOP2"; 125} 126 127class VOP2_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> : 128 VOP_DPP_Pseudo <OpName, P, pattern> { 129} 130 131 132class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies { 133 list<dag> ret = !if(P.HasModifiers, 134 [(set P.DstVT:$vdst, 135 (node (P.Src0VT 136 !if(P.HasOMod, 137 (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod), 138 (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))), 139 (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))], 140 [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]); 141} 142 143multiclass VOP2Inst_e32<string opName, 144 VOPProfile P, 145 SDPatternOperator node = null_frag, 146 string revOp = opName> { 147 def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>, 148 Commutable_REV<revOp#"_e32", !eq(revOp, opName)>; 149} 150multiclass 151 VOP2Inst_e32_VOPD<string opName, VOPProfile P, bits<5> VOPDOp, 152 string VOPDName, SDPatternOperator node = null_frag, 153 string revOp = opName> { 154 defm NAME : VOP2Inst_e32<opName, P, node, revOp>, 155 VOPD_Component<VOPDOp, VOPDName>; 156} 157multiclass VOP2Inst_e64<string opName, 158 VOPProfile P, 159 SDPatternOperator node = null_frag, 160 string revOp = opName> { 161 def _e64 : VOP3InstBase <opName, P, node, 1>, 162 Commutable_REV<revOp#"_e64", !eq(revOp, opName)>; 163 164 let SubtargetPredicate = isGFX11Plus in { 165 if P.HasExtVOP3DPP then 166 def _e64_dpp : VOP3_DPP_Pseudo <opName, P>; 167 } // End SubtargetPredicate = isGFX11Plus 168} 169 170multiclass VOP2Inst_sdwa<string opName, 171 VOPProfile P, 172 string revOp = opName> { 173 if P.HasExtSDWA then 174 def _sdwa : VOP2_SDWA_Pseudo <opName, P>, 175 Commutable_REV<revOp#"_sdwa", !eq(revOp, opName)>; 176} 177 178multiclass VOP2Inst<string opName, 179 VOPProfile P, 180 SDPatternOperator node = null_frag, 181 string revOp = opName> : 182 VOP2Inst_e32<opName, P, node, revOp>, 183 VOP2Inst_e64<opName, P, node, revOp>, 184 VOP2Inst_sdwa<opName, P, revOp> { 185 if P.HasExtDPP then 186 def _dpp : VOP2_DPP_Pseudo <opName, P>; 187} 188 189multiclass VOP2Inst_t16<string opName, 190 VOPProfile P, 191 SDPatternOperator node = null_frag, 192 string revOp = opName> { 193 let OtherPredicates = [Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in { 194 defm NAME : VOP2Inst<opName, P, node, revOp>; 195 } 196 let SubtargetPredicate = UseRealTrue16Insts in { 197 defm _t16 : VOP2Inst<opName#"_t16", VOPProfile_True16<P>, node, revOp#"_t16">; 198 } 199 let SubtargetPredicate = UseFakeTrue16Insts in { 200 defm _fake16 : VOP2Inst<opName#"_fake16", VOPProfile_Fake16<P>, node, revOp#"_fake16">; 201 } 202} 203 204// Creating a _t16_e32 pseudo when there is no corresponding real instruction on 205// any subtarget is a problem. It makes getMCOpcodeGen return -1, which we 206// assume means the instruction is already a real. The fix is to not create that 207// _t16_e32 pseudo 208multiclass VOP2Inst_e64_t16<string opName, 209 VOPProfile P, 210 SDPatternOperator node = null_frag, 211 string revOp = opName> { 212 let OtherPredicates = [Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in { 213 defm NAME : VOP2Inst<opName, P, node, revOp>; 214 } 215 let SubtargetPredicate = UseRealTrue16Insts in { 216 defm _t16 : VOP2Inst_e64<opName#"_t16", VOPProfile_True16<P>, node, revOp#"_t16">; 217 } 218 let SubtargetPredicate = UseFakeTrue16Insts in { 219 defm _fake16 : VOP2Inst_e64<opName#"_fake16", VOPProfile_Fake16<P>, node, revOp#"_fake16">; 220 } 221} 222 223multiclass VOP2Inst_VOPD<string opName, 224 VOPProfile P, 225 bits<5> VOPDOp, 226 string VOPDName, 227 SDPatternOperator node = null_frag, 228 string revOp = opName> : 229 VOP2Inst_e32_VOPD<opName, P, VOPDOp, VOPDName, node, revOp>, 230 VOP2Inst_e64<opName, P, node, revOp>, 231 VOP2Inst_sdwa<opName, P, revOp> { 232 if P.HasExtDPP then 233 def _dpp : VOP2_DPP_Pseudo <opName, P>; 234} 235 236multiclass VOP2bInst <string opName, 237 VOPProfile P, 238 SDPatternOperator node = null_frag, 239 string revOp = opName, 240 bit useSGPRInput = !eq(P.NumSrcArgs, 3)> { 241 let SchedRW = [Write32Bit, WriteSALU] in { 242 let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in { 243 def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>, 244 Commutable_REV<revOp#"_e32", !eq(revOp, opName)> { 245 let usesCustomInserter = true; 246 } 247 248 if P.HasExtSDWA then 249 def _sdwa : VOP2_SDWA_Pseudo <opName, P>, 250 Commutable_REV<revOp#"_sdwa", !eq(revOp, opName)> { 251 let AsmMatchConverter = "cvtSdwaVOP2b"; 252 } 253 if P.HasExtDPP then 254 def _dpp : VOP2_DPP_Pseudo <opName, P>; 255 } // End Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] 256 257 def _e64 : VOP3InstBase <opName, P, node, 1>, 258 Commutable_REV<revOp#"_e64", !eq(revOp, opName)>; 259 260 let SubtargetPredicate = isGFX11Plus in { 261 if P.HasExtVOP3DPP then 262 def _e64_dpp : VOP3_DPP_Pseudo <opName, P>; 263 } // End SubtargetPredicate = isGFX11Plus 264 } 265} 266 267class VOP2bInstAlias <VOP2_Pseudo ps, Instruction inst, 268 string OpName, string opnd> : 269 InstAlias <OpName#" "#!subst("vcc", opnd, ps.Pfl.Asm32), 270 (inst ps.Pfl.DstRC:$vdst, ps.Pfl.Src0RC32:$src0, 271 ps.Pfl.Src1RC32:$src1), 272 1, inst.AsmVariantName>, 273 PredicateControl { 274} 275 276multiclass VOP2bInstAliases<VOP2_Pseudo ps, VOP2_Real inst, string OpName> { 277 let WaveSizePredicate = isWave32 in { 278 def : VOP2bInstAlias<ps, inst, OpName, "vcc_lo">; 279 } 280 let WaveSizePredicate = isWave64 in { 281 def : VOP2bInstAlias<ps, inst, OpName, "vcc">; 282 } 283} 284 285multiclass 286 VOP2eInst_Base<string opName, VOPProfile P, bits<5> VOPDOp, string VOPDName, 287 SDPatternOperator node, string revOp, bit useSGPRInput> { 288 289 let SchedRW = [Write32Bit] in { 290 let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in { 291 if !empty(VOPDName) then 292 def _e32 : VOP2_Pseudo <opName, P>, 293 Commutable_REV<revOp#"_e32", !eq(revOp, opName)>; 294 else 295 def _e32 : VOP2_Pseudo <opName, P>, 296 Commutable_REV<revOp#"_e32", !eq(revOp, opName)>, 297 VOPD_Component<VOPDOp, VOPDName>; 298 299 if P.HasExtSDWA then 300 def _sdwa : VOP2_SDWA_Pseudo <opName, P> { 301 let AsmMatchConverter = "cvtSdwaVOP2e"; 302 } 303 304 if P.HasExtDPP then 305 def _dpp : VOP2_DPP_Pseudo <opName, P>; 306 } 307 308 def _e64 : VOP3InstBase <opName, P, node, 1>, 309 Commutable_REV<revOp#"_e64", !eq(revOp, opName)> { 310 let isReMaterializable = 1; 311 } 312 313 let SubtargetPredicate = isGFX11Plus in { 314 if P.HasExtVOP3DPP then 315 def _e64_dpp : VOP3_DPP_Pseudo <opName, P>; 316 } // End SubtargetPredicate = isGFX11Plus 317 } 318} 319 320multiclass 321 VOP2eInst<string opName, VOPProfile P, SDPatternOperator node = null_frag, 322 string revOp = opName, bit useSGPRInput = !eq(P.NumSrcArgs, 3)> 323 : VOP2eInst_Base<opName, P, 0, "", node, revOp, useSGPRInput>; 324 325multiclass 326 VOP2eInst_VOPD<string opName, VOPProfile P, bits<5> VOPDOp, string VOPDName, 327 SDPatternOperator node = null_frag, string revOp = opName, 328 bit useSGPRInput = !eq(P.NumSrcArgs, 3)> 329 : VOP2eInst_Base<opName, P, VOPDOp, VOPDName, node, revOp, useSGPRInput>; 330 331class VOP2eInstAlias <VOP2_Pseudo ps, Instruction inst, string opnd = ""> : 332 InstAlias <ps.OpName#" "#ps.Pfl.Asm32#", "#opnd, 333 (inst ps.Pfl.DstRC:$vdst, ps.Pfl.Src0RC32:$src0, 334 ps.Pfl.Src1RC32:$src1), 335 1, inst.AsmVariantName>, 336 PredicateControl; 337 338class VOP2e64InstAlias <VOP3_Pseudo ps, Instruction inst> : 339 InstAlias <ps.OpName#" "#ps.Pfl.Asm64, 340 (inst ps.Pfl.DstRC:$vdst, VOPDstS64orS32:$sdst, 341 ps.Pfl.Src0RC32:$src0, ps.Pfl.Src1RC32:$src1, Clamp:$clamp), 342 1, inst.AsmVariantName>, 343 PredicateControl; 344 345multiclass VOP2eInstAliases<VOP2_Pseudo ps, VOP2_Real inst> { 346 let WaveSizePredicate = isWave32 in { 347 def : VOP2eInstAlias<ps, inst, "vcc_lo">; 348 } 349 let WaveSizePredicate = isWave64 in { 350 def : VOP2eInstAlias<ps, inst, "vcc">; 351 } 352} 353 354class VOP_MADK_Base<ValueType vt> : VOPProfile <[vt, vt, vt, vt]> { 355 string AsmVOPDXDeferred = ?; 356} 357 358class VOP_MADAK <ValueType vt> : VOP_MADK_Base<vt> { 359 field Operand ImmOpType = !if(!eq(vt.Size, 32), KImmFP32, KImmFP16); 360 field dag Ins32 = !if(!eq(vt.Size, 32), 361 (ins VSrc_f32_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm), 362 (ins VSrc_f16_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm)); 363 field dag InsVOPDX = (ins VSrc_f32_Deferred:$src0X, VGPR_32:$vsrc1X, ImmOpType:$imm); 364 // Note that both src0X and imm are deferred 365 let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X, VGPR_32:$vsrc1X, ImmOpType:$immDeferred); 366 field dag InsVOPDY = (ins VSrc_f32_Deferred:$src0Y, VGPR_32:$vsrc1Y, ImmOpType:$imm); 367 368 field string Asm32 = "$vdst, $src0, $src1, $imm"; 369 field string AsmVOPDX = "$vdstX, $src0X, $vsrc1X, $imm"; 370 let AsmVOPDXDeferred = "$vdstX, $src0X, $vsrc1X, $immDeferred"; 371 field string AsmVOPDY = "$vdstY, $src0Y, $vsrc1Y, $imm"; 372 field bit HasExt = 0; 373 let IsSingle = 1; 374} 375 376def VOP_MADAK_F16 : VOP_MADAK <f16>; 377def VOP_MADAK_F16_t16 : VOP_MADAK <f16> { 378 let IsTrue16 = 1; 379 let IsRealTrue16 = 1; 380 let DstRC = getVALUDstForVT<DstVT, 1/*IsTrue16*/, 0/*IsVOP3Encoding*/>.ret; 381 let Ins32 = (ins VSrcT_f16_Lo128_Deferred:$src0, VGPRSrc_16_Lo128:$src1, ImmOpType:$imm); 382} 383def VOP_MADAK_F16_fake16 : VOP_MADAK <f16> { 384 let IsTrue16 = 1; 385 let DstRC = getVALUDstForVT_fake16<DstVT>.ret; 386 let Ins32 = (ins VSrcFake16_f16_Lo128_Deferred:$src0, VGPRSrc_32_Lo128:$src1, ImmOpType:$imm); 387} 388def VOP_MADAK_F32 : VOP_MADAK <f32>; 389 390class VOP_MADMK <ValueType vt> : VOP_MADK_Base<vt> { 391 field Operand ImmOpType = !if(!eq(vt.Size, 32), KImmFP32, KImmFP16); 392 field dag Ins32 = !if(!eq(vt.Size, 32), 393 (ins VSrc_f32_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1), 394 (ins VSrc_f16_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1)); 395 field dag InsVOPDX = (ins VSrc_f32_Deferred:$src0X, ImmOpType:$imm, VGPR_32:$vsrc1X); 396 let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X, ImmOpType:$immDeferred, VGPR_32:$vsrc1X); 397 field dag InsVOPDY = (ins VSrc_f32_Deferred:$src0Y, ImmOpType:$imm, VGPR_32:$vsrc1Y); 398 399 field string Asm32 = "$vdst, $src0, $imm, $src1"; 400 field string AsmVOPDX = "$vdstX, $src0X, $imm, $vsrc1X"; 401 let AsmVOPDXDeferred = "$vdstX, $src0X, $immDeferred, $vsrc1X"; 402 field string AsmVOPDY = "$vdstY, $src0Y, $imm, $vsrc1Y"; 403 field bit HasExt = 0; 404 let IsSingle = 1; 405} 406 407def VOP_MADMK_F16 : VOP_MADMK <f16>; 408def VOP_MADMK_F16_t16 : VOP_MADMK <f16> { 409 let IsTrue16 = 1; 410 let IsRealTrue16 = 1; 411 let DstRC = getVALUDstForVT<DstVT, 1/*IsTrue16*/, 0/*IsVOP3Encoding*/>.ret; 412 let Ins32 = (ins VSrcT_f16_Lo128_Deferred:$src0, ImmOpType:$imm, VGPRSrc_16_Lo128:$src1); 413} 414def VOP_MADMK_F16_fake16 : VOP_MADMK <f16> { 415 let IsTrue16 = 1; 416 let DstRC = getVALUDstForVT_fake16<DstVT>.ret; 417 let Ins32 = (ins VSrcFake16_f16_Lo128_Deferred:$src0, ImmOpType:$imm, VGPRSrc_32_Lo128:$src1); 418} 419def VOP_MADMK_F32 : VOP_MADMK <f32>; 420 421// FIXME: Remove src2_modifiers. It isn't used, so is wasting memory 422// and processing time but it makes it easier to convert to mad. 423class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, vt0]> { 424 let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT<Src2VT>.ret:$src2); 425 // Src2 must accept the same operand types as vdst, namely VGPRs only 426 let Src2RC64 = getVOP3VRegSrcForVT<Src2VT, IsTrue16, !not(IsRealTrue16)>.ret; 427 let Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, 3, 428 0, HasModifiers, HasModifiers, HasOMod, 429 Src0Mod, Src1Mod, Src2Mod>.ret; 430 let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, 431 Src1ModDPP:$src1_modifiers, Src1DPP:$src1, 432 getVregSrcForVT<Src2VT>.ret:$src2, // stub argument 433 dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask, 434 DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl); 435 let InsDPP16 = !con(InsDPP, (ins Dpp16FI:$fi)); 436 let InsVOP3Base = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, RegisterOperand<VGPR_32>, 3, 437 0, HasModifiers, HasModifiers, HasOMod, 438 Src0ModVOP3DPP, Src1ModVOP3DPP, Src2Mod, HasOpSel>.ret; 439 // We need a dummy src2 tied to dst to track the use of that register for s_delay_alu 440 let InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X, VGPRSrc_32:$src2X); 441 let InsVOPDXDeferred = 442 (ins !if(!eq(Src0VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0X, 443 VGPR_32:$vsrc1X, VGPRSrc_32:$src2X); 444 let InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y, VGPRSrc_32:$src2Y); 445 let InsVOPDYDeferred = 446 (ins !if(!eq(Src1VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0Y, 447 VGPR_32:$vsrc1Y, VGPRSrc_32:$src2Y); 448 449 let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, 450 Src1ModDPP:$src1_modifiers, Src1DPP:$src1, 451 getVregSrcForVT<Src2VT>.ret:$src2, // stub argument 452 dpp8:$dpp8, Dpp8FI:$fi); 453 let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, 454 Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1, 455 getVregSrcForVT<Src2VT>.ret:$src2, // stub argument 456 Clamp:$clamp, omod:$omod, 457 dst_sel:$dst_sel, dst_unused:$dst_unused, 458 src0_sel:$src0_sel, src1_sel:$src1_sel); 459 let Asm32 = getAsm32<1, 2, vt0>.ret; 460 let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt0>.ret; 461 let AsmDPP16 = getAsmDPP16<1, 2, HasModifiers, vt0>.ret; 462 let AsmDPP8 = getAsmDPP8<1, 2, 0, vt0>.ret; 463 let AsmSDWA = getAsmSDWA<1, 2, vt0>.ret; 464 let AsmSDWA9 = getAsmSDWA9<1, 1, 2, vt0>.ret; 465 let AsmVOP3Base = 466 getAsmVOP3Base<2 /*NumSrcArgs*/, HasDst, HasClamp, 467 HasOpSel, HasOMod, IsVOP3P, HasModifiers, 468 HasModifiers, HasModifiers, 469 0 /*Src2HasMods*/, DstVT>.ret; 470 let HasSrc2 = 0; 471 let HasSrc2Mods = 0; 472 473 let HasExt = 1; 474 let HasExtDPP = 1; 475 let HasExt32BitDPP = 1; 476 let HasExtSDWA = 1; 477 let HasExtSDWA9 = 0; 478 let TieRegDPP = "$src2"; 479} 480 481def VOP_MAC_F16 : VOP_MAC <f16>; 482def VOP_MAC_F16_t16 : VOP_MAC <f16> { 483 let IsTrue16 = 1; 484 let IsRealTrue16 = 1; 485 let HasOpSel = 1; 486 let DstRC = VOPDstOperand_t16Lo128; 487 let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret; 488 let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret; 489 let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret:$src2); 490 let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret; 491 let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret; 492 let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret; 493 let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0/*IsFake16*/>.ret; 494 let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0/*IsFake16*/>.ret; 495 let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0/*IsFake16*/>.ret; 496 let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, 497 Src1ModDPP:$src1_modifiers, Src1DPP:$src1, 498 getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret:$src2, // stub argument 499 dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask, 500 DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl); 501 let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, 502 Src1ModDPP:$src1_modifiers, Src1DPP:$src1, 503 getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret:$src2, // stub argument 504 dpp8:$dpp8, Dpp8FI:$fi); 505 let DstRC64 = getVALUDstForVT<DstVT, 1/*IsTrue*/, 1/*IsVOP3Encoding*/>.ret; 506 let Src0RC64 = getVOP3SrcForVT<Src0VT, 1/*IsTrue16*/>.ret; 507 let Src1RC64 = getVOP3SrcForVT<Src1VT, 1/*IsTrue16*/>.ret; 508 let Src0VOP3DPP = VGPRSrc_16; 509 let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0/*IsFake16*/>.ret; 510 let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 0/*IsFake16*/>.ret; 511 let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 0/*IsFake16*/>.ret; 512 let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 0/*IsFake16*/>.ret; 513 let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 0/*IsFake16*/>.ret; 514 let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret; 515 let Src1Mod = getSrcMod<Src1VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret; 516 let Src2Mod = getSrcMod<Src2VT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret; 517} 518def VOP_MAC_F16_fake16 : VOP_MAC <f16> { 519 let IsTrue16 = 1; 520 let DstRC = getVALUDstForVT_fake16<DstVT>.ret; 521 let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret; 522 let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret; 523 let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret:$src2); 524 let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret; 525 let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret; 526 let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret; 527 let Src0ModDPP = getSrcModDPP_t16<Src0VT, 1/*IsFake16*/>.ret; 528 let Src1ModDPP = getSrcModDPP_t16<Src1VT, 1/*IsFake16*/>.ret; 529 let Src2ModDPP = getSrcModDPP_t16<Src2VT, 1/*IsFake16*/>.ret; 530 let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, 531 Src1ModDPP:$src1_modifiers, Src1DPP:$src1, 532 getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret:$src2, // stub argument 533 dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask, 534 DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl); 535 let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, 536 Src1ModDPP:$src1_modifiers, Src1DPP:$src1, 537 getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret:$src2, // stub argument 538 dpp8:$dpp8, Dpp8FI:$fi); 539 let DstRC64 = getVALUDstForVT<DstVT>.ret; 540 let Src0VOP3DPP = VGPRSrc_32; 541 let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret; 542 let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1/*IsFake16*/>.ret; 543 let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 1/*IsFake16*/>.ret; 544 let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 1/*IsFake16*/>.ret; 545 let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 1/*IsFake16*/>.ret; 546 let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret; 547 let Src1Mod = getSrcMod<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret; 548 let Src2Mod = getSrcMod<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret; 549} 550 551def VOP_MAC_F32 : VOP_MAC <f32>; 552let HasExtDPP = 0, HasExt32BitDPP = 0 in 553def VOP_MAC_LEGACY_F32 : VOP_MAC <f32>; 554let HasExtSDWA = 0, HasExt32BitDPP = 0, HasExt64BitDPP = 1 in 555def VOP_MAC_F64 : VOP_MAC <f64>; 556 557class VOP_DOT_ACC<ValueType vt0, ValueType vt1> : VOP_MAC<vt0, vt1> { 558 let HasClamp = 0; 559 let HasExtSDWA = 0; 560 let HasOpSel = 0; 561 let IsPacked = 0; 562} 563 564def VOP_DOT_ACC_F32_V2F16 : VOP_DOT_ACC<f32, v2f16> { 565 let Src0ModDPP = FPVRegInputMods; 566 let Src1ModDPP = FPVRegInputMods; 567 let HasClamp = 1; 568} 569 570def VOP_DOT_ACC_F32_V2BF16 : VOP_DOT_ACC<f32, v2bf16> { 571 let Src0ModDPP = FPVRegInputMods; 572 let Src1ModDPP = FPVRegInputMods; 573 let HasClamp = 1; 574} 575 576def VOP_DOT_ACC_I32_I32 : VOP_DOT_ACC<i32, i32> { 577 let HasExtVOP3DPP = 0; 578 let HasSrc0Mods = 1; 579 let HasSrc1Mods = 1; 580 let HasClamp = 1; 581 582 let Src0Mod = Int32InputMods; 583 let Src1Mod = Int32InputMods; 584 let Ins64 = getIns64<Src0RC64, Src1RC64, getVregSrcForVT<Src2VT>.ret, 585 3 /*NumSrcArgs*/, HasClamp, 1 /*HasModifiers*/, 586 1 /*HasSrc2Mods*/, HasOMod, 587 Src0Mod, Src1Mod, Src2Mod>.ret; 588 let Asm64 = "$vdst, $src0, $src1$clamp"; 589} 590 591// Write out to vcc or arbitrary SGPR. 592def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], /*EnableClamp=*/1> { 593 let Asm32 = "$vdst, vcc, $src0, $src1"; 594 let AsmVOP3Base = "$vdst, $sdst, $src0, $src1$clamp"; 595 let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel"; 596 let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel"; 597 let AsmDPP = "$vdst, vcc, $src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; 598 let AsmDPP8 = "$vdst, vcc, $src0, $src1 $dpp8$fi"; 599 let AsmDPP16 = AsmDPP#"$fi"; 600 let InsDPP = (ins DstRCDPP:$old, 601 Src0DPP:$src0, 602 Src1DPP:$src1, 603 dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask, 604 DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl); 605 let InsDPP16 = !con(InsDPP, (ins Dpp16FI:$fi)); 606 let InsDPP8 = (ins DstRCDPP:$old, 607 Src0DPP:$src0, 608 Src1DPP:$src1, 609 dpp8:$dpp8, Dpp8FI:$fi); 610 let Outs32 = (outs DstRC:$vdst); 611 let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst); 612 let OutsVOP3DPP = Outs64; 613 let OutsVOP3DPP8 = Outs64; 614} 615 616// Write out to vcc or arbitrary SGPR and read in from vcc or 617// arbitrary SGPR. 618def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], /*EnableClamp=*/1> { 619 let HasSrc2Mods = 0; 620 let Asm32 = "$vdst, vcc, $src0, $src1, vcc"; 621 let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel"; 622 let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel"; 623 let AsmDPP = "$vdst, vcc, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; 624 let AsmDPP8 = "$vdst, vcc, $src0, $src1, vcc $dpp8$fi"; 625 let AsmDPP16 = AsmDPP#"$fi"; 626 let Outs32 = (outs DstRC:$vdst); 627 let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst); 628 let AsmVOP3Base = "$vdst, $sdst, $src0, $src1, $src2$clamp"; 629 let OutsVOP3DPP = Outs64; 630 let OutsVOP3DPP8 = Outs64; 631 632 // Suppress src2 implied by type since the 32-bit encoding uses an 633 // implicit VCC use. 634 let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1); 635 636 let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, 637 Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1, 638 Clamp:$clamp, 639 dst_sel:$dst_sel, dst_unused:$dst_unused, 640 src0_sel:$src0_sel, src1_sel:$src1_sel); 641 642 let InsDPP = (ins DstRCDPP:$old, 643 Src0DPP:$src0, 644 Src1DPP:$src1, 645 dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask, 646 DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl); 647 let InsDPP16 = !con(InsDPP, (ins Dpp16FI:$fi)); 648 let InsDPP8 = (ins DstRCDPP:$old, 649 Src0DPP:$src0, 650 Src1DPP:$src1, 651 dpp8:$dpp8, Dpp8FI:$fi); 652 653 let HasExt = 1; 654 let HasExtDPP = 1; 655 let HasExt32BitDPP = 1; 656 let HasExtSDWA = 1; 657 let HasExtSDWA9 = 1; 658} 659 660// Read in from vcc or arbitrary SGPR. 661class VOP2e_SGPR<list<ValueType> ArgVT> : VOPProfile<ArgVT> { 662 let Asm32 = "$vdst, $src0, $src1"; 663 let AsmSDWA = "$vdst, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel"; 664 let AsmSDWA9 = "$vdst, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel"; 665 let AsmDPP = "$vdst, $src0_modifiers, $src1_modifiers, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; 666 let AsmDPP8 = "$vdst, $src0, $src1, vcc $dpp8$fi"; 667 let AsmDPP16 = AsmDPP#"$fi"; 668 let AsmVOP3Base = "$vdst, $src0_modifiers, $src1_modifiers, $src2"; 669 670 let Outs32 = (outs DstRC:$vdst); 671 let Outs64 = (outs DstRC64:$vdst); 672 673 // Suppress src2 implied by type since the 32-bit encoding uses an 674 // implicit VCC use. 675 let Ins32 = (ins VSrc_f32:$src0, Src1RC32:$src1); 676 677 let HasModifiers = 1; 678 679 // Select FP modifiers for VOP3 680 let Src0Mod = !if(!eq(Src0VT.Size, 16), FP16InputMods, FP32InputMods); 681 let Src1Mod = Src0Mod; 682 683 let HasSrc0IntMods = 0; 684 let HasSrc1IntMods = 0; 685 let HasSrc0FloatMods = 1; 686 let HasSrc1FloatMods = 1; 687 let InsSDWA = (ins FP32SDWAInputMods:$src0_modifiers, SDWASrc_f32:$src0, 688 FP32SDWAInputMods:$src1_modifiers, SDWASrc_f32:$src1, 689 Clamp:$clamp, 690 dst_sel:$dst_sel, dst_unused:$dst_unused, 691 src0_sel:$src0_sel, src1_sel:$src1_sel); 692 693 let InsDPP = (ins DstRCDPP:$old, 694 FPVRegInputMods:$src0_modifiers, Src0DPP:$src0, 695 FPVRegInputMods:$src1_modifiers, Src1DPP:$src1, 696 dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask, 697 DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl); 698 let InsDPP16 = !con(InsDPP, (ins Dpp16FI:$fi)); 699 let InsDPP8 = (ins DstRCDPP:$old, 700 FPVRegInputMods:$src0_modifiers, Src0DPP:$src0, 701 FPVRegInputMods:$src1_modifiers, Src1DPP:$src1, 702 dpp8:$dpp8, Dpp8FI:$fi); 703 704 let Src0ModVOP3DPP = FPVRegInputMods; 705 let Src1ModVOP3DPP = FP32VCSrcInputMods; 706 707 let HasExt = 1; 708 let HasExtDPP = 1; 709 let HasExt32BitDPP = 1; 710 let HasExtSDWA = 1; 711 let HasExtSDWA9 = 1; 712} 713 714def VOP2e_I32_I32_I32_I1 : VOP2e_SGPR<[i32, i32, i32, i1]>; 715def VOP2e_I16_I16_I16_I1 : VOP2e_SGPR<[i16, i16, i16, i1]>; 716// V_CNDMASK_B16 is VOP3 only 717def VOP2e_I16_I16_I16_I1_true16 : VOP2e_SGPR<[i16, i16, i16, i1]> { 718 let IsTrue16 = 1; 719 let IsRealTrue16 = 1; 720 let HasOpSel = 1; 721 let DstRC64 = getVALUDstForVT<DstVT, 1, 1>.ret; 722 let Src0RC64 = getVOP3SrcForVT<Src0VT, 1/*IsTrue16*/>.ret; 723 let Src1RC64 = getVOP3SrcForVT<Src1VT, 1/*IsTrue16*/>.ret; 724 let Src2RC64 = getVOP3SrcForVT<Src2VT, 1/*IsTrue16*/>.ret; 725 let Src0Mod = getSrc0Mod<f16, DstVT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret; 726 let Src1Mod = getSrcMod<f16, 1/*IsTrue16*/, 0/*IsFake16*/>.ret; 727 let HasSrc2Mods = 0; 728 let InsVOP3OpSel = getInsVOP3Base<Src0RC64, Src1RC64, 729 Src2RC64, NumSrcArgs, 730 HasClamp, 1/*HasModifiers*/, 0/*HasSrc2Mods*/, HasOMod, 731 Src0Mod, Src1Mod, Src2Mod, 1/*HasOpSel*/>.ret; 732 let Src0VOP3DPP = VGPRSrc_16; 733 let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0/*IsFake16*/>.ret; 734 let Src0ModVOP3DPP = getSrc0ModVOP3DPP<f16, DstVT, 0/*IsFake16*/>.ret; 735 let Src1ModVOP3DPP = getSrcModVOP3DPP<f16, 0/*IsFake16*/>.ret; 736} 737def VOP2e_I16_I16_I16_I1_fake16 : VOP2e_SGPR<[i16, i16, i16, i1]> { 738 let IsTrue16 = 1; 739 let DstRC64 = getVALUDstForVT<DstVT>.ret; 740 741 let Src0Mod = getSrc0Mod<f16, DstVT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret; 742 let Src1Mod = getSrcMod<f16, 1/*IsTrue16*/, 1/*IsFake16*/>.ret; 743 744 let Src0VOP3DPP = VGPRSrc_32; 745 let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret; 746 let Src0ModVOP3DPP = getSrc0ModVOP3DPP<f16, DstVT, 1/*IsFake16*/>.ret; 747 let Src1ModVOP3DPP = getSrcModVOP3DPP<f16, 1/*IsFake16*/>.ret; 748} 749 750def VOP_READLANE : VOPProfile<[i32, i32, i32, untyped]> { 751 let Outs32 = (outs SReg_32:$vdst); 752 let Outs64 = Outs32; 753 let Ins32 = (ins VRegOrLdsSrc_32:$src0, SCSrc_b32:$src1); 754 let Ins64 = Ins32; 755 let Asm32 = " $vdst, $src0, $src1"; 756 let Asm64 = Asm32; 757 758 let HasExt = 0; 759 let HasExtDPP = 0; 760 let HasExt32BitDPP = 0; 761 let HasExt64BitDPP = 0; 762 let HasExtSDWA = 0; 763 let HasExtSDWA9 = 0; 764} 765 766def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> { 767 let Outs32 = (outs VGPR_32:$vdst); 768 let Outs64 = Outs32; 769 let Ins32 = (ins SCSrc_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in); 770 let Ins64 = Ins32; 771 let Asm32 = " $vdst, $src0, $src1"; 772 let Asm64 = Asm32; 773 let HasSrc2 = 0; 774 let HasSrc2Mods = 0; 775 776 let HasExt = 0; 777 let HasExtDPP = 0; 778 let HasExt32BitDPP = 0; 779 let HasExt64BitDPP = 0; 780 let HasExtSDWA = 0; 781 let HasExtSDWA9 = 0; 782} 783 784//===----------------------------------------------------------------------===// 785// VOP2 Instructions 786//===----------------------------------------------------------------------===// 787 788let SubtargetPredicate = isGFX11Plus, True16Predicate = UseRealTrue16Insts in 789defm V_CNDMASK_B16_t16 : VOP2eInst <"v_cndmask_b16_t16", VOP2e_I16_I16_I16_I1_true16>; 790let SubtargetPredicate = isGFX11Plus, True16Predicate = UseFakeTrue16Insts in 791defm V_CNDMASK_B16_fake16 : VOP2eInst <"v_cndmask_b16_fake16", VOP2e_I16_I16_I16_I1_fake16>; 792defm V_CNDMASK_B32 : VOP2eInst_VOPD <"v_cndmask_b32", VOP2e_I32_I32_I32_I1, 0x9, "v_cndmask_b32">; 793let SubtargetPredicate = HasMadMacF32Insts, isReMaterializable = 1 in 794def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>; 795 796let isCommutable = 1 in { 797let isReMaterializable = 1 in { 798defm V_ADD_F32 : VOP2Inst_VOPD <"v_add_f32", VOP_F32_F32_F32, 0x4, "v_add_f32", any_fadd>; 799defm V_SUB_F32 : VOP2Inst_VOPD <"v_sub_f32", VOP_F32_F32_F32, 0x5, "v_sub_f32", any_fsub>; 800defm V_SUBREV_F32 : VOP2Inst_VOPD <"v_subrev_f32", VOP_F32_F32_F32, 0x6, "v_subrev_f32", null_frag, "v_sub_f32">; 801defm V_MUL_LEGACY_F32 : VOP2Inst_VOPD <"v_mul_legacy_f32", VOP_F32_F32_F32, 0x7, "v_mul_dx9_zero_f32", AMDGPUfmul_legacy>; 802defm V_MUL_F32 : VOP2Inst_VOPD <"v_mul_f32", VOP_F32_F32_F32, 0x3, "v_mul_f32", any_fmul>; 803defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32_ARITH, AMDGPUmul_i24>; 804defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_I32_I32_I32, AMDGPUmulhi_i24>; 805defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32_ARITH, AMDGPUmul_u24>; 806defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_I32_I32_I32, AMDGPUmulhi_u24>; 807defm V_MIN_F32 : VOP2Inst_VOPD <"v_min_f32", VOP_F32_F32_F32, 0xb, "v_min_f32", fminnum_like>; 808defm V_MAX_F32 : VOP2Inst_VOPD <"v_max_f32", VOP_F32_F32_F32, 0xa, "v_max_f32", fmaxnum_like>; 809defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smin>; 810defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smax>; 811defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umin>; 812defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umax>; 813defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, clshr_rev_32, "v_lshr_b32">; 814defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, cashr_rev_32, "v_ashr_i32">; 815defm V_LSHLREV_B32 : VOP2Inst_VOPD <"v_lshlrev_b32", VOP_I32_I32_I32, 0x11, "v_lshlrev_b32", clshl_rev_32, "v_lshl_b32">; 816defm V_AND_B32 : VOP2Inst_VOPD <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, 0x12, "v_and_b32", and>; 817defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, or>; 818defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, xor>; 819} // End isReMaterializable = 1 820 821let mayRaiseFPException = 0 in { 822let OtherPredicates = [HasMadMacF32Insts] in { 823let Constraints = "$vdst = $src2", DisableEncoding="$src2", 824 isConvertibleToThreeAddress = 1 in { 825defm V_MAC_F32 : VOP2Inst <"v_mac_f32", VOP_MAC_F32>; 826 827let SubtargetPredicate = isGFX6GFX7GFX10 in 828defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_MAC_LEGACY_F32>; 829} // End Constraints = "$vdst = $src2", DisableEncoding="$src2", 830 // isConvertibleToThreeAddress = 1 831 832let isReMaterializable = 1 in 833def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, []>; 834} // End OtherPredicates = [HasMadMacF32Insts] 835} // End mayRaiseFPException = 0 836 837// No patterns so that the scalar instructions are always selected. 838// The scalar versions will be replaced with vector when needed later. 839defm V_SUB_CO_U32 : VOP2bInst <"v_sub_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_co_u32">; 840defm V_SUBREV_CO_U32 : VOP2bInst <"v_subrev_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_co_u32">; 841defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32">; 842defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32">; 843 844 845let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1 in { 846 defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32">; 847 defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32">; 848} 849 850let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1, isAdd = 1 in { 851 defm V_ADD_U32 : VOP2Inst_VOPD <"v_add_u32", VOP_I32_I32_I32_ARITH, 0x10, "v_add_nc_u32", null_frag, "v_add_u32">; 852} 853 854let isAdd = 1 in { 855 defm V_ADD_CO_U32 : VOP2bInst <"v_add_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_add_co_u32">; 856 defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_addc_u32">; 857} 858 859} // End isCommutable = 1 860 861// These are special and do not read the exec mask. 862let isConvergent = 1, Uses = []<Register> in { 863def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE, []>; 864let IsNeverUniform = 1, Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in { 865def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE, []>; 866} // End IsNeverUniform, $vdst = $vdst_in, DisableEncoding $vdst_in 867} // End isConvergent = 1 868 869foreach vt = Reg32Types.types in { 870 def : GCNPat<(vt (int_amdgcn_readlane vt:$src0, i32:$src1)), 871 (V_READLANE_B32 VRegOrLdsSrc_32:$src0, SCSrc_b32:$src1) 872 >; 873 874 def : GCNPat<(vt (int_amdgcn_writelane vt:$src0, i32:$src1, vt:$src2)), 875 (V_WRITELANE_B32 SCSrc_b32:$src0, SCSrc_b32:$src1, VGPR_32:$src2) 876 >; 877} 878 879let isReMaterializable = 1 in { 880defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_I32_I32_I32>; 881defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_I32_I32_I32, add_ctpop>; 882let IsNeverUniform = 1 in { 883defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_lo>; 884defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_hi>; 885} // End IsNeverUniform = 1 886defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, any_fldexp>; 887 888let ReadsModeReg = 0, mayRaiseFPException = 0 in { 889defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_i16_f32>; 890defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_u16_f32>; 891} 892 893defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_V2F16_F32_F32, AMDGPUpkrtz_f16_f32>; 894defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_V2I16_I32_I32, AMDGPUpk_u16_u32>; 895defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_V2I16_I32_I32, AMDGPUpk_i16_i32>; 896 897 898let SubtargetPredicate = isGFX6GFX7 in { 899defm V_MIN_LEGACY_F32 : VOP2Inst <"v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy>; 900defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy>; 901} // End SubtargetPredicate = isGFX6GFX7 902 903let isCommutable = 1 in { 904let SubtargetPredicate = isGFX6GFX7 in { 905defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, csrl_32>; 906defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, csra_32>; 907defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, cshl_32>; 908} // End SubtargetPredicate = isGFX6GFX7 909} // End isCommutable = 1 910} // End isReMaterializable = 1 911 912defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_NO_EXT<VOP_I32_F32_I32>>; // TODO: set "Uses = dst" 913 914class DivergentBinOp<SDPatternOperator Op, VOP_Pseudo Inst> : 915 GCNPat< 916 (DivergentBinFrag<Op> Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1), 917 !if(!cast<Commutable_REV>(Inst).IsOrig, 918 (Inst $src0, $src1), 919 (Inst $src1, $src0) 920 ) 921 >; 922 923class DivergentClampingBinOp<SDPatternOperator Op, VOP_Pseudo Inst> : 924 GCNPat< 925 (DivergentBinFrag<Op> Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1), 926 !if(!cast<Commutable_REV>(Inst).IsOrig, 927 (Inst $src0, $src1, 0), 928 (Inst $src1, $src0, 0) 929 ) 930 >; 931 932def : DivergentBinOp<csrl_32, V_LSHRREV_B32_e64>; 933def : DivergentBinOp<csra_32, V_ASHRREV_I32_e64>; 934def : DivergentBinOp<cshl_32, V_LSHLREV_B32_e64>; 935 936let SubtargetPredicate = HasAddNoCarryInsts in { 937 def : DivergentClampingBinOp<add, V_ADD_U32_e64>; 938 def : DivergentClampingBinOp<sub, V_SUB_U32_e64>; 939} 940 941let SubtargetPredicate = isGFX6GFX7GFX8GFX9, Predicates = [isGFX6GFX7GFX8GFX9] in { 942def : DivergentClampingBinOp<add, V_ADD_CO_U32_e64>; 943def : DivergentClampingBinOp<sub, V_SUB_CO_U32_e64>; 944} 945 946def : DivergentBinOp<adde, V_ADDC_U32_e32>; 947def : DivergentBinOp<sube, V_SUBB_U32_e32>; 948 949class divergent_i64_BinOp <SDPatternOperator Op, Instruction Inst> : 950 GCNPat< 951 (DivergentBinFrag<Op> i64:$src0, i64:$src1), 952 (REG_SEQUENCE VReg_64, 953 (Inst 954 (i32 (EXTRACT_SUBREG $src0, sub0)), 955 (i32 (EXTRACT_SUBREG $src1, sub0)) 956 ), sub0, 957 (Inst 958 (i32 (EXTRACT_SUBREG $src0, sub1)), 959 (i32 (EXTRACT_SUBREG $src1, sub1)) 960 ), sub1 961 ) 962 >; 963 964def : divergent_i64_BinOp <and, V_AND_B32_e64>; 965def : divergent_i64_BinOp <or, V_OR_B32_e64>; 966def : divergent_i64_BinOp <xor, V_XOR_B32_e64>; 967 968// mul24 w/ 64 bit output. 969class mul24_64_Pat<SDPatternOperator Op, Instruction InstLo, Instruction InstHi> : GCNPat< 970 (i64 (Op i32:$src0, i32:$src1)), 971 (REG_SEQUENCE VReg_64, 972 (InstLo $src0, $src1), sub0, 973 (InstHi $src0, $src1), sub1) 974>; 975 976def : mul24_64_Pat<AMDGPUmul_i24, V_MUL_I32_I24_e64, V_MUL_HI_I32_I24_e64>; 977def : mul24_64_Pat<AMDGPUmul_u24, V_MUL_U32_U24_e64, V_MUL_HI_U32_U24_e64>; 978 979//===----------------------------------------------------------------------===// 980// 16-Bit Operand Instructions 981//===----------------------------------------------------------------------===// 982 983// The ldexp.f16 intrinsic expects a integer src1 operand, though the hardware 984// encoding treats src1 as an f16 985def LDEXP_F16_VOPProfile : VOPProfile <[f16, f16, f16, untyped]> { 986 let Src1Mod = Int32InputMods; 987 let Src1ModDPP = IntVRegInputMods; 988 let Src1ModVOP3DPP = IntVRegInputMods; 989 // SDWA sext is the only modifier allowed. 990 let HasSrc1IntMods = 1; 991 let HasSrc1FloatMods = 0; 992 let Src1ModSDWA = Int16SDWAInputMods; 993} 994def LDEXP_F16_VOPProfile_True16 : VOPProfile_True16<VOP_F16_F16_F16> { 995 let Src1Mod = IntT16InputMods<0/*IsFake16*/>; 996 let Src1ModDPP = IntT16_Lo128VRegInputMods<0/*IsFake16*/>; 997 let Src1ModVOP3DPP = IntT16VCSrcInputMods<0/*IsFake16*/>; 998} 999def LDEXP_F16_VOPProfile_Fake16 : VOPProfile_Fake16<VOP_F16_F16_F16> { 1000 let Src1Mod = Int32InputMods; 1001 let Src1ModDPP = IntT16_Lo128VRegInputMods<1/*IsFake16*/>; 1002 let Src1ModVOP3DPP = IntT16VCSrcInputMods<1/*IsFake16*/>; 1003} 1004 1005let isReMaterializable = 1 in { 1006let FPDPRounding = 1 in { 1007 let OtherPredicates = [Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in 1008 defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", LDEXP_F16_VOPProfile>; 1009 let SubtargetPredicate = UseRealTrue16Insts in 1010 defm V_LDEXP_F16_t16 : VOP2Inst <"v_ldexp_f16_t16", LDEXP_F16_VOPProfile_True16>; 1011 let SubtargetPredicate = UseFakeTrue16Insts in 1012 defm V_LDEXP_F16_fake16 : VOP2Inst <"v_ldexp_f16_fake16", LDEXP_F16_VOPProfile_Fake16, null_frag, "v_ldexp_f16_fake16">; 1013} // End FPDPRounding = 1 1014defm V_LSHLREV_B16 : VOP2Inst_e64_t16 <"v_lshlrev_b16", VOP_I16_I16_I16, clshl_rev_16>; 1015defm V_LSHRREV_B16 : VOP2Inst_e64_t16 <"v_lshrrev_b16", VOP_I16_I16_I16, clshr_rev_16>; 1016defm V_ASHRREV_I16 : VOP2Inst_e64_t16 <"v_ashrrev_i16", VOP_I16_I16_I16, cashr_rev_16>; 1017let isCommutable = 1 in { 1018let FPDPRounding = 1 in { 1019defm V_ADD_F16 : VOP2Inst_t16 <"v_add_f16", VOP_F16_F16_F16, any_fadd>; 1020defm V_SUB_F16 : VOP2Inst_t16 <"v_sub_f16", VOP_F16_F16_F16, any_fsub>; 1021defm V_SUBREV_F16 : VOP2Inst_t16 <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub_f16">; 1022defm V_MUL_F16 : VOP2Inst_t16 <"v_mul_f16", VOP_F16_F16_F16, any_fmul>; 1023} // End FPDPRounding = 1 1024defm V_MUL_LO_U16 : VOP2Inst_e64_t16 <"v_mul_lo_u16", VOP_I16_I16_I16, mul>; 1025defm V_MAX_F16 : VOP2Inst_t16 <"v_max_f16", VOP_F16_F16_F16, fmaxnum_like>; 1026defm V_MIN_F16 : VOP2Inst_t16 <"v_min_f16", VOP_F16_F16_F16, fminnum_like>; 1027defm V_MAX_U16 : VOP2Inst_e64_t16 <"v_max_u16", VOP_I16_I16_I16, umax>; 1028defm V_MAX_I16 : VOP2Inst_e64_t16 <"v_max_i16", VOP_I16_I16_I16, smax>; 1029defm V_MIN_U16 : VOP2Inst_e64_t16 <"v_min_u16", VOP_I16_I16_I16, umin>; 1030defm V_MIN_I16 : VOP2Inst_e64_t16 <"v_min_i16", VOP_I16_I16_I16, smin>; 1031} // End isCommutable = 1 1032} // End isReMaterializable = 1 1033 1034class LDEXP_F16_Pat <SDPatternOperator op, VOP_Pseudo inst, VOPProfile P = inst.Pfl> : GCNPat < 1035 (P.DstVT (op (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)), 1036 (i16 (VOP3Mods0 P.Src1VT:$src1, i32:$src1_modifiers)))), 1037 (inst $src0_modifiers, $src0, 1038 $src1_modifiers, $src1, 1039 $clamp, /* clamp */ 1040 $omod /* omod */) 1041>; 1042 1043let OtherPredicates = [NotHasTrue16BitInsts] in 1044def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_e64>; 1045 1046class LDEXP_F16_t16_Pat <SDPatternOperator op, VOP_Pseudo inst, VOPProfile P = inst.Pfl> : GCNPat < 1047 (P.DstVT (op (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)), 1048 (i16 (VOP3Mods0 P.Src1VT:$src1, i32:$src1_modifiers)))), 1049 (inst $src0_modifiers, $src0, 1050 $src1_modifiers, $src1, 1051 $clamp, /* clamp */ 1052 $omod, /* omod */ 1053 0) /* op_sel */ 1054>; 1055 1056let OtherPredicates = [UseRealTrue16Insts] in 1057def : LDEXP_F16_t16_Pat<any_fldexp, V_LDEXP_F16_t16_e64>; 1058 1059let OtherPredicates = [UseFakeTrue16Insts] in 1060def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_fake16_e64>; 1061 1062let SubtargetPredicate = isGFX11Plus in { 1063 let isCommutable = 1 in { 1064 defm V_AND_B16_t16 : VOP2Inst_e64 <"v_and_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, and>; 1065 defm V_AND_B16_fake16 : VOP2Inst_e64 <"v_and_b16_fake16", VOPProfile_Fake16<VOP_I16_I16_I16>, and>; 1066 defm V_OR_B16_t16 : VOP2Inst_e64 <"v_or_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, or>; 1067 defm V_OR_B16_fake16 : VOP2Inst_e64 <"v_or_b16_fake16", VOPProfile_Fake16<VOP_I16_I16_I16>, or>; 1068 defm V_XOR_B16_t16 : VOP2Inst_e64 <"v_xor_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, xor>; 1069 defm V_XOR_B16_fake16 : VOP2Inst_e64 <"v_xor_b16_fake16", VOPProfile_Fake16<VOP_I16_I16_I16>, xor>; 1070 } // End isCommutable = 1 1071} // End SubtargetPredicate = isGFX11Plus 1072 1073let FPDPRounding = 1, isReMaterializable = 1, FixedSize = 1 in { 1074let SubtargetPredicate = isGFX10Plus, True16Predicate = NotHasTrue16BitInsts in { 1075 def V_FMAMK_F16 : VOP2_Pseudo <"v_fmamk_f16", VOP_MADMK_F16, [], "">; 1076} 1077let True16Predicate = UseRealTrue16Insts in { 1078 def V_FMAMK_F16_t16 : VOP2_Pseudo <"v_fmamk_f16_t16", VOP_MADMK_F16_t16, [], "">; 1079} 1080let True16Predicate = UseFakeTrue16Insts in { 1081 def V_FMAMK_F16_fake16 : VOP2_Pseudo <"v_fmamk_f16_fake16", VOP_MADMK_F16_fake16, [], "">; 1082} 1083 1084let isCommutable = 1 in { 1085let SubtargetPredicate = isGFX10Plus, True16Predicate = NotHasTrue16BitInsts in { 1086 def V_FMAAK_F16 : VOP2_Pseudo <"v_fmaak_f16", VOP_MADAK_F16, [], "">; 1087} 1088let True16Predicate = UseRealTrue16Insts in { 1089 def V_FMAAK_F16_t16 : VOP2_Pseudo <"v_fmaak_f16_t16", VOP_MADAK_F16_t16, [], "">; 1090} 1091let True16Predicate = UseFakeTrue16Insts in { 1092 def V_FMAAK_F16_fake16 : VOP2_Pseudo <"v_fmaak_f16_fake16", VOP_MADAK_F16_fake16, [], "">; 1093} 1094} // End isCommutable = 1 1095} // End FPDPRounding = 1, isReMaterializable = 1, FixedSize = 1 1096 1097let Constraints = "$vdst = $src2", 1098 DisableEncoding="$src2", 1099 isConvertibleToThreeAddress = 1, 1100 isCommutable = 1 in { 1101let SubtargetPredicate = isGFX10Plus in { 1102let True16Predicate = NotHasTrue16BitInsts in { 1103 defm V_FMAC_F16 : VOP2Inst <"v_fmac_f16", VOP_MAC_F16>; 1104} 1105let True16Predicate = UseRealTrue16Insts in { 1106 defm V_FMAC_F16_t16 : VOP2Inst <"v_fmac_f16_t16", VOP_MAC_F16_t16>; 1107} 1108let True16Predicate = UseFakeTrue16Insts in { 1109 defm V_FMAC_F16_fake16 : VOP2Inst <"v_fmac_f16_fake16", VOP_MAC_F16_fake16>; 1110} 1111} // End SubtargetPredicate = isGFX10Plus 1112} // End FMAC Constraints 1113 1114let SubtargetPredicate = Has16BitInsts in { 1115let isReMaterializable = 1 in { 1116let FPDPRounding = 1 in { 1117 def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">; 1118} // End FPDPRounding = 1 1119let isCommutable = 1 in { 1120let mayRaiseFPException = 0 in { 1121 def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">; 1122} 1123let SubtargetPredicate = isGFX8GFX9 in { 1124 defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16_ARITH, add>; 1125 defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16_ARITH, sub>; 1126 defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16_ARITH, null_frag, "v_sub_u16">; 1127} 1128} // End isCommutable = 1 1129} // End isReMaterializable = 1 1130 1131// FIXME: Missing FPDPRounding 1132let Constraints = "$vdst = $src2", DisableEncoding="$src2", 1133 isConvertibleToThreeAddress = 1, isCommutable = 1 in { 1134defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_MAC_F16>; 1135} 1136} // End SubtargetPredicate = Has16BitInsts 1137 1138 1139let SubtargetPredicate = HasDLInsts in { 1140 1141let isReMaterializable = 1 in 1142defm V_XNOR_B32 : VOP2Inst <"v_xnor_b32", VOP_I32_I32_I32, xnor>; 1143 1144def : GCNPat< 1145 (i32 (DivergentUnaryFrag<not> (xor_oneuse i32:$src0, i32:$src1))), 1146 (i32 (V_XNOR_B32_e64 $src0, $src1)) 1147>; 1148 1149def : GCNPat< 1150 (i32 (DivergentBinFrag<xor_oneuse> (not i32:$src0), i32:$src1)), 1151 (i32 (V_XNOR_B32_e64 $src0, $src1)) 1152>; 1153 1154def : GCNPat< 1155 (i64 (DivergentUnaryFrag<not> (xor_oneuse i64:$src0, i64:$src1))), 1156 (REG_SEQUENCE VReg_64, (i32 (V_XNOR_B32_e64 1157 (i32 (EXTRACT_SUBREG $src0, sub0)), 1158 (i32 (EXTRACT_SUBREG $src1, sub0)))), sub0, 1159 (i32 (V_XNOR_B32_e64 1160 (i32 (EXTRACT_SUBREG $src0, sub1)), 1161 (i32 (EXTRACT_SUBREG $src1, sub1)))), sub1) 1162>; 1163 1164def : GCNPat< 1165 (i64 (DivergentBinFrag<xor_oneuse> (not i64:$src0), i64:$src1)), 1166 (REG_SEQUENCE VReg_64, (i32 (V_XNOR_B32_e64 1167 (i32 (EXTRACT_SUBREG $src0, sub0)), 1168 (i32 (EXTRACT_SUBREG $src1, sub0)))), sub0, 1169 (i32 (V_XNOR_B32_e64 1170 (i32 (EXTRACT_SUBREG $src0, sub1)), 1171 (i32 (EXTRACT_SUBREG $src1, sub1)))), sub1) 1172>; 1173 1174let Constraints = "$vdst = $src2", 1175 DisableEncoding = "$src2", 1176 isConvertibleToThreeAddress = 1, 1177 isCommutable = 1 in 1178defm V_FMAC_F32 : VOP2Inst_VOPD <"v_fmac_f32", VOP_MAC_F32, 0x0, "v_fmac_f32">; 1179} // End SubtargetPredicate = HasDLInsts 1180 1181let SubtargetPredicate = HasFmaLegacy32 in { 1182 1183let Constraints = "$vdst = $src2", 1184 DisableEncoding = "$src2", 1185 isConvertibleToThreeAddress = 1, 1186 isCommutable = 1 in 1187defm V_FMAC_LEGACY_F32 : VOP2Inst <"v_fmac_legacy_f32", VOP_MAC_LEGACY_F32>; 1188 1189} // End SubtargetPredicate = HasFmaLegacy32 1190 1191let SubtargetPredicate = HasFmacF64Inst, 1192 Constraints = "$vdst = $src2", 1193 DisableEncoding="$src2", 1194 isConvertibleToThreeAddress = 1, 1195 isCommutable = 1, 1196 SchedRW = [WriteDoubleAdd] in 1197defm V_FMAC_F64 : VOP2Inst <"v_fmac_f64", VOP_MAC_F64>; 1198 1199let Constraints = "$vdst = $src2", 1200 DisableEncoding="$src2", 1201 isConvertibleToThreeAddress = 1, 1202 isCommutable = 1, 1203 IsDOT = 1 in { 1204 let SubtargetPredicate = HasDot5Insts in 1205 defm V_DOT2C_F32_F16 : VOP2Inst_VOPD<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16, 0xc, "v_dot2acc_f32_f16">; 1206 let SubtargetPredicate = HasDot6Insts in 1207 defm V_DOT4C_I32_I8 : VOP2Inst<"v_dot4c_i32_i8", VOP_DOT_ACC_I32_I32>; 1208 1209 let SubtargetPredicate = HasDot4Insts in 1210 defm V_DOT2C_I32_I16 : VOP2Inst<"v_dot2c_i32_i16", VOP_DOT_ACC_I32_I32>; 1211 let SubtargetPredicate = HasDot3Insts in 1212 defm V_DOT8C_I32_I4 : VOP2Inst<"v_dot8c_i32_i4", VOP_DOT_ACC_I32_I32>; 1213 1214 let SubtargetPredicate = HasDot13Insts in 1215 defm V_DOT2C_F32_BF16 : VOP2Inst_VOPD<"v_dot2c_f32_bf16", VOP_DOT_ACC_F32_V2BF16, 0xd, "v_dot2acc_f32_bf16">; 1216} 1217 1218let AddedComplexity = 30 in { 1219 def : GCNPat< 1220 (f32 (AMDGPUfdot2 v2f16:$src0, v2f16:$src1, f32:$src2, (i1 DSTCLAMP.NONE))), 1221 (f32 (V_DOT2C_F32_F16_e32 $src0, $src1, $src2)) 1222 > { 1223 let SubtargetPredicate = HasDot5Insts; 1224 } 1225 def : GCNPat< 1226 (f32 (int_amdgcn_fdot2_f32_bf16 v2bf16:$src0, v2bf16:$src1, f32:$src2, (i1 DSTCLAMP.NONE))), 1227 (f32 (V_DOT2C_F32_BF16_e32 $src0, $src1, $src2)) 1228 > { 1229 let SubtargetPredicate = HasDot13Insts; 1230 } 1231 def : GCNPat< 1232 (i32 (int_amdgcn_sdot4 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))), 1233 (i32 (V_DOT4C_I32_I8_e32 $src0, $src1, $src2)) 1234 > { 1235 let SubtargetPredicate = HasDot6Insts; 1236 } 1237 def : GCNPat< 1238 (i32 (int_amdgcn_sdot2 v2i16:$src0, v2i16:$src1, i32:$src2, (i1 DSTCLAMP.NONE))), 1239 (i32 (V_DOT2C_I32_I16_e32 $src0, $src1, $src2)) 1240 > { 1241 let SubtargetPredicate = HasDot4Insts; 1242 } 1243 def : GCNPat< 1244 (i32 (int_amdgcn_sdot8 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))), 1245 (i32 (V_DOT8C_I32_I4_e32 $src0, $src1, $src2)) 1246 > { 1247 let SubtargetPredicate = HasDot3Insts; 1248 } 1249} // End AddedComplexity = 30 1250 1251let SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1, FixedSize = 1 in { 1252def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">, VOPD_Component<0x2, "v_fmamk_f32">; 1253 1254let isCommutable = 1 in 1255def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">, VOPD_Component<0x1, "v_fmaak_f32">; 1256} // End SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1, FixedSize = 1 1257 1258let SubtargetPredicate = HasPkFmacF16Inst in { 1259defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>; 1260} // End SubtargetPredicate = HasPkFmacF16Inst 1261 1262// Note: 16-bit instructions produce a 0 result in the high 16-bits 1263// on GFX8 and GFX9 and preserve high 16 bits on GFX10+ 1264multiclass Arithmetic_i16_0Hi_Pats <SDPatternOperator op, Instruction inst> { 1265 1266def : GCNPat< 1267 (i32 (zext (op i16:$src0, i16:$src1))), 1268 (inst VSrc_b16:$src0, VSrc_b16:$src1) 1269>; 1270 1271def : GCNPat< 1272 (i64 (zext (op i16:$src0, i16:$src1))), 1273 (REG_SEQUENCE VReg_64, 1274 (inst $src0, $src1), sub0, 1275 (V_MOV_B32_e32 (i32 0)), sub1) 1276>; 1277} 1278 1279class ZExt_i16_i1_Pat <SDNode ext> : GCNPat < 1280 (i16 (ext i1:$src)), 1281 (V_CNDMASK_B32_e64 (i32 0/*src0mod*/), (i32 0/*src0*/), 1282 (i32 0/*src1mod*/), (i32 1/*src1*/), 1283 $src) 1284>; 1285 1286foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in 1287let True16Predicate = p in { 1288def : GCNPat < 1289 (and i16:$src0, i16:$src1), 1290 (V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1) 1291>; 1292 1293def : GCNPat < 1294 (or i16:$src0, i16:$src1), 1295 (V_OR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1) 1296>; 1297 1298def : GCNPat < 1299 (xor i16:$src0, i16:$src1), 1300 (V_XOR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1) 1301>; 1302} 1303 1304def : GCNPat < 1305 (and v2i16:$src0, v2i16:$src1), 1306 (V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1) 1307>; 1308 1309def : GCNPat < 1310 (or v2i16:$src0, v2i16:$src1), 1311 (V_OR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1) 1312>; 1313 1314def : GCNPat < 1315 (xor v2i16:$src0, v2i16:$src1), 1316 (V_XOR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1) 1317>; 1318 1319let Predicates = [Has16BitInsts, isGFX8GFX9] in { 1320 1321// Undo sub x, c -> add x, -c canonicalization since c is more likely 1322// an inline immediate than -c. 1323// TODO: Also do for 64-bit. 1324def : GCNPat< 1325 (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)), 1326 (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1) 1327>; 1328 1329def : GCNPat< 1330 (i32 (zext (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)))), 1331 (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1) 1332>; 1333 1334defm : Arithmetic_i16_0Hi_Pats<add, V_ADD_U16_e64>; 1335defm : Arithmetic_i16_0Hi_Pats<mul, V_MUL_LO_U16_e64>; 1336defm : Arithmetic_i16_0Hi_Pats<sub, V_SUB_U16_e64>; 1337defm : Arithmetic_i16_0Hi_Pats<smin, V_MIN_I16_e64>; 1338defm : Arithmetic_i16_0Hi_Pats<smax, V_MAX_I16_e64>; 1339defm : Arithmetic_i16_0Hi_Pats<umin, V_MIN_U16_e64>; 1340defm : Arithmetic_i16_0Hi_Pats<umax, V_MAX_U16_e64>; 1341defm : Arithmetic_i16_0Hi_Pats<clshl_rev_16, V_LSHLREV_B16_e64>; 1342defm : Arithmetic_i16_0Hi_Pats<clshr_rev_16, V_LSHRREV_B16_e64>; 1343defm : Arithmetic_i16_0Hi_Pats<cashr_rev_16, V_ASHRREV_I16_e64>; 1344 1345} // End Predicates = [Has16BitInsts, isGFX8GFX9] 1346 1347let Predicates = [Has16BitInsts] in { 1348 1349def : ZExt_i16_i1_Pat<zext>; 1350def : ZExt_i16_i1_Pat<anyext>; 1351 1352def : GCNPat < 1353 (i16 (sext i1:$src)), 1354 (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0), 1355 /*src1mod*/(i32 0), /*src1*/(i32 -1), $src) 1356>; 1357 1358} // End Predicates = [Has16BitInsts] 1359 1360 1361let SubtargetPredicate = HasIntClamp in { 1362// Set clamp bit for saturation. 1363def : VOPBinOpClampPat<uaddsat, V_ADD_CO_U32_e64, i32>; 1364def : VOPBinOpClampPat<usubsat, V_SUB_CO_U32_e64, i32>; 1365} 1366 1367let SubtargetPredicate = HasAddNoCarryInsts, OtherPredicates = [HasIntClamp] in { 1368let AddedComplexity = 1 in { // Prefer over form with carry-out. 1369def : VOPBinOpClampPat<uaddsat, V_ADD_U32_e64, i32>; 1370def : VOPBinOpClampPat<usubsat, V_SUB_U32_e64, i32>; 1371} 1372} 1373 1374let SubtargetPredicate = Has16BitInsts, OtherPredicates = [HasIntClamp] in { 1375def : VOPBinOpClampPat<uaddsat, V_ADD_U16_e64, i16>; 1376def : VOPBinOpClampPat<usubsat, V_SUB_U16_e64, i16>; 1377} 1378 1379let SubtargetPredicate = isGFX12Plus, isReMaterializable = 1 in { 1380 let SchedRW = [WriteDoubleAdd], isCommutable = 1 in { 1381 let FPDPRounding = 1 in { 1382 defm V_ADD_F64_pseudo : VOP2Inst <"v_add_f64_pseudo", VOP_F64_F64_F64, any_fadd>; 1383 defm V_MUL_F64_pseudo : VOP2Inst <"v_mul_f64_pseudo", VOP_F64_F64_F64, fmul>; 1384 } // End FPDPRounding = 1 1385 defm V_MIN_NUM_F64 : VOP2Inst <"v_min_num_f64", VOP_F64_F64_F64, fminnum_like>; 1386 defm V_MAX_NUM_F64 : VOP2Inst <"v_max_num_f64", VOP_F64_F64_F64, fmaxnum_like>; 1387 } // End SchedRW = [WriteDoubleAdd], isCommutable = 1 1388 let SchedRW = [Write64Bit] in { 1389 defm V_LSHLREV_B64_pseudo : VOP2Inst <"v_lshlrev_b64_pseudo", VOP_I64_I32_I64, clshl_rev_64>; 1390 } // End SchedRW = [Write64Bit] 1391} // End SubtargetPredicate = isGFX12Plus, isReMaterializable = 1 1392 1393//===----------------------------------------------------------------------===// 1394// DPP Encodings 1395//===----------------------------------------------------------------------===// 1396 1397class VOP2_DPP<bits<6> op, VOP2_DPP_Pseudo ps, 1398 string opName = ps.OpName, VOPProfile p = ps.Pfl, 1399 bit IsDPP16 = 0> : 1400 VOP_DPP<opName, p, IsDPP16> { 1401 let hasSideEffects = ps.hasSideEffects; 1402 let Defs = ps.Defs; 1403 let SchedRW = ps.SchedRW; 1404 let Uses = ps.Uses; 1405 1406 bits<8> vdst; 1407 bits<8> src1; 1408 let Inst{8-0} = 0xfa; 1409 let Inst{16-9} = !if(p.HasSrc1, src1{7-0}, 0); 1410 let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0); 1411 let Inst{30-25} = op; 1412 let Inst{31} = 0x0; 1413} 1414 1415class Base_VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps, 1416 string opName = ps.OpName, VOPProfile p = ps.Pfl> : 1417 VOP2_DPP<op, ps, opName, p, 1> { 1418 let AssemblerPredicate = HasDPP16; 1419 let SubtargetPredicate = ps.SubtargetPredicate; 1420 let OtherPredicates = ps.OtherPredicates; 1421} 1422 1423class VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps, int subtarget, 1424 string opName = ps.OpName, VOPProfile p = ps.Pfl> : 1425 Base_VOP2_DPP16<op, ps, opName, p>, 1426 SIMCInstr <ps.PseudoInstr, subtarget>; 1427 1428class VOP2_DPP16_Gen<bits<6> op, VOP2_DPP_Pseudo ps, GFXGen Gen, 1429 string opName = ps.OpName, VOPProfile p = ps.Pfl> : 1430 VOP2_DPP16<op, ps, Gen.Subtarget, opName, p> { 1431 let AssemblerPredicate = Gen.AssemblerPredicate; 1432 let True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate); 1433 let DecoderNamespace = Gen.DecoderNamespace# 1434 !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"); 1435} 1436 1437class VOP2_DPP8<bits<6> op, VOP2_Pseudo ps, 1438 VOPProfile p = ps.Pfl> : 1439 VOP_DPP8<ps.OpName, p> { 1440 let hasSideEffects = ps.hasSideEffects; 1441 let Defs = ps.Defs; 1442 let SchedRW = ps.SchedRW; 1443 let Uses = ps.Uses; 1444 1445 bits<8> vdst; 1446 bits<8> src1; 1447 1448 let Inst{8-0} = fi; 1449 let Inst{16-9} = !if(p.HasSrc1, src1{7-0}, 0); 1450 let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0); 1451 let Inst{30-25} = op; 1452 let Inst{31} = 0x0; 1453 1454 let SubtargetPredicate = ps.SubtargetPredicate; 1455 let OtherPredicates = ps.OtherPredicates; 1456} 1457 1458class VOP2_DPP8_Gen<bits<6> op, VOP2_Pseudo ps, GFXGen Gen, 1459 VOPProfile p = ps.Pfl> : 1460 VOP2_DPP8<op, ps, p> { 1461 let AssemblerPredicate = Gen.AssemblerPredicate; 1462 let True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate); 1463 let DecoderNamespace = Gen.DecoderNamespace# 1464 !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"); 1465} 1466 1467//===----------------------------------------------------------------------===// 1468// GFX11, GFX12 1469//===----------------------------------------------------------------------===// 1470 1471//===------------------------------- VOP2 -------------------------------===// 1472multiclass VOP2Only_Real_MADK<GFXGen Gen, bits<6> op> { 1473 def Gen.Suffix : 1474 VOP2_Real_Gen<!cast<VOP2_Pseudo>(NAME), Gen>, 1475 VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>; 1476} 1477 1478multiclass VOP2Only_Real_MADK_with_name<GFXGen Gen, bits<6> op, string asmName, 1479 string opName = NAME> { 1480 def Gen.Suffix : 1481 VOP2_Real_Gen<!cast<VOP2_Pseudo>(opName), Gen>, 1482 VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(opName).Pfl> { 1483 VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName); 1484 let AsmString = asmName # ps.AsmOperands; 1485 } 1486} 1487 1488multiclass VOP2_Real_e32<GFXGen Gen, bits<6> op> { 1489 def _e32#Gen.Suffix : 1490 VOP2_Real_Gen<!cast<VOP2_Pseudo>(NAME#"_e32"), Gen>, 1491 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>; 1492} 1493 1494multiclass VOP2Only_Real_e32<GFXGen Gen, bits<6> op> { 1495 let IsSingle = 1 in 1496 defm NAME: VOP2_Real_e32<Gen, op>; 1497} 1498 1499multiclass VOP2_Real_e64<GFXGen Gen, bits<6> op> { 1500 def _e64#Gen.Suffix : 1501 VOP3_Real_Gen<!cast<VOP3_Pseudo>(NAME#"_e64"), Gen>, 1502 VOP3e_gfx11_gfx12<{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; 1503} 1504 1505multiclass VOP2_Real_dpp<GFXGen Gen, bits<6> op> { 1506 if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then 1507 def _dpp#Gen.Suffix : VOP2_DPP16_Gen<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), Gen>; 1508} 1509 1510multiclass VOP2_Real_dpp8<GFXGen Gen, bits<6> op> { 1511 if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then 1512 def _dpp8#Gen.Suffix : VOP2_DPP8_Gen<op, !cast<VOP2_Pseudo>(NAME#"_e32"), Gen>; 1513} 1514 1515//===------------------------- VOP2 (with name) -------------------------===// 1516multiclass VOP2_Real_e32_with_name<GFXGen Gen, bits<6> op, string opName, 1517 string asmName, bit single = 0> { 1518 defvar ps = !cast<VOP2_Pseudo>(opName#"_e32"); 1519 def _e32#Gen.Suffix : 1520 VOP2_Real_Gen<ps, Gen, asmName>, 1521 VOP2e<op{5-0}, ps.Pfl> { 1522 let AsmString = asmName # ps.AsmOperands; 1523 let IsSingle = single; 1524 } 1525} 1526multiclass VOP2_Real_e64_with_name<GFXGen Gen, bits<6> op, string opName, 1527 string asmName> { 1528 defvar ps = !cast<VOP3_Pseudo>(opName#"_e64"); 1529 def _e64#Gen.Suffix : 1530 VOP3_Real_Gen<ps, Gen>, 1531 VOP3e_gfx11_gfx12<{0, 1, 0, 0, op{5-0}}, ps.Pfl> { 1532 let AsmString = asmName # ps.AsmOperands; 1533 } 1534} 1535 1536multiclass VOP2_Real_dpp_with_name<GFXGen Gen, bits<6> op, string opName, 1537 string asmName> { 1538 defvar ps = !cast<VOP2_Pseudo>(opName#"_e32"); 1539 if ps.Pfl.HasExtDPP then 1540 def _dpp#Gen.Suffix : VOP2_DPP16_Gen<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), Gen> { 1541 let AsmString = asmName # ps.Pfl.AsmDPP16; 1542 } 1543} 1544multiclass VOP2_Real_dpp8_with_name<GFXGen Gen, bits<6> op, string opName, 1545 string asmName> { 1546 defvar ps = !cast<VOP2_Pseudo>(opName#"_e32"); 1547 if ps.Pfl.HasExtDPP then 1548 def _dpp8#Gen.Suffix : VOP2_DPP8_Gen<op, ps, Gen> { 1549 let AsmString = asmName # ps.Pfl.AsmDPP8; 1550 } 1551} 1552 1553//===------------------------------ VOP2be ------------------------------===// 1554multiclass VOP2be_Real_e32<GFXGen Gen, bits<6> op, string opName, string asmName> { 1555 defvar ps = !cast<VOP2_Pseudo>(opName#"_e32"); 1556 def _e32#Gen.Suffix : 1557 VOP2_Real_Gen<ps, Gen>, 1558 VOP2e<op{5-0}, ps.Pfl> { 1559 let AsmString = asmName # !subst(", vcc", "", ps.AsmOperands); 1560 } 1561} 1562multiclass VOP2be_Real_dpp<GFXGen Gen, bits<6> op, string opName, string asmName> { 1563 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then 1564 def _dpp#Gen.Suffix : 1565 VOP2_DPP16_Gen<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), Gen, asmName> { 1566 string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16; 1567 let AsmString = asmName # !subst(", vcc", "", AsmDPP); 1568 } 1569 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then 1570 def _dpp_w32#Gen.Suffix : 1571 Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> { 1572 string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16; 1573 let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP); 1574 let isAsmParserOnly = 1; 1575 let WaveSizePredicate = isWave32; 1576 let AssemblerPredicate = Gen.AssemblerPredicate; 1577 let DecoderNamespace = Gen.DecoderNamespace; 1578 } 1579 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then 1580 def _dpp_w64#Gen.Suffix : 1581 Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> { 1582 string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16; 1583 let AsmString = asmName # AsmDPP; 1584 let isAsmParserOnly = 1; 1585 let WaveSizePredicate = isWave64; 1586 let AssemblerPredicate = Gen.AssemblerPredicate; 1587 let DecoderNamespace = Gen.DecoderNamespace; 1588 } 1589} 1590multiclass VOP2be_Real_dpp8<GFXGen Gen, bits<6> op, string opName, string asmName> { 1591 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then 1592 def _dpp8#Gen.Suffix : 1593 VOP2_DPP8_Gen<op, !cast<VOP2_Pseudo>(opName#"_e32"), Gen> { 1594 string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8; 1595 let AsmString = asmName # !subst(", vcc", "", AsmDPP8); 1596 } 1597 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then 1598 def _dpp8_w32#Gen.Suffix : 1599 VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> { 1600 string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8; 1601 let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8); 1602 let isAsmParserOnly = 1; 1603 let WaveSizePredicate = isWave32; 1604 let AssemblerPredicate = Gen.AssemblerPredicate; 1605 let DecoderNamespace = Gen.DecoderNamespace; 1606 } 1607 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then 1608 def _dpp8_w64#Gen.Suffix : 1609 VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> { 1610 string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8; 1611 let AsmString = asmName # AsmDPP8; 1612 let isAsmParserOnly = 1; 1613 let WaveSizePredicate = isWave64; 1614 let AssemblerPredicate = Gen.AssemblerPredicate; 1615 let DecoderNamespace = Gen.DecoderNamespace; 1616 } 1617} 1618 1619// We don't want to override separate decoderNamespaces within these 1620multiclass VOP2_Realtriple_e64<GFXGen Gen, bits<6> op> : 1621 VOP3_Realtriple<Gen, {0, 1, 0, 0, op{5-0}}, /*isSingle=*/ 0, NAME>; 1622 1623multiclass VOP2_Realtriple_e64_with_name<GFXGen Gen, bits<6> op, string opName, 1624 string asmName> { 1625 defm NAME : VOP3_Realtriple_with_name<Gen, {0, 1, 0, 0, op{5-0}}, opName, asmName> ; 1626} 1627 1628multiclass VOP2be_Real<GFXGen Gen, bits<6> op, string opName, string asmName> : 1629 VOP2be_Real_e32<Gen, op, opName, asmName>, 1630 VOP3be_Realtriple<Gen, {0, 1, 0, 0, op{5-0}}, /*isSingle=*/ 0, opName, asmName>, 1631 VOP2be_Real_dpp<Gen, op, opName, asmName>, 1632 VOP2be_Real_dpp8<Gen, op, opName, asmName>; 1633 1634// Only for CNDMASK 1635multiclass VOP2e_Real<GFXGen Gen, bits<6> op, string opName, string asmName> : 1636 VOP2_Real_e32<Gen, op>, 1637 VOP2_Realtriple_e64<Gen, op>, 1638 VOP2be_Real_dpp<Gen, op, opName, asmName>, 1639 VOP2be_Real_dpp8<Gen, op, opName, asmName>; 1640 1641multiclass VOP2Only_Real<GFXGen Gen, bits<6> op> : 1642 VOP2Only_Real_e32<Gen, op>, 1643 VOP2_Real_dpp<Gen, op>, 1644 VOP2_Real_dpp8<Gen, op>; 1645 1646multiclass VOP2_Real_FULL<GFXGen Gen, bits<6> op> : 1647 VOP2_Realtriple_e64<Gen, op>, 1648 VOP2_Real_e32<Gen, op>, 1649 VOP2_Real_dpp<Gen, op>, 1650 VOP2_Real_dpp8<Gen, op>; 1651 1652multiclass VOP2_Real_NO_VOP3_with_name<GFXGen Gen, bits<6> op, string opName, 1653 string asmName, bit isSingle = 0> { 1654 defm NAME : VOP2_Real_e32_with_name<Gen, op, opName, asmName, isSingle>, 1655 VOP2_Real_dpp_with_name<Gen, op, opName, asmName>, 1656 VOP2_Real_dpp8_with_name<Gen, op, opName, asmName>; 1657 defvar ps = !cast<VOP2_Pseudo>(opName#"_e32"); 1658 def Gen.Suffix#"_alias" : AMDGPUMnemonicAlias<ps.Mnemonic, asmName> { 1659 let AssemblerPredicate = Gen.AssemblerPredicate; 1660 } 1661} 1662 1663multiclass VOP2_Real_FULL_with_name<GFXGen Gen, bits<6> op, string opName, 1664 string asmName> : 1665 VOP2_Realtriple_e64_with_name<Gen, op, opName, asmName>, 1666 VOP2_Real_NO_VOP3_with_name<Gen, op, opName, asmName>; 1667 1668multiclass VOP2_Real_NO_DPP_with_name<GFXGen Gen, bits<6> op, string opName, 1669 string asmName> { 1670 defm NAME : VOP2_Real_e32_with_name<Gen, op, opName, asmName>, 1671 VOP2_Real_e64_with_name<Gen, op, opName, asmName>; 1672 defvar ps = !cast<VOP2_Pseudo>(opName#"_e32"); 1673 def Gen.Suffix#"_alias" : AMDGPUMnemonicAlias<ps.Mnemonic, asmName> { 1674 let AssemblerPredicate = Gen.AssemblerPredicate; 1675 } 1676} 1677 1678multiclass VOP2_Real_NO_DPP_with_alias<GFXGen Gen, bits<6> op, string alias> { 1679 defm NAME : VOP2_Real_e32<Gen, op>, 1680 VOP2_Real_e64<Gen, op>; 1681 def Gen.Suffix#"_alias" : AMDGPUMnemonicAlias<alias, NAME> { 1682 let AssemblerPredicate = Gen.AssemblerPredicate; 1683 } 1684} 1685 1686//===----------------------------------------------------------------------===// 1687// GFX12. 1688//===----------------------------------------------------------------------===// 1689 1690multiclass VOP2be_Real_gfx12<bits<6> op, string opName, string asmName> : 1691 VOP2be_Real<GFX12Gen, op, opName, asmName>; 1692 1693// Only for CNDMASK 1694multiclass VOP2e_Real_gfx12<bits<6> op, string opName, string asmName> : 1695 VOP2e_Real<GFX12Gen, op, opName, asmName>; 1696 1697multiclass VOP2_Real_FULL_with_name_gfx12<bits<6> op, string opName, 1698 string asmName> : 1699 VOP2_Real_FULL_with_name<GFX12Gen, op, opName, asmName>; 1700 1701multiclass VOP2_Real_FULL_t16_gfx12<bits<6> op, string opName, 1702 string asmName, string alias> { 1703 defm NAME : VOP2_Real_FULL_with_name<GFX12Gen, op, opName, asmName>; 1704 def _gfx12_2nd_alias : AMDGPUMnemonicAlias<alias, asmName> { 1705 let AssemblerPredicate = isGFX12Only; 1706 } 1707} 1708 1709multiclass VOP2_Real_FULL_t16_and_fake16_gfx12<bits<6> op, string opName, 1710 string asmName, string alias> { 1711 defm _t16: VOP2_Real_FULL_t16_gfx12<op, opName#"_t16", asmName, alias>; 1712 defm _fake16: VOP2_Real_FULL_t16_gfx12<op, opName#"_fake16", asmName, alias>; 1713} 1714 1715multiclass VOP2_Real_NO_DPP_with_name_gfx12<bits<6> op, string opName, 1716 string asmName> : 1717 VOP2_Real_NO_DPP_with_name<GFX12Gen, op, opName, asmName>; 1718 1719multiclass VOP2_Real_NO_DPP_with_alias_gfx12<bits<6> op, string alias> : 1720 VOP2_Real_NO_DPP_with_alias<GFX12Gen, op, alias>; 1721 1722defm V_ADD_F64 : VOP2_Real_NO_DPP_with_name_gfx12<0x002, "V_ADD_F64_pseudo", "v_add_f64">; 1723defm V_MUL_F64 : VOP2_Real_NO_DPP_with_name_gfx12<0x006, "V_MUL_F64_pseudo", "v_mul_f64">; 1724defm V_LSHLREV_B64 : VOP2_Real_NO_DPP_with_name_gfx12<0x01f, "V_LSHLREV_B64_pseudo", "v_lshlrev_b64">; 1725defm V_MIN_NUM_F64 : VOP2_Real_NO_DPP_with_alias_gfx12<0x00d, "v_min_f64">; 1726defm V_MAX_NUM_F64 : VOP2_Real_NO_DPP_with_alias_gfx12<0x00e, "v_max_f64">; 1727 1728defm V_CNDMASK_B32 : VOP2e_Real_gfx12<0x001, "V_CNDMASK_B32", "v_cndmask_b32">; 1729defm V_ADD_CO_CI_U32 : 1730 VOP2be_Real_gfx12<0x020, "V_ADDC_U32", "v_add_co_ci_u32">; 1731defm V_SUB_CO_CI_U32 : 1732 VOP2be_Real_gfx12<0x021, "V_SUBB_U32", "v_sub_co_ci_u32">; 1733defm V_SUBREV_CO_CI_U32 : 1734 VOP2be_Real_gfx12<0x022, "V_SUBBREV_U32", "v_subrev_co_ci_u32">; 1735 1736defm V_MIN_NUM_F32 : VOP2_Real_FULL_with_name_gfx12<0x015, "V_MIN_F32", "v_min_num_f32">; 1737defm V_MAX_NUM_F32 : VOP2_Real_FULL_with_name_gfx12<0x016, "V_MAX_F32", "v_max_num_f32">; 1738defm V_MIN_NUM_F16 : VOP2_Real_FULL_t16_and_fake16_gfx12<0x030, "V_MIN_F16", "v_min_num_f16", "v_min_f16">; 1739defm V_MAX_NUM_F16 : VOP2_Real_FULL_t16_and_fake16_gfx12<0x031, "V_MAX_F16", "v_max_num_f16", "v_max_f16">; 1740 1741let SubtargetPredicate = isGFX12Plus in { 1742 defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx12>; 1743 1744 defm : VOP2bInstAliases< 1745 V_ADDC_U32_e32, V_ADD_CO_CI_U32_e32_gfx12, "v_add_co_ci_u32">; 1746 defm : VOP2bInstAliases< 1747 V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx12, "v_sub_co_ci_u32">; 1748 defm : VOP2bInstAliases< 1749 V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx12, "v_subrev_co_ci_u32">; 1750} // End SubtargetPredicate = isGFX12Plus 1751 1752//===----------------------------------------------------------------------===// 1753// GFX11. 1754//===----------------------------------------------------------------------===// 1755 1756multiclass VOP2be_Real_gfx11<bits<6> op, string opName, string asmName> : 1757 VOP2be_Real<GFX11Gen, op, opName, asmName>; 1758 1759// Only for CNDMASK 1760multiclass VOP2e_Real_gfx11<bits<6> op, string opName, string asmName> : 1761 VOP2e_Real<GFX11Gen, op, opName, asmName>; 1762 1763multiclass VOP2_Real_NO_VOP3_with_name_gfx11<bits<6> op, string opName, 1764 string asmName, bit isSingle = 0> { 1765 defm NAME : VOP2_Real_e32_with_name<GFX11Gen, op, opName, asmName, isSingle>, 1766 VOP2_Real_dpp_with_name<GFX11Gen, op, opName, asmName>, 1767 VOP2_Real_dpp8_with_name<GFX11Gen, op, opName, asmName>; 1768 defvar ps = !cast<VOP2_Pseudo>(opName#"_e32"); 1769 def _gfx11_alias : AMDGPUMnemonicAlias<ps.Mnemonic, asmName> { 1770 let AssemblerPredicate = isGFX11Only; 1771 } 1772} 1773 1774multiclass VOP2_Real_FULL_t16_gfx11<bits<6> op, string asmName, string opName = NAME> : 1775 VOP2_Real_FULL_with_name<GFX11Gen, op, opName, asmName>; 1776 1777multiclass VOP2_Real_FULL_t16_and_fake16_gfx11<bits<6> op, string asmName, string opName = NAME> { 1778 defm opName#"_t16": VOP2_Real_FULL_t16_gfx11<op, asmName, opName#"_t16">; 1779 defm opName#"_fake16": VOP2_Real_FULL_t16_gfx11<op, asmName, opName#"_fake16">; 1780} 1781 1782multiclass VOP2_Real_NO_DPP_with_name_gfx11<bits<6> op, string opName, 1783 string asmName> : 1784 VOP2_Real_NO_DPP_with_name<GFX11Gen, op, opName, asmName>; 1785 1786multiclass VOP2_Real_FULL_gfx11_gfx12<bits<6> op> : 1787 VOP2_Real_FULL<GFX11Gen, op>, VOP2_Real_FULL<GFX12Gen, op>; 1788 1789multiclass VOP2_Real_FULL_with_name_gfx11_gfx12<bits<6> op, string opName, 1790 string asmName> : 1791 VOP2_Real_FULL_with_name<GFX11Gen, op, opName, asmName>, 1792 VOP2_Real_FULL_with_name<GFX12Gen, op, opName, asmName>; 1793 1794multiclass VOP2_Real_e32_gfx11_gfx12<bits<6> op> : 1795 VOP2Only_Real<GFX11Gen, op>, VOP2Only_Real<GFX12Gen, op>; 1796 1797multiclass VOP3Only_Realtriple_gfx11_gfx12<bits<10> op> : 1798 VOP3Only_Realtriple<GFX11Gen, op>, VOP3Only_Realtriple<GFX12Gen, op>; 1799 1800multiclass VOP3Only_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName, string OpName = NAME> : 1801 VOP3_Realtriple_t16_gfx11<op, asmName, OpName, "", /*IsSingle*/1>, 1802 VOP3_Realtriple_t16_gfx12<op, asmName, OpName, "", /*IsSingle*/1>; 1803 1804multiclass VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<bits<10> op, string asmName, string OpName = NAME> { 1805 defm _t16: VOP3Only_Realtriple_t16_gfx11_gfx12<op, asmName, OpName#"_t16">; 1806 defm _fake16: VOP3Only_Realtriple_t16_gfx11_gfx12<op, asmName, OpName#"_fake16">; 1807} 1808 1809multiclass VOP3beOnly_Realtriple_gfx11_gfx12<bits<10> op> : 1810 VOP3beOnly_Realtriple<GFX11Gen, op>, VOP3beOnly_Realtriple<GFX12Gen, op>; 1811 1812multiclass VOP2Only_Real_MADK_t16_gfx11_gfx12<bits<6> op, string asmName, 1813 string opName = NAME> : 1814 VOP2Only_Real_MADK_with_name<GFX11Gen, op, asmName, opName>, 1815 VOP2Only_Real_MADK_with_name<GFX12Gen, op, asmName, opName>; 1816 1817multiclass VOP2Only_Real_MADK_t16_and_fake16_gfx11_gfx12<bits<6> op, string asmName, 1818 string opName = NAME> { 1819 defm _t16: VOP2Only_Real_MADK_t16_gfx11_gfx12<op, asmName, opName#"_t16">; 1820 defm _fake16: VOP2Only_Real_MADK_t16_gfx11_gfx12<op, asmName, opName#"_fake16">; 1821} 1822 1823multiclass VOP2_Real_FULL_t16_gfx11_gfx12<bits<6> op, string asmName, 1824 string opName = NAME> : 1825 VOP2_Real_FULL_with_name<GFX11Gen, op, opName, asmName>, 1826 VOP2_Real_FULL_with_name<GFX12Gen, op, opName, asmName>; 1827 1828multiclass VOP2_Real_FULL_t16_and_fake16_gfx11_gfx12<bits<6> op, string asmName, 1829 string opName = NAME> { 1830 defm _t16: VOP2_Real_FULL_t16_gfx11_gfx12<op, asmName, opName#"_t16">; 1831 defm _fake16: VOP2_Real_FULL_t16_gfx11_gfx12<op, asmName, opName#"_fake16">; 1832} 1833 1834multiclass VOP2_Real_FULL_gfx11<bits<6> op> : 1835 VOP2_Real_FULL<GFX11Gen, op>; 1836 1837defm V_CNDMASK_B32 : VOP2e_Real_gfx11<0x001, "V_CNDMASK_B32", 1838 "v_cndmask_b32">; 1839defm V_DOT2ACC_F32_F16 : VOP2_Real_NO_VOP3_with_name_gfx11<0x002, 1840 "V_DOT2C_F32_F16", "v_dot2acc_f32_f16", 1>; 1841defm V_FMAC_DX9_ZERO_F32 : VOP2_Real_NO_DPP_with_name_gfx11<0x006, 1842 "V_FMAC_LEGACY_F32", "v_fmac_dx9_zero_f32">; 1843defm V_MUL_DX9_ZERO_F32 : VOP2_Real_FULL_with_name_gfx11_gfx12<0x007, 1844 "V_MUL_LEGACY_F32", "v_mul_dx9_zero_f32">; 1845defm V_LSHLREV_B32 : VOP2_Real_FULL_gfx11_gfx12<0x018>; 1846defm V_LSHRREV_B32 : VOP2_Real_FULL_gfx11_gfx12<0x019>; 1847defm V_ASHRREV_I32 : VOP2_Real_FULL_gfx11_gfx12<0x01a>; 1848defm V_ADD_CO_CI_U32 : 1849 VOP2be_Real_gfx11<0x020, "V_ADDC_U32", "v_add_co_ci_u32">; 1850defm V_SUB_CO_CI_U32 : 1851 VOP2be_Real_gfx11<0x021, "V_SUBB_U32", "v_sub_co_ci_u32">; 1852defm V_SUBREV_CO_CI_U32 : 1853 VOP2be_Real_gfx11<0x022, "V_SUBBREV_U32", "v_subrev_co_ci_u32">; 1854 1855defm V_CVT_PK_RTZ_F16_F32 : VOP2_Real_FULL_with_name_gfx11_gfx12<0x02f, 1856 "V_CVT_PKRTZ_F16_F32", "v_cvt_pk_rtz_f16_f32">; 1857defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx11_gfx12<0x03c>; 1858 1859defm V_ADD_F16 : VOP2_Real_FULL_t16_and_fake16_gfx11_gfx12<0x032, "v_add_f16">; 1860defm V_SUB_F16 : VOP2_Real_FULL_t16_and_fake16_gfx11_gfx12<0x033, "v_sub_f16">; 1861defm V_SUBREV_F16 : VOP2_Real_FULL_t16_and_fake16_gfx11_gfx12<0x034, "v_subrev_f16">; 1862defm V_MUL_F16 : VOP2_Real_FULL_t16_and_fake16_gfx11_gfx12<0x035, "v_mul_f16">; 1863defm V_FMAC_F16 : VOP2_Real_FULL_t16_and_fake16_gfx11_gfx12<0x036, "v_fmac_f16">; 1864defm V_LDEXP_F16 : VOP2_Real_FULL_t16_and_fake16_gfx11_gfx12<0x03b, "v_ldexp_f16">; 1865defm V_MAX_F16 : VOP2_Real_FULL_t16_and_fake16_gfx11<0x039, "v_max_f16">; 1866defm V_MIN_F16 : VOP2_Real_FULL_t16_and_fake16_gfx11<0x03a, "v_min_f16">; 1867defm V_FMAMK_F16 : VOP2Only_Real_MADK_t16_and_fake16_gfx11_gfx12<0x037, "v_fmamk_f16">; 1868defm V_FMAAK_F16 : VOP2Only_Real_MADK_t16_and_fake16_gfx11_gfx12<0x038, "v_fmaak_f16">; 1869 1870// VOP3 only. 1871defm V_CNDMASK_B16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x25d, "v_cndmask_b16">; 1872defm V_LDEXP_F32 : VOP3Only_Realtriple_gfx11_gfx12<0x31c>; 1873defm V_BFM_B32 : VOP3Only_Realtriple_gfx11_gfx12<0x31d>; 1874defm V_BCNT_U32_B32 : VOP3Only_Realtriple_gfx11_gfx12<0x31e>; 1875defm V_MBCNT_LO_U32_B32 : VOP3Only_Realtriple_gfx11_gfx12<0x31f>; 1876defm V_MBCNT_HI_U32_B32 : VOP3Only_Realtriple_gfx11_gfx12<0x320>; 1877defm V_CVT_PK_NORM_I16_F32 : VOP3Only_Realtriple_with_name_gfx11_gfx12<0x321, "V_CVT_PKNORM_I16_F32", "v_cvt_pk_norm_i16_f32">; 1878defm V_CVT_PK_NORM_U16_F32 : VOP3Only_Realtriple_with_name_gfx11_gfx12<0x322, "V_CVT_PKNORM_U16_F32", "v_cvt_pk_norm_u16_f32">; 1879defm V_CVT_PK_U16_U32 : VOP3Only_Realtriple_gfx11_gfx12<0x323>; 1880defm V_CVT_PK_I16_I32 : VOP3Only_Realtriple_gfx11_gfx12<0x324>; 1881defm V_ADD_CO_U32 : VOP3beOnly_Realtriple_gfx11_gfx12<0x300>; 1882defm V_SUB_CO_U32 : VOP3beOnly_Realtriple_gfx11_gfx12<0x301>; 1883defm V_SUBREV_CO_U32 : VOP3beOnly_Realtriple_gfx11_gfx12<0x302>; 1884 1885let SubtargetPredicate = isGFX11Only in { 1886 defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx11>; 1887 1888 defm : VOP2bInstAliases< 1889 V_ADDC_U32_e32, V_ADD_CO_CI_U32_e32_gfx11, "v_add_co_ci_u32">; 1890 defm : VOP2bInstAliases< 1891 V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx11, "v_sub_co_ci_u32">; 1892 defm : VOP2bInstAliases< 1893 V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx11, "v_subrev_co_ci_u32">; 1894} // End SubtargetPredicate = isGFX11Only 1895 1896//===----------------------------------------------------------------------===// 1897// GFX10. 1898//===----------------------------------------------------------------------===// 1899 1900let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in { 1901 //===------------------------------- VOP2 -------------------------------===// 1902 multiclass VOP2Only_Real_MADK_gfx10<bits<6> op> { 1903 def _gfx10 : 1904 VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.GFX10>, 1905 VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>; 1906 } 1907 multiclass VOP2Only_Real_MADK_gfx10_with_name<bits<6> op, string opName, 1908 string asmName> { 1909 def _gfx10 : 1910 VOP2_Real<!cast<VOP2_Pseudo>(opName), SIEncodingFamily.GFX10>, 1911 VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(opName).Pfl> { 1912 VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName); 1913 let AsmString = asmName # ps.AsmOperands; 1914 } 1915 } 1916 multiclass VOP2_Real_e32_gfx10<bits<6> op> { 1917 def _e32_gfx10 : 1918 VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX10>, 1919 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>; 1920 } 1921 multiclass VOP2_Real_e64_gfx10<bits<6> op> { 1922 def _e64_gfx10 : 1923 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>, 1924 VOP3e_gfx10<{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; 1925 } 1926 multiclass VOP2_Real_sdwa_gfx10<bits<6> op> { 1927 if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then 1928 def _sdwa_gfx10 : 1929 VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>, 1930 VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>; 1931 } 1932 multiclass VOP2_Real_dpp_gfx10<bits<6> op> { 1933 if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then 1934 def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX10>; 1935 } 1936 multiclass VOP2_Real_dpp8_gfx10<bits<6> op> { 1937 if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then 1938 def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(NAME#"_e32")>; 1939 } 1940 1941 //===------------------------- VOP2 (with name) -------------------------===// 1942 multiclass VOP2_Real_e32_gfx10_with_name<bits<6> op, string opName, 1943 string asmName> { 1944 def _e32_gfx10 : 1945 VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.GFX10>, 1946 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl> { 1947 VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32"); 1948 let AsmString = asmName # ps.AsmOperands; 1949 } 1950 } 1951 multiclass VOP2_Real_e64_gfx10_with_name<bits<6> op, string opName, 1952 string asmName> { 1953 def _e64_gfx10 : 1954 VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>, 1955 VOP3e_gfx10<{0, 1, 0, 0, op{5-0}}, 1956 !cast<VOP3_Pseudo>(opName#"_e64").Pfl> { 1957 VOP3_Pseudo ps = !cast<VOP3_Pseudo>(opName#"_e64"); 1958 let AsmString = asmName # ps.AsmOperands; 1959 } 1960 } 1961 multiclass VOP2_Real_sdwa_gfx10_with_name<bits<6> op, string opName, 1962 string asmName> { 1963 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9 then 1964 def _sdwa_gfx10 : 1965 VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>, 1966 VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> { 1967 VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa"); 1968 let AsmString = asmName # ps.AsmOperands; 1969 } 1970 } 1971 multiclass VOP2_Real_dpp_gfx10_with_name<bits<6> op, string opName, 1972 string asmName> { 1973 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then 1974 def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX10> { 1975 VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32"); 1976 let AsmString = asmName # ps.Pfl.AsmDPP16; 1977 } 1978 } 1979 multiclass VOP2_Real_dpp8_gfx10_with_name<bits<6> op, string opName, 1980 string asmName> { 1981 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then 1982 def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> { 1983 VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32"); 1984 let AsmString = asmName # ps.Pfl.AsmDPP8; 1985 } 1986 } 1987 1988 //===------------------------------ VOP2be ------------------------------===// 1989 multiclass VOP2be_Real_e32_gfx10<bits<6> op, string opName, string asmName> { 1990 def _e32_gfx10 : 1991 VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.GFX10>, 1992 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl> { 1993 VOP2_Pseudo Ps = !cast<VOP2_Pseudo>(opName#"_e32"); 1994 let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands); 1995 } 1996 } 1997 multiclass VOP2be_Real_e64_gfx10<bits<6> op, string opName, string asmName> { 1998 def _e64_gfx10 : 1999 VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>, 2000 VOP3be_gfx10<{0, 1, 0, 0, op{5-0}}, 2001 !cast<VOP3_Pseudo>(opName#"_e64").Pfl> { 2002 VOP3_Pseudo Ps = !cast<VOP3_Pseudo>(opName#"_e64"); 2003 let AsmString = asmName # Ps.AsmOperands; 2004 } 2005 } 2006 multiclass VOP2be_Real_sdwa_gfx10<bits<6> op, string opName, string asmName> { 2007 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9 then 2008 def _sdwa_gfx10 : 2009 VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>, 2010 VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> { 2011 VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa"); 2012 let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands); 2013 } 2014 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9 then 2015 def _sdwa_w32_gfx10 : 2016 Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>, 2017 VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> { 2018 VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa"); 2019 let AsmString = asmName # !subst("vcc", "vcc_lo", Ps.AsmOperands); 2020 let isAsmParserOnly = 1; 2021 let WaveSizePredicate = isWave32; 2022 } 2023 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9 then 2024 def _sdwa_w64_gfx10 : 2025 Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>, 2026 VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> { 2027 VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa"); 2028 let AsmString = asmName # Ps.AsmOperands; 2029 let isAsmParserOnly = 1; 2030 let WaveSizePredicate = isWave64; 2031 } 2032 } 2033 multiclass VOP2be_Real_dpp_gfx10<bits<6> op, string opName, string asmName> { 2034 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then 2035 def _dpp_gfx10 : 2036 VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX10, asmName> { 2037 string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16; 2038 let AsmString = asmName # !subst(", vcc", "", AsmDPP); 2039 } 2040 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then 2041 def _dpp_w32_gfx10 : 2042 Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> { 2043 string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16; 2044 let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP); 2045 let isAsmParserOnly = 1; 2046 let WaveSizePredicate = isWave32; 2047 } 2048 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then 2049 def _dpp_w64_gfx10 : 2050 Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> { 2051 string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16; 2052 let AsmString = asmName # AsmDPP; 2053 let isAsmParserOnly = 1; 2054 let WaveSizePredicate = isWave64; 2055 } 2056 } 2057 multiclass VOP2be_Real_dpp8_gfx10<bits<6> op, string opName, string asmName> { 2058 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then 2059 def _dpp8_gfx10 : 2060 VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> { 2061 string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8; 2062 let AsmString = asmName # !subst(", vcc", "", AsmDPP8); 2063 } 2064 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then 2065 def _dpp8_w32_gfx10 : 2066 VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> { 2067 string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8; 2068 let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8); 2069 let isAsmParserOnly = 1; 2070 let WaveSizePredicate = isWave32; 2071 } 2072 if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then 2073 def _dpp8_w64_gfx10 : 2074 VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> { 2075 string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8; 2076 let AsmString = asmName # AsmDPP8; 2077 let isAsmParserOnly = 1; 2078 let WaveSizePredicate = isWave64; 2079 } 2080 } 2081 2082 //===----------------------------- VOP3Only -----------------------------===// 2083 multiclass VOP3Only_Real_gfx10<bits<10> op> { 2084 def _e64_gfx10 : 2085 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>, 2086 VOP3e_gfx10<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> { 2087 let IsSingle = 1; 2088 } 2089 } 2090 2091 //===---------------------------- VOP3beOnly ----------------------------===// 2092 multiclass VOP3beOnly_Real_gfx10<bits<10> op> { 2093 def _e64_gfx10 : 2094 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>, 2095 VOP3be_gfx10<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> { 2096 let IsSingle = 1; 2097 } 2098 } 2099} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" 2100 2101multiclass VOP2Only_Real_MADK_gfx10_gfx11<bits<6> op> : 2102 VOP2Only_Real_MADK_gfx10<op>, VOP2Only_Real_MADK<GFX11Gen, op>; 2103 2104multiclass VOP2Only_Real_MADK_gfx10_gfx11_gfx12<bits<6> op> : 2105 VOP2Only_Real_MADK_gfx10_gfx11<op>, VOP2Only_Real_MADK<GFX12Gen, op>; 2106 2107multiclass VOP2be_Real_gfx10<bits<6> op, string opName, string asmName> : 2108 VOP2be_Real_e32_gfx10<op, opName, asmName>, 2109 VOP2be_Real_e64_gfx10<op, opName, asmName>, 2110 VOP2be_Real_sdwa_gfx10<op, opName, asmName>, 2111 VOP2be_Real_dpp_gfx10<op, opName, asmName>, 2112 VOP2be_Real_dpp8_gfx10<op, opName, asmName>; 2113 2114multiclass VOP2e_Real_gfx10<bits<6> op, string opName, string asmName> : 2115 VOP2_Real_e32_gfx10<op>, 2116 VOP2_Real_e64_gfx10<op>, 2117 VOP2be_Real_sdwa_gfx10<op, opName, asmName>, 2118 VOP2be_Real_dpp_gfx10<op, opName, asmName>, 2119 VOP2be_Real_dpp8_gfx10<op, opName, asmName>; 2120 2121multiclass VOP2_Real_gfx10<bits<6> op> : 2122 VOP2_Real_e32_gfx10<op>, VOP2_Real_e64_gfx10<op>, 2123 VOP2_Real_sdwa_gfx10<op>, VOP2_Real_dpp_gfx10<op>, VOP2_Real_dpp8_gfx10<op>; 2124 2125multiclass VOP2_Real_gfx10_gfx11<bits<6> op> : 2126 VOP2_Real_gfx10<op>, VOP2_Real_FULL<GFX11Gen, op>; 2127 2128multiclass VOP2_Real_gfx10_gfx11_gfx12<bits<6> op> : 2129 VOP2_Real_gfx10_gfx11<op>, VOP2_Real_FULL<GFX12Gen, op>; 2130 2131multiclass VOP2_Real_with_name_gfx10<bits<6> op, string opName, 2132 string asmName> : 2133 VOP2_Real_e32_gfx10_with_name<op, opName, asmName>, 2134 VOP2_Real_e64_gfx10_with_name<op, opName, asmName>, 2135 VOP2_Real_sdwa_gfx10_with_name<op, opName, asmName>, 2136 VOP2_Real_dpp_gfx10_with_name<op, opName, asmName>, 2137 VOP2_Real_dpp8_gfx10_with_name<op, opName, asmName>; 2138 2139multiclass VOP2_Real_with_name_gfx10_gfx11_gfx12<bits<6> op, string opName, 2140 string asmName> : 2141 VOP2_Real_with_name_gfx10<op, opName, asmName>, 2142 VOP2_Real_FULL_with_name<GFX11Gen, op, opName, asmName>, 2143 VOP2_Real_FULL_with_name<GFX12Gen, op, opName, asmName>; 2144 2145// NB: Same opcode as v_mac_legacy_f32 2146let DecoderNamespace = "GFX10_B" in 2147defm V_FMAC_LEGACY_F32 : VOP2_Real_gfx10<0x006>; 2148 2149defm V_XNOR_B32 : VOP2_Real_gfx10_gfx11_gfx12<0x01e>; 2150defm V_FMAC_F32 : VOP2_Real_gfx10_gfx11_gfx12<0x02b>; 2151defm V_FMAMK_F32 : VOP2Only_Real_MADK_gfx10_gfx11_gfx12<0x02c>; 2152defm V_FMAAK_F32 : VOP2Only_Real_MADK_gfx10_gfx11_gfx12<0x02d>; 2153defm V_ADD_F16 : VOP2_Real_gfx10<0x032>; 2154defm V_SUB_F16 : VOP2_Real_gfx10<0x033>; 2155defm V_SUBREV_F16 : VOP2_Real_gfx10<0x034>; 2156defm V_MUL_F16 : VOP2_Real_gfx10<0x035>; 2157defm V_FMAC_F16 : VOP2_Real_gfx10<0x036>; 2158defm V_FMAMK_F16 : VOP2Only_Real_MADK_gfx10<0x037>; 2159defm V_FMAAK_F16 : VOP2Only_Real_MADK_gfx10<0x038>; 2160defm V_MAX_F16 : VOP2_Real_gfx10<0x039>; 2161defm V_MIN_F16 : VOP2_Real_gfx10<0x03a>; 2162defm V_LDEXP_F16 : VOP2_Real_gfx10<0x03b>; 2163 2164let IsSingle = 1 in { 2165 defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx10<0x03c>; 2166} 2167 2168// VOP2 no carry-in, carry-out. 2169defm V_ADD_NC_U32 : 2170 VOP2_Real_with_name_gfx10_gfx11_gfx12<0x025, "V_ADD_U32", "v_add_nc_u32">; 2171defm V_SUB_NC_U32 : 2172 VOP2_Real_with_name_gfx10_gfx11_gfx12<0x026, "V_SUB_U32", "v_sub_nc_u32">; 2173defm V_SUBREV_NC_U32 : 2174 VOP2_Real_with_name_gfx10_gfx11_gfx12<0x027, "V_SUBREV_U32", "v_subrev_nc_u32">; 2175 2176// VOP2 carry-in, carry-out. 2177defm V_ADD_CO_CI_U32 : 2178 VOP2be_Real_gfx10<0x028, "V_ADDC_U32", "v_add_co_ci_u32">; 2179defm V_SUB_CO_CI_U32 : 2180 VOP2be_Real_gfx10<0x029, "V_SUBB_U32", "v_sub_co_ci_u32">; 2181defm V_SUBREV_CO_CI_U32 : 2182 VOP2be_Real_gfx10<0x02a, "V_SUBBREV_U32", "v_subrev_co_ci_u32">; 2183 2184defm V_CNDMASK_B32 : 2185 VOP2e_Real_gfx10<0x001, "V_CNDMASK_B32", "v_cndmask_b32">; 2186 2187// VOP3 only. 2188defm V_BFM_B32 : VOP3Only_Real_gfx10<0x363>; 2189defm V_BCNT_U32_B32 : VOP3Only_Real_gfx10<0x364>; 2190defm V_MBCNT_LO_U32_B32 : VOP3Only_Real_gfx10<0x365>; 2191defm V_MBCNT_HI_U32_B32 : VOP3Only_Real_gfx10<0x366>; 2192defm V_LDEXP_F32 : VOP3Only_Real_gfx10<0x362>; 2193defm V_CVT_PKNORM_I16_F32 : VOP3Only_Real_gfx10<0x368>; 2194defm V_CVT_PKNORM_U16_F32 : VOP3Only_Real_gfx10<0x369>; 2195defm V_CVT_PK_U16_U32 : VOP3Only_Real_gfx10<0x36a>; 2196defm V_CVT_PK_I16_I32 : VOP3Only_Real_gfx10<0x36b>; 2197 2198// VOP3 carry-out. 2199defm V_ADD_CO_U32 : VOP3beOnly_Real_gfx10<0x30f>; 2200defm V_SUB_CO_U32 : VOP3beOnly_Real_gfx10<0x310>; 2201defm V_SUBREV_CO_U32 : VOP3beOnly_Real_gfx10<0x319>; 2202 2203let SubtargetPredicate = isGFX10Only in { 2204 defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx10>; 2205 2206 defm : VOP2bInstAliases< 2207 V_ADDC_U32_e32, V_ADD_CO_CI_U32_e32_gfx10, "v_add_co_ci_u32">; 2208 defm : VOP2bInstAliases< 2209 V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx10, "v_sub_co_ci_u32">; 2210 defm : VOP2bInstAliases< 2211 V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx10, "v_subrev_co_ci_u32">; 2212} // End SubtargetPredicate = isGFX10Only 2213 2214//===----------------------------------------------------------------------===// 2215// GFX6, GFX7, GFX10, GFX11 2216//===----------------------------------------------------------------------===// 2217 2218class VOP2_DPPe <bits<6> op, VOP2_DPP_Pseudo ps, VOPProfile P = ps.Pfl> : 2219 VOP_DPPe <P> { 2220 bits<8> vdst; 2221 bits<8> src1; 2222 let Inst{8-0} = 0xfa; //dpp 2223 let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0); 2224 let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); 2225 let Inst{30-25} = op; 2226 let Inst{31} = 0x0; //encoding 2227} 2228 2229let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in { 2230 multiclass VOP2_Lane_Real_gfx6_gfx7<bits<6> op> { 2231 def _gfx6_gfx7 : 2232 VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>, 2233 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>; 2234 } 2235 multiclass VOP2Only_Real_MADK_gfx6_gfx7<bits<6> op> { 2236 def _gfx6_gfx7 : 2237 VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>, 2238 VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>; 2239 } 2240 multiclass VOP2_Real_e32_gfx6_gfx7<bits<6> op, string opName = NAME> { 2241 def _e32_gfx6_gfx7 : 2242 VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.SI>, 2243 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl>; 2244 } 2245 multiclass VOP2_Real_e64_gfx6_gfx7<bits<6> op, string opName = NAME> { 2246 def _e64_gfx6_gfx7 : 2247 VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.SI>, 2248 VOP3e_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(opName#"_e64").Pfl>; 2249 } 2250 multiclass VOP2be_Real_e64_gfx6_gfx7<bits<6> op, string opName = NAME> { 2251 def _e64_gfx6_gfx7 : 2252 VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.SI>, 2253 VOP3be_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(opName#"_e64").Pfl>; 2254 } 2255} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" 2256 2257multiclass VOP2Only_Real_MADK_gfx6_gfx7_gfx10<bits<6> op> : 2258 VOP2Only_Real_MADK_gfx6_gfx7<op>, VOP2Only_Real_MADK_gfx10<op>; 2259 2260multiclass VOP2_Real_gfx6_gfx7<bits<6> op> : 2261 VOP2_Real_e32_gfx6_gfx7<op>, VOP2_Real_e64_gfx6_gfx7<op>; 2262 2263multiclass VOP2_Real_gfx6_gfx7_gfx10<bits<6> op> : 2264 VOP2_Real_gfx6_gfx7<op>, VOP2_Real_gfx10<op>; 2265 2266multiclass VOP2_Real_gfx6_gfx7_gfx10_gfx11<bits<6> op> : 2267 VOP2_Real_gfx6_gfx7_gfx10<op>, VOP2_Real_FULL<GFX11Gen, op>; 2268 2269multiclass VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<bits<6> op> : 2270 VOP2_Real_gfx6_gfx7_gfx10_gfx11<op>, VOP2_Real_FULL<GFX12Gen, op>; 2271 2272multiclass VOP2be_Real_gfx6_gfx7<bits<6> op> : 2273 VOP2_Real_e32_gfx6_gfx7<op>, VOP2be_Real_e64_gfx6_gfx7<op>; 2274 2275multiclass VOP2be_Real_gfx6_gfx7_with_name<bits<6> op, 2276 string opName, string asmName> { 2277 defvar ps32 = !cast<VOP2_Pseudo>(opName#"_e32"); 2278 defvar ps64 = !cast<VOP3_Pseudo>(opName#"_e64"); 2279 2280 let AsmString = asmName # ps32.AsmOperands in { 2281 defm "" : VOP2_Real_e32_gfx6_gfx7<op, opName>; 2282 } 2283 2284 let AsmString = asmName # ps64.AsmOperands in { 2285 defm "" : VOP2be_Real_e64_gfx6_gfx7<op, opName>; 2286 } 2287} 2288 2289defm V_CNDMASK_B32 : VOP2_Real_gfx6_gfx7<0x000>; 2290defm V_MIN_LEGACY_F32 : VOP2_Real_gfx6_gfx7<0x00d>; 2291defm V_MAX_LEGACY_F32 : VOP2_Real_gfx6_gfx7<0x00e>; 2292defm V_LSHR_B32 : VOP2_Real_gfx6_gfx7<0x015>; 2293defm V_ASHR_I32 : VOP2_Real_gfx6_gfx7<0x017>; 2294defm V_LSHL_B32 : VOP2_Real_gfx6_gfx7<0x019>; 2295defm V_BFM_B32 : VOP2_Real_gfx6_gfx7<0x01e>; 2296defm V_BCNT_U32_B32 : VOP2_Real_gfx6_gfx7<0x022>; 2297defm V_MBCNT_LO_U32_B32 : VOP2_Real_gfx6_gfx7<0x023>; 2298defm V_MBCNT_HI_U32_B32 : VOP2_Real_gfx6_gfx7<0x024>; 2299defm V_LDEXP_F32 : VOP2_Real_gfx6_gfx7<0x02b>; 2300defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_gfx6_gfx7<0x02c>; 2301defm V_CVT_PKNORM_I16_F32 : VOP2_Real_gfx6_gfx7<0x02d>; 2302defm V_CVT_PKNORM_U16_F32 : VOP2_Real_gfx6_gfx7<0x02e>; 2303defm V_CVT_PK_U16_U32 : VOP2_Real_gfx6_gfx7<0x030>; 2304defm V_CVT_PK_I16_I32 : VOP2_Real_gfx6_gfx7<0x031>; 2305 2306// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 where renamed to *_U32 in 2307// VI, but the VI instructions behave the same as the SI versions. 2308defm V_ADD_I32 : VOP2be_Real_gfx6_gfx7_with_name<0x025, "V_ADD_CO_U32", "v_add_i32">; 2309defm V_SUB_I32 : VOP2be_Real_gfx6_gfx7_with_name<0x026, "V_SUB_CO_U32", "v_sub_i32">; 2310defm V_SUBREV_I32 : VOP2be_Real_gfx6_gfx7_with_name<0x027, "V_SUBREV_CO_U32", "v_subrev_i32">; 2311defm V_ADDC_U32 : VOP2be_Real_gfx6_gfx7<0x028>; 2312defm V_SUBB_U32 : VOP2be_Real_gfx6_gfx7<0x029>; 2313defm V_SUBBREV_U32 : VOP2be_Real_gfx6_gfx7<0x02a>; 2314 2315defm V_READLANE_B32 : VOP2_Lane_Real_gfx6_gfx7<0x001>; 2316 2317let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in { 2318 defm V_WRITELANE_B32 : VOP2_Lane_Real_gfx6_gfx7<0x002>; 2319} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) 2320 2321let SubtargetPredicate = isGFX6GFX7 in { 2322 defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx6_gfx7>; 2323 defm : VOP2eInstAliases<V_ADD_CO_U32_e32, V_ADD_I32_e32_gfx6_gfx7>; 2324 defm : VOP2eInstAliases<V_SUB_CO_U32_e32, V_SUB_I32_e32_gfx6_gfx7>; 2325 defm : VOP2eInstAliases<V_SUBREV_CO_U32_e32, V_SUBREV_I32_e32_gfx6_gfx7>; 2326 2327 def : VOP2e64InstAlias<V_ADD_CO_U32_e64, V_ADD_I32_e64_gfx6_gfx7>; 2328 def : VOP2e64InstAlias<V_SUB_CO_U32_e64, V_SUB_I32_e64_gfx6_gfx7>; 2329 def : VOP2e64InstAlias<V_SUBREV_CO_U32_e64, V_SUBREV_I32_e64_gfx6_gfx7>; 2330} // End SubtargetPredicate = isGFX6GFX7 2331 2332defm V_ADD_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x003>; 2333defm V_SUB_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x004>; 2334defm V_SUBREV_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x005>; 2335defm V_MAC_LEGACY_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x006>; 2336defm V_MUL_LEGACY_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x007>; 2337defm V_MUL_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x008>; 2338defm V_MUL_I32_I24 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x009>; 2339defm V_MUL_HI_I32_I24 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00a>; 2340defm V_MUL_U32_U24 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00b>; 2341defm V_MUL_HI_U32_U24 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00c>; 2342defm V_MIN_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x00f>; 2343defm V_MAX_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x010>; 2344defm V_MIN_I32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x011>; 2345defm V_MAX_I32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x012>; 2346defm V_MIN_U32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x013>; 2347defm V_MAX_U32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x014>; 2348defm V_LSHRREV_B32 : VOP2_Real_gfx6_gfx7_gfx10<0x016>; 2349defm V_ASHRREV_I32 : VOP2_Real_gfx6_gfx7_gfx10<0x018>; 2350defm V_LSHLREV_B32 : VOP2_Real_gfx6_gfx7_gfx10<0x01a>; 2351defm V_AND_B32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x01b>; 2352defm V_OR_B32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x01c>; 2353defm V_XOR_B32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x01d>; 2354defm V_MAC_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x01f>; 2355defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x02f>; 2356defm V_MADMK_F32 : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x020>; 2357defm V_MADAK_F32 : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x021>; 2358 2359//===----------------------------------------------------------------------===// 2360// GFX8, GFX9 (VI). 2361//===----------------------------------------------------------------------===// 2362 2363let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in { 2364 2365multiclass VOP2_Real_MADK_vi <bits<6> op> { 2366 def _vi : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.VI>, 2367 VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>; 2368} 2369 2370multiclass VOP2_Real_MADK_gfx940 <bits<6> op> { 2371 def _gfx940 : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.GFX940>, 2372 VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl> { 2373 let DecoderNamespace = "GFX9"; 2374 } 2375} 2376 2377multiclass VOP2_Real_e32_vi <bits<6> op> { 2378 def _e32_vi : 2379 VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>, 2380 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>; 2381} 2382 2383multiclass VOP2_Real_e64_vi <bits<10> op> { 2384 def _e64_vi : 2385 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>, 2386 VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; 2387} 2388 2389multiclass VOP2_Real_e64only_vi <bits<10> op> { 2390 def _e64_vi : 2391 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>, 2392 VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> { 2393 let IsSingle = 1; 2394 } 2395} 2396 2397multiclass Base_VOP2_Real_e32e64_vi <bits<6> op> : 2398 VOP2_Real_e32_vi<op>, 2399 VOP2_Real_e64_vi<{0, 1, 0, 0, op{5-0}}>; 2400 2401} // End AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" 2402 2403multiclass VOP2_SDWA8_Real <bits<6> op> { 2404 if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA then 2405 def _sdwa_vi : 2406 VOP_SDWA8_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>, 2407 VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>; 2408} 2409 2410multiclass VOP2_SDWA9_Real <bits<6> op> { 2411 if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then 2412 def _sdwa_gfx9 : 2413 VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>, 2414 VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>; 2415} 2416 2417let AssemblerPredicate = isGFX8Only, DecoderNamespace = "GFX8" in { 2418 2419multiclass VOP2be_Real_e32e64_vi_only <bits<6> op, string OpName, string AsmName> { 2420 def _e32_vi : 2421 VOP2_Real<!cast<VOP2_Pseudo>(OpName#"_e32"), SIEncodingFamily.VI>, 2422 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(OpName#"_e32").Pfl> { 2423 VOP2_Pseudo ps = !cast<VOP2_Pseudo>(OpName#"_e32"); 2424 let AsmString = AsmName # ps.AsmOperands; 2425 } 2426 def _e64_vi : 2427 VOP3_Real<!cast<VOP3_Pseudo>(OpName#"_e64"), SIEncodingFamily.VI>, 2428 VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(OpName#"_e64").Pfl> { 2429 VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName#"_e64"); 2430 let AsmString = AsmName # ps.AsmOperands; 2431 } 2432 if !cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtSDWA then 2433 def _sdwa_vi : 2434 VOP_SDWA8_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>, 2435 VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> { 2436 VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa"); 2437 let AsmString = AsmName # ps.AsmOperands; 2438 } 2439 if !cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP then 2440 def _dpp_vi : 2441 VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.VI>, 2442 VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> { 2443 VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp"); 2444 let AsmString = AsmName # ps.AsmOperands; 2445 } 2446} 2447 2448} // End AssemblerPredicate = isGFX8Only, DecoderNamespace = "GFX8" 2449 2450let DecoderNamespace = "GFX9" in { 2451 2452multiclass VOP2be_Real_e32e64_gfx9 <bits<6> op, string OpName, string AsmName> { 2453 def _e32_gfx9 : 2454 VOP2_Real<!cast<VOP2_Pseudo>(OpName#"_e32"), SIEncodingFamily.GFX9>, 2455 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(OpName#"_e32").Pfl> { 2456 VOP2_Pseudo ps = !cast<VOP2_Pseudo>(OpName#"_e32"); 2457 let AsmString = AsmName # ps.AsmOperands; 2458 } 2459 def _e64_gfx9 : 2460 VOP3_Real<!cast<VOP3_Pseudo>(OpName#"_e64"), SIEncodingFamily.GFX9>, 2461 VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(OpName#"_e64").Pfl> { 2462 VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName#"_e64"); 2463 let AsmString = AsmName # ps.AsmOperands; 2464 } 2465 if !cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtSDWA9 then 2466 def _sdwa_gfx9 : 2467 VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>, 2468 VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> { 2469 VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa"); 2470 let AsmString = AsmName # ps.AsmOperands; 2471 } 2472 if !cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP then 2473 def _dpp_gfx9 : 2474 VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.GFX9>, 2475 VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> { 2476 VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp"); 2477 let AsmString = AsmName # ps.AsmOperands; 2478 } 2479} 2480 2481multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> { 2482 def _e32_gfx9 : 2483 VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX9>, 2484 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>; 2485 def _e64_gfx9 : 2486 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX9>, 2487 VOP3e_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; 2488 if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then 2489 def _sdwa_gfx9 : 2490 VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>, 2491 VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> { 2492 } 2493 if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then 2494 def _dpp_gfx9 : 2495 VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>, 2496 VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>; 2497} 2498 2499} // End DecoderNamespace = "GFX9" 2500 2501multiclass VOP2_Real_e32e64_vi <bits<6> op> : 2502 Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA8_Real<op>, VOP2_SDWA9_Real<op> { 2503 2504 if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then 2505 def _dpp_vi : 2506 VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>, 2507 VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>; 2508} 2509 2510defm V_CNDMASK_B32 : VOP2_Real_e32e64_vi <0x0>; 2511defm V_ADD_F32 : VOP2_Real_e32e64_vi <0x1>; 2512defm V_SUB_F32 : VOP2_Real_e32e64_vi <0x2>; 2513defm V_SUBREV_F32 : VOP2_Real_e32e64_vi <0x3>; 2514let OtherPredicates = [isGCN3ExcludingGFX90A] in 2515defm V_MUL_LEGACY_F32 : VOP2_Real_e32e64_vi <0x4>; 2516defm V_MUL_F32 : VOP2_Real_e32e64_vi <0x5>; 2517defm V_MUL_I32_I24 : VOP2_Real_e32e64_vi <0x6>; 2518defm V_MUL_HI_I32_I24 : VOP2_Real_e32e64_vi <0x7>; 2519defm V_MUL_U32_U24 : VOP2_Real_e32e64_vi <0x8>; 2520defm V_MUL_HI_U32_U24 : VOP2_Real_e32e64_vi <0x9>; 2521defm V_MIN_F32 : VOP2_Real_e32e64_vi <0xa>; 2522defm V_MAX_F32 : VOP2_Real_e32e64_vi <0xb>; 2523defm V_MIN_I32 : VOP2_Real_e32e64_vi <0xc>; 2524defm V_MAX_I32 : VOP2_Real_e32e64_vi <0xd>; 2525defm V_MIN_U32 : VOP2_Real_e32e64_vi <0xe>; 2526defm V_MAX_U32 : VOP2_Real_e32e64_vi <0xf>; 2527defm V_LSHRREV_B32 : VOP2_Real_e32e64_vi <0x10>; 2528defm V_ASHRREV_I32 : VOP2_Real_e32e64_vi <0x11>; 2529defm V_LSHLREV_B32 : VOP2_Real_e32e64_vi <0x12>; 2530defm V_AND_B32 : VOP2_Real_e32e64_vi <0x13>; 2531defm V_OR_B32 : VOP2_Real_e32e64_vi <0x14>; 2532defm V_XOR_B32 : VOP2_Real_e32e64_vi <0x15>; 2533defm V_MAC_F32 : VOP2_Real_e32e64_vi <0x16>; 2534defm V_MADMK_F32 : VOP2_Real_MADK_vi <0x17>; 2535defm V_MADAK_F32 : VOP2_Real_MADK_vi <0x18>; 2536 2537defm V_ADD_U32 : VOP2be_Real_e32e64_vi_only <0x19, "V_ADD_CO_U32", "v_add_u32">; 2538defm V_SUB_U32 : VOP2be_Real_e32e64_vi_only <0x1a, "V_SUB_CO_U32", "v_sub_u32">; 2539defm V_SUBREV_U32 : VOP2be_Real_e32e64_vi_only <0x1b, "V_SUBREV_CO_U32", "v_subrev_u32">; 2540defm V_ADDC_U32 : VOP2be_Real_e32e64_vi_only <0x1c, "V_ADDC_U32", "v_addc_u32">; 2541defm V_SUBB_U32 : VOP2be_Real_e32e64_vi_only <0x1d, "V_SUBB_U32", "v_subb_u32">; 2542defm V_SUBBREV_U32 : VOP2be_Real_e32e64_vi_only <0x1e, "V_SUBBREV_U32", "v_subbrev_u32">; 2543 2544let AssemblerPredicate = isGFX9Only in { 2545defm V_ADD_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x19, "V_ADD_CO_U32", "v_add_co_u32">; 2546defm V_SUB_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1a, "V_SUB_CO_U32", "v_sub_co_u32">; 2547defm V_SUBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1b, "V_SUBREV_CO_U32", "v_subrev_co_u32">; 2548defm V_ADDC_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1c, "V_ADDC_U32", "v_addc_co_u32">; 2549defm V_SUBB_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1d, "V_SUBB_U32", "v_subb_co_u32">; 2550defm V_SUBBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1e, "V_SUBBREV_U32", "v_subbrev_co_u32">; 2551 2552defm V_ADD_U32 : VOP2_Real_e32e64_gfx9 <0x34>; 2553defm V_SUB_U32 : VOP2_Real_e32e64_gfx9 <0x35>; 2554defm V_SUBREV_U32 : VOP2_Real_e32e64_gfx9 <0x36>; 2555} // End AssemblerPredicate = isGFX9Only 2556 2557defm V_BFM_B32 : VOP2_Real_e64only_vi <0x293>; 2558defm V_BCNT_U32_B32 : VOP2_Real_e64only_vi <0x28b>; 2559defm V_MBCNT_LO_U32_B32 : VOP2_Real_e64only_vi <0x28c>; 2560defm V_MBCNT_HI_U32_B32 : VOP2_Real_e64only_vi <0x28d>; 2561defm V_LDEXP_F32 : VOP2_Real_e64only_vi <0x288>; 2562defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e64only_vi <0x1f0>; 2563defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e64only_vi <0x294>; 2564defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e64only_vi <0x295>; 2565defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_e64only_vi <0x296>; 2566defm V_CVT_PK_U16_U32 : VOP2_Real_e64only_vi <0x297>; 2567defm V_CVT_PK_I16_I32 : VOP2_Real_e64only_vi <0x298>; 2568 2569defm V_ADD_F16 : VOP2_Real_e32e64_vi <0x1f>; 2570defm V_SUB_F16 : VOP2_Real_e32e64_vi <0x20>; 2571defm V_SUBREV_F16 : VOP2_Real_e32e64_vi <0x21>; 2572defm V_MUL_F16 : VOP2_Real_e32e64_vi <0x22>; 2573defm V_MAC_F16 : VOP2_Real_e32e64_vi <0x23>; 2574defm V_MADMK_F16 : VOP2_Real_MADK_vi <0x24>; 2575defm V_MADAK_F16 : VOP2_Real_MADK_vi <0x25>; 2576defm V_ADD_U16 : VOP2_Real_e32e64_vi <0x26>; 2577defm V_SUB_U16 : VOP2_Real_e32e64_vi <0x27>; 2578defm V_SUBREV_U16 : VOP2_Real_e32e64_vi <0x28>; 2579defm V_MUL_LO_U16 : VOP2_Real_e32e64_vi <0x29>; 2580defm V_LSHLREV_B16 : VOP2_Real_e32e64_vi <0x2a>; 2581defm V_LSHRREV_B16 : VOP2_Real_e32e64_vi <0x2b>; 2582defm V_ASHRREV_I16 : VOP2_Real_e32e64_vi <0x2c>; 2583defm V_MAX_F16 : VOP2_Real_e32e64_vi <0x2d>; 2584defm V_MIN_F16 : VOP2_Real_e32e64_vi <0x2e>; 2585defm V_MAX_U16 : VOP2_Real_e32e64_vi <0x2f>; 2586defm V_MAX_I16 : VOP2_Real_e32e64_vi <0x30>; 2587defm V_MIN_U16 : VOP2_Real_e32e64_vi <0x31>; 2588defm V_MIN_I16 : VOP2_Real_e32e64_vi <0x32>; 2589defm V_LDEXP_F16 : VOP2_Real_e32e64_vi <0x33>; 2590 2591let SubtargetPredicate = isGFX8GFX9 in { 2592 2593// Aliases to simplify matching of floating-point instructions that 2594// are VOP2 on SI and VOP3 on VI. 2595class SI2_VI3Alias <string name, VOP3_Real inst> : InstAlias < 2596 name#" $dst, $src0, $src1", 2597 !if(inst.Pfl.HasOMod, 2598 (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0), 2599 (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0)) 2600>, PredicateControl { 2601 let UseInstAsmMatchConverter = 0; 2602 let AsmVariantName = AMDGPUAsmVariants.VOP3; 2603} 2604 2605def : SI2_VI3Alias <"v_ldexp_f32", V_LDEXP_F32_e64_vi>; 2606def : SI2_VI3Alias <"v_cvt_pkaccum_u8_f32", V_CVT_PKACCUM_U8_F32_e64_vi>; 2607def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>; 2608def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>; 2609def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>; 2610 2611defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_vi>; 2612 2613} // End SubtargetPredicate = isGFX8GFX9 2614 2615let SubtargetPredicate = isGFX9Only in { 2616 2617defm : VOP2bInstAliases<V_ADD_U32_e32, V_ADD_CO_U32_e32_gfx9, "v_add_co_u32">; 2618defm : VOP2bInstAliases<V_ADDC_U32_e32, V_ADDC_CO_U32_e32_gfx9, "v_addc_co_u32">; 2619defm : VOP2bInstAliases<V_SUB_U32_e32, V_SUB_CO_U32_e32_gfx9, "v_sub_co_u32">; 2620defm : VOP2bInstAliases<V_SUBB_U32_e32, V_SUBB_CO_U32_e32_gfx9, "v_subb_co_u32">; 2621defm : VOP2bInstAliases<V_SUBREV_U32_e32, V_SUBREV_CO_U32_e32_gfx9, "v_subrev_co_u32">; 2622defm : VOP2bInstAliases<V_SUBBREV_U32_e32, V_SUBBREV_CO_U32_e32_gfx9, "v_subbrev_co_u32">; 2623 2624} // End SubtargetPredicate = isGFX9Only 2625 2626let SubtargetPredicate = HasDLInsts in { 2627 2628defm V_FMAC_F32 : VOP2_Real_e32e64_vi <0x3b>; 2629defm V_XNOR_B32 : VOP2_Real_e32e64_vi <0x3d>; 2630 2631} // End SubtargetPredicate = HasDLInsts 2632 2633let DecoderNamespace = "GFX90A" in { 2634 multiclass VOP2_Real_e32_gfx90a <bits<6> op> { 2635 def _e32_gfx90a : 2636 VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX90A>, 2637 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>; 2638 } 2639 2640 multiclass VOP2_Real_e64_gfx90a <bits<10> op> { 2641 def _e64_gfx90a : 2642 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX90A>, 2643 VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; 2644 } 2645 2646 multiclass Base_VOP2_Real_e32e64_gfx90a <bits<6> op> : 2647 VOP2_Real_e32_gfx90a<op>, 2648 VOP2_Real_e64_gfx90a<{0, 1, 0, 0, op{5-0}}>; 2649 2650 multiclass VOP2_Real_e32e64_gfx90a <bits<6> op> : 2651 Base_VOP2_Real_e32e64_gfx90a<op> { 2652 2653 if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then 2654 def _dpp_gfx90a : 2655 VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX90A>, 2656 VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> { 2657 let DecoderNamespace = "GFX9"; 2658 } 2659 } 2660} // End AssemblerPredicate = isGFX90APlus, DecoderNamespace = "GFX90A" 2661 2662let SubtargetPredicate = HasFmacF64Inst in { 2663 defm V_FMAC_F64 : VOP2_Real_e32e64_gfx90a <0x4>; 2664} // End SubtargetPredicate = HasFmacF64Inst 2665 2666let IsSingle = 1 in { 2667 defm V_MUL_LEGACY_F32 : VOP2_Real_e64_gfx90a <0x2a1>; 2668} 2669 2670let SubtargetPredicate = HasFmaakFmamkF32Insts in { 2671defm V_FMAMK_F32 : VOP2_Real_MADK_gfx940 <0x17>; 2672defm V_FMAAK_F32 : VOP2_Real_MADK_gfx940 <0x18>; 2673} 2674 2675multiclass VOP2_Real_DOT_ACC_gfx9<bits<6> op> : Base_VOP2_Real_e32e64_vi<op> { 2676 let SubtargetPredicate = isGFX9Only in 2677 def _dpp_vi : VOP2_DPP<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>; 2678} 2679 2680multiclass VOP2_Real_DOT_ACC_gfx10<bits<6> op> : 2681 VOP2_Real_e32_gfx10<op>, 2682 VOP2_Real_dpp_gfx10<op>, 2683 VOP2_Real_dpp8_gfx10<op>; 2684 2685multiclass VOP2Only_Real_DOT_ACC_gfx10<bits<6> op> : VOP2_Real_dpp_gfx10<op>, 2686 VOP2_Real_dpp8_gfx10<op> { 2687 let IsSingle = 1 in 2688 defm NAME : VOP2_Real_e32_gfx10<op>; 2689} 2690 2691let OtherPredicates = [HasDot5Insts] in { 2692 defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx9<0x37>; 2693 // NB: Opcode conflicts with V_DOT8C_I32_I4 2694 // This opcode exists in gfx 10.1* only 2695 defm V_DOT2C_F32_F16 : VOP2Only_Real_DOT_ACC_gfx10<0x02>; 2696} 2697 2698let OtherPredicates = [HasDot6Insts] in { 2699 defm V_DOT4C_I32_I8 : VOP2_Real_DOT_ACC_gfx9<0x39>; 2700 defm V_DOT4C_I32_I8 : VOP2Only_Real_DOT_ACC_gfx10<0x0d>; 2701} 2702 2703let OtherPredicates = [HasDot4Insts] in { 2704 defm V_DOT2C_I32_I16 : VOP2_Real_DOT_ACC_gfx9<0x38>; 2705} 2706let OtherPredicates = [HasDot3Insts] in { 2707 defm V_DOT8C_I32_I4 : VOP2_Real_DOT_ACC_gfx9<0x3a>; 2708} 2709 2710let SubtargetPredicate = HasPkFmacF16Inst in { 2711defm V_PK_FMAC_F16 : VOP2_Real_e32_vi<0x3c>; 2712} // End SubtargetPredicate = HasPkFmacF16Inst 2713 2714let SubtargetPredicate = HasDot3Insts in { 2715 // NB: Opcode conflicts with V_DOT2C_F32_F16 2716 let DecoderNamespace = "GFX10_B" in 2717 defm V_DOT8C_I32_I4 : VOP2_Real_DOT_ACC_gfx10<0x02>; 2718} 2719 2720let OtherPredicates = [HasDot13Insts] in { 2721 let DecoderNamespace = "GFX950" in 2722 defm V_DOT2C_F32_BF16 : VOP2_Real_DOT_ACC_gfx9<0x16>; 2723} 2724