xref: /llvm-project/llvm/lib/Target/AMDGPU/VOP2Instructions.td (revision 8a0c2e75678a4d1d479676217db622d1981c18d3)
1//===-- VOP2Instructions.td - Vector Instruction Definitions --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9//===----------------------------------------------------------------------===//
10// VOP2 Classes
11//===----------------------------------------------------------------------===//
12
// 32-bit VOP2 encoding.
//
// Bit layout assigned below (high to low):
//   Inst{31}      fixed 0 (encoding)
//   Inst{30...25} op
//   Inst{24...17} vdst, or 0 when the profile does not emit a destination
//   Inst{16...9}  src1, or 0 when the profile has no src1
//   Inst{8...0}   src0, or 0 when the profile has no src0
class VOP2e <bits<6> op, VOPProfile P> : Enc32 {
  bits<8> vdst;
  bits<9> src0;
  bits<8> src1;

  let Inst{31}      = 0; // encoding
  let Inst{30...25} = op;
  let Inst{24...17} = !if(P.EmitDst, vdst, 0);
  let Inst{16...9}  = !if(P.HasSrc1, src1, 0);
  let Inst{8...0}   = !if(P.HasSrc0, src0, 0);
}
24
// 64-bit VOP2 encoding that carries a 32-bit immediate.
//
// The low dword uses the same field positions as VOP2e; the high dword
// (Inst{63...32}) holds `imm`.
class VOP2_MADKe <bits<6> op, VOPProfile P> : Enc64 {
  bits<8>  vdst;
  bits<9>  src0;
  bits<8>  src1;
  bits<32> imm;

  // High dword: the literal constant.
  let Inst{63...32} = imm;

  // Low dword: opcode and register operands.
  let Inst{31}      = 0; // encoding
  let Inst{30...25} = op;
  let Inst{24...17} = !if(P.EmitDst, vdst, 0);
  let Inst{16...9}  = !if(P.HasSrc1, src1, 0);
  let Inst{8...0}   = !if(P.HasSrc0, src0, 0);
}
38
// VOP2 fields layered on top of the VOP_SDWAe encoding.
//
// The src0 slot (Inst{8...0}) is hard-wired to 0xf9, marked "sdwa" in the
// original; the remaining low-dword fields follow the plain VOP2 positions.
class VOP2_SDWAe <bits<6> op, VOPProfile P> : VOP_SDWAe <P> {
  bits<8> vdst;
  bits<8> src1;

  let Inst{31}      = 0; // encoding
  let Inst{30...25} = op;
  let Inst{24...17} = !if(P.EmitDst, vdst{7...0}, 0);
  let Inst{16...9}  = !if(P.HasSrc1, src1{7...0}, 0);
  let Inst{8...0}   = 0xf9; // sdwa
}
49
// VOP2 fields layered on top of the VOP_SDWA9Ae encoding.
//
// src1 is 9 bits wide here: the low 8 bits go in the usual VOP2 src1 slot and
// bit 8 is placed in Inst{63} (src1_sgpr).
class VOP2_SDWA9Ae <bits<6> op, VOPProfile P> : VOP_SDWA9Ae <P> {
  bits<8> vdst;
  bits<9> src1;

  let Inst{63}      = !if(P.HasSrc1, src1{8}, 0); // src1_sgpr
  let Inst{31}      = 0; // encoding
  let Inst{30...25} = op;
  let Inst{24...17} = !if(P.EmitDst, vdst{7...0}, 0);
  let Inst{16...9}  = !if(P.HasSrc1, src1{7...0}, 0);
  let Inst{8...0}   = 0xf9; // sdwa
}
61
// Pseudo instruction for the 32-bit (default "_e32" suffix) form of a VOP2
// operation. Operands and assembly string come from the profile's 32-bit
// variants (Outs32 / Ins32 / Asm32).
class VOP2_Pseudo <string opName,
                   VOPProfile P,
                   list<dag> pattern = [],
                   string suffix = "_e32"> :
  VOP_Pseudo <opName, suffix, P, P.Outs32, P.Ins32, "", pattern> {

  // Instruction classification.
  let VOP2 = 1;
  let VALU = 1;
  let Size = 4;

  // Assembly.
  let AsmOperands    = P.Asm32;
  let AsmVariantName = AMDGPUAsmVariants.Default;

  // Memory / side-effect properties.
  let mayLoad        = 0;
  let mayStore       = 0;
  let hasSideEffects = 0;

  // The instruction is treated as reading MODE whenever the destination or
  // src0 type is floating point; that same flag drives the FP-exception bit
  // and whether MODE is added to the implicit uses.
  let ReadsModeReg         = !or(P.DstVT.isFP, P.Src0VT.isFP);
  let mayRaiseFPException  = ReadsModeReg;
  let Uses                 = !if(ReadsModeReg, [MODE, EXEC], [EXEC]);
}
82
// Real (non-pseudo) VOP2 instruction derived from the pseudo `ps` for one
// encoding family.
//
//   ps             - pseudo whose operand lists, asm operands and flags are
//                    reused.
//   EncodingFamily - forwarded to SIMCInstr together with ps.PseudoInstr.
//   real_name      - mnemonic prefix of the asm string; defaults to the
//                    pseudo's mnemonic.
class VOP2_Real <VOP2_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemonic> :
  VOP_Real <ps>,
  InstSI <ps.OutOperandList, ps.InOperandList, real_name # ps.AsmOperands, []>,
  SIMCInstr <ps.PseudoInstr, EncodingFamily> {

  let VALU = 1;
  let VOP2 = 1;
  let isPseudo = 0;
  let isCodeGenOnly = 0;

  // Copy relevant pseudo op flags. Constraints and DisableEncoding are
  // assigned exactly once here (they used to be set twice with the same
  // value).
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let True16Predicate      = ps.True16Predicate;
  let OtherPredicates      = ps.OtherPredicates;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let AsmVariantName       = ps.AsmVariantName;
  let Constraints          = ps.Constraints;
  let DisableEncoding      = ps.DisableEncoding;
  let TSFlags              = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let Uses                 = ps.Uses;
  let Defs                 = ps.Defs;
  let SchedRW              = ps.SchedRW;
  let mayLoad              = ps.mayLoad;
  let mayStore             = ps.mayStore;
  let isConvergent         = ps.isConvergent;
}
113
// VOP2_Real specialised for one GFX generation record `Gen`: the encoding
// family, assembler predicate and decoder namespace are all taken from it.
// Profiles that are not "real true16" get the "_FAKE16" decoder-namespace
// suffix and NoTrue16Predicate.
class VOP2_Real_Gen <VOP2_Pseudo ps,
                     GFXGen Gen,
                     string real_name = ps.Mnemonic> :
  VOP2_Real <ps, Gen.Subtarget, real_name> {
  let DecoderNamespace =
      Gen.DecoderNamespace # !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
  let True16Predicate =
      !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate);
  let AssemblerPredicate = Gen.AssemblerPredicate;
}
121
// SDWA pseudo for a VOP2 operation; the only VOP2-specific piece is the
// assembler match converter name.
class VOP2_SDWA_Pseudo <string OpName,
                        VOPProfile P,
                        list<dag> pattern = []> :
  VOP_SDWA_Pseudo <OpName, P, pattern> {
  let AsmMatchConverter = "cvtSdwaVOP2";
}
126
// DPP pseudo for a VOP2 operation. Adds nothing on top of VOP_DPP_Pseudo; it
// exists as a VOP2-specific name.
class VOP2_DPP_Pseudo <string OpName,
                       VOPProfile P,
                       list<dag> pattern = []> :
  VOP_DPP_Pseudo <OpName, P, pattern>;
130
131
// Builds the selection pattern list (`ret`) for a two-source operation.
//
// With modifiers: src0 is matched through VOP3Mods0 (which also binds $clamp,
// and $omod when the profile has output modifiers) and src1 through VOP3Mods.
// Without modifiers: both sources are matched directly.
class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
  list<dag> ret =
    !if(P.HasModifiers,
        // Modifier-aware form.
        [(set P.DstVT:$vdst,
          (node
            (P.Src0VT
              !if(P.HasOMod,
                  (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
                             i1:$clamp, i32:$omod),
                  (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
                             i1:$clamp))),
            (P.Src1VT
              (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
        // Plain form.
        [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]);
}
142
// Defines the `_e32` pseudo for a VOP2 operation.
//   node  - optional selection node (null_frag for none).
//   revOp - name of the operand-reversed opcode; when it equals opName the
//           second Commutable_REV argument is true.
multiclass VOP2Inst_e32<string opName, VOPProfile P,
                        SDPatternOperator node = null_frag,
                        string revOp = opName> {
  def _e32 : VOP2_Pseudo<opName, P, VOPPatOrNull<node, P>.ret>,
             Commutable_REV<revOp # "_e32", !eq(revOp, opName)>;
}
// Same as VOP2Inst_e32, but the generated records additionally carry
// VOPD_Component<VOPDOp, VOPDName>.
multiclass VOP2Inst_e32_VOPD<string opName,
                             VOPProfile P,
                             bits<5> VOPDOp,
                             string VOPDName,
                             SDPatternOperator node = null_frag,
                             string revOp = opName> {
  defm NAME : VOP2Inst_e32<opName, P, node, revOp>,
              VOPD_Component<VOPDOp, VOPDName>;
}
// Defines the `_e64` (VOP3-encoded) pseudo for a VOP2 operation and, when the
// profile supports VOP3 DPP, the GFX11+ `_e64_dpp` pseudo.
multiclass VOP2Inst_e64<string opName, VOPProfile P,
                        SDPatternOperator node = null_frag,
                        string revOp = opName> {
  def _e64 : VOP3InstBase<opName, P, node, 1>,
             Commutable_REV<revOp # "_e64", !eq(revOp, opName)>;

  let SubtargetPredicate = isGFX11Plus in {
    if P.HasExtVOP3DPP then {
      def _e64_dpp : VOP3_DPP_Pseudo<opName, P>;
    }
  } // End SubtargetPredicate = isGFX11Plus
}
169
// Defines the `_sdwa` pseudo for a VOP2 operation, only when the profile
// reports SDWA support.
multiclass VOP2Inst_sdwa<string opName, VOPProfile P,
                         string revOp = opName> {
  if P.HasExtSDWA then {
    def _sdwa : VOP2_SDWA_Pseudo<opName, P>,
                Commutable_REV<revOp # "_sdwa", !eq(revOp, opName)>;
  }
}
177
// Full set of pseudos for a VOP2 operation: `_e32`, `_e64` (plus optional
// `_e64_dpp`), optional `_sdwa`, and optional `_dpp`.
multiclass VOP2Inst<string opName, VOPProfile P,
                    SDPatternOperator node = null_frag,
                    string revOp = opName>
    : VOP2Inst_e32<opName, P, node, revOp>,
      VOP2Inst_e64<opName, P, node, revOp>,
      VOP2Inst_sdwa<opName, P, revOp> {
  if P.HasExtDPP then {
    def _dpp : VOP2_DPP_Pseudo<opName, P>;
  }
}
188
// Instantiates VOP2Inst three times for a 16-bit operation:
//   NAME     - the original profile, for Has16BitInsts targets under
//              NotHasTrue16BitInsts;
//   _t16     - VOPProfile_True16<P>, under UseRealTrue16Insts;
//   _fake16  - VOPProfile_Fake16<P>, under UseFakeTrue16Insts.
multiclass VOP2Inst_t16<string opName, VOPProfile P,
                        SDPatternOperator node = null_frag,
                        string revOp = opName> {
  let OtherPredicates = [Has16BitInsts],
      True16Predicate = NotHasTrue16BitInsts in
  defm NAME : VOP2Inst<opName, P, node, revOp>;

  let SubtargetPredicate = UseRealTrue16Insts in
  defm _t16 : VOP2Inst<opName # "_t16", VOPProfile_True16<P>, node,
                       revOp # "_t16">;

  let SubtargetPredicate = UseFakeTrue16Insts in
  defm _fake16 : VOP2Inst<opName # "_fake16", VOPProfile_Fake16<P>, node,
                          revOp # "_fake16">;
}
203
// Variant of VOP2Inst_t16 for operations whose true16/fake16 forms exist only
// in the 64-bit encoding.
//
// Creating a _t16_e32 pseudo when no subtarget has a corresponding real
// instruction is a problem: getMCOpcodeGen then returns -1, which we take to
// mean the instruction is already a real one. The fix is simply not to create
// that _t16_e32 pseudo, so the _t16 and _fake16 variants below use
// VOP2Inst_e64 rather than the full VOP2Inst.
multiclass VOP2Inst_e64_t16<string opName, VOPProfile P,
                            SDPatternOperator node = null_frag,
                            string revOp = opName> {
  let OtherPredicates = [Has16BitInsts],
      True16Predicate = NotHasTrue16BitInsts in
  defm NAME : VOP2Inst<opName, P, node, revOp>;

  let SubtargetPredicate = UseRealTrue16Insts in
  defm _t16 : VOP2Inst_e64<opName # "_t16", VOPProfile_True16<P>, node,
                           revOp # "_t16">;

  let SubtargetPredicate = UseFakeTrue16Insts in
  defm _fake16 : VOP2Inst_e64<opName # "_fake16", VOPProfile_Fake16<P>, node,
                              revOp # "_fake16">;
}
222
// Same set of pseudos as VOP2Inst, except that the `_e32` form is created
// through VOP2Inst_e32_VOPD and therefore also carries the VOPD component
// information (VOPDOp / VOPDName).
multiclass VOP2Inst_VOPD<string opName, VOPProfile P,
                         bits<5> VOPDOp, string VOPDName,
                         SDPatternOperator node = null_frag,
                         string revOp = opName>
    : VOP2Inst_e32_VOPD<opName, P, VOPDOp, VOPDName, node, revOp>,
      VOP2Inst_e64<opName, P, node, revOp>,
      VOP2Inst_sdwa<opName, P, revOp> {
  if P.HasExtDPP then {
    def _dpp : VOP2_DPP_Pseudo<opName, P>;
  }
}
235
// Pseudos for VOP2 operations that write VCC in their 32-bit forms.
//
// The `_e32`, `_sdwa` and `_dpp` forms implicitly define VCC and, when
// useSGPRInput is set (by default: the profile has three source arguments),
// also implicitly use it. The `_e64` and `_e64_dpp` forms are created outside
// that implicit Uses/Defs scope. Everything is scheduled as
// [Write32Bit, WriteSALU].
multiclass VOP2bInst<string opName, VOPProfile P,
                     SDPatternOperator node = null_frag,
                     string revOp = opName,
                     bit useSGPRInput = !eq(P.NumSrcArgs, 3)> {
  let SchedRW = [Write32Bit, WriteSALU] in {
    let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
      def _e32 : VOP2_Pseudo<opName, P, VOPPatOrNull<node, P>.ret>,
                 Commutable_REV<revOp # "_e32", !eq(revOp, opName)> {
        let usesCustomInserter = true;
      }

      if P.HasExtSDWA then {
        def _sdwa : VOP2_SDWA_Pseudo<opName, P>,
                    Commutable_REV<revOp # "_sdwa", !eq(revOp, opName)> {
          let AsmMatchConverter = "cvtSdwaVOP2b";
        }
      }

      if P.HasExtDPP then {
        def _dpp : VOP2_DPP_Pseudo<opName, P>;
      }
    } // End Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC]

    def _e64 : VOP3InstBase<opName, P, node, 1>,
               Commutable_REV<revOp # "_e64", !eq(revOp, opName)>;

    let SubtargetPredicate = isGFX11Plus in {
      if P.HasExtVOP3DPP then {
        def _e64_dpp : VOP3_DPP_Pseudo<opName, P>;
      }
    } // End SubtargetPredicate = isGFX11Plus
  } // End SchedRW = [Write32Bit, WriteSALU]
}
266
// Assembler alias for a VOP2b instruction. The alias string is OpName followed
// by the pseudo's 32-bit asm operand string with every "vcc" replaced by
// `opnd`; it maps to `inst` with the vdst/src0/src1 operands.
class VOP2bInstAlias <VOP2_Pseudo ps,
                      Instruction inst,
                      string OpName,
                      string opnd> :
  InstAlias <OpName # " " # !subst("vcc", opnd, ps.Pfl.Asm32),
             (inst ps.Pfl.DstRC:$vdst,
                   ps.Pfl.Src0RC32:$src0,
                   ps.Pfl.Src1RC32:$src1),
             1, inst.AsmVariantName>,
  PredicateControl;
275
// Emits one VOP2bInstAlias per wave size: "vcc_lo" spelling under isWave32,
// "vcc" spelling under isWave64.
multiclass VOP2bInstAliases<VOP2_Pseudo ps, VOP2_Real inst, string OpName> {
  let WaveSizePredicate = isWave32 in
  def : VOP2bInstAlias<ps, inst, OpName, "vcc_lo">;

  let WaveSizePredicate = isWave64 in
  def : VOP2bInstAlias<ps, inst, OpName, "vcc">;
}
284
// Shared implementation behind VOP2eInst and VOP2eInst_VOPD.
//
// The `_e32`, `_sdwa` and `_dpp` forms implicitly use VCC when useSGPRInput is
// set. An empty VOPDName means "no VOPD component": the `_e32` pseudo is then
// created without VOPD_Component. The `_e64` form is marked
// rematerializable. No selection pattern is attached to `_e32` here.
multiclass VOP2eInst_Base<string opName, VOPProfile P,
                          bits<5> VOPDOp, string VOPDName,
                          SDPatternOperator node, string revOp,
                          bit useSGPRInput> {
  let SchedRW = [Write32Bit] in {
    let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in {
      if !empty(VOPDName) then {
        def _e32 : VOP2_Pseudo<opName, P>,
                   Commutable_REV<revOp # "_e32", !eq(revOp, opName)>;
      } else {
        def _e32 : VOP2_Pseudo<opName, P>,
                   Commutable_REV<revOp # "_e32", !eq(revOp, opName)>,
                   VOPD_Component<VOPDOp, VOPDName>;
      }

      if P.HasExtSDWA then {
        def _sdwa : VOP2_SDWA_Pseudo<opName, P> {
          let AsmMatchConverter = "cvtSdwaVOP2e";
        }
      }

      if P.HasExtDPP then {
        def _dpp : VOP2_DPP_Pseudo<opName, P>;
      }
    } // End Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC])

    def _e64 : VOP3InstBase<opName, P, node, 1>,
               Commutable_REV<revOp # "_e64", !eq(revOp, opName)> {
      let isReMaterializable = 1;
    }

    let SubtargetPredicate = isGFX11Plus in {
      if P.HasExtVOP3DPP then {
        def _e64_dpp : VOP3_DPP_Pseudo<opName, P>;
      }
    } // End SubtargetPredicate = isGFX11Plus
  } // End SchedRW = [Write32Bit]
}
319
// VOP2e instruction set without a VOPD component: forwards to VOP2eInst_Base
// with VOPDOp = 0 and an empty VOPDName.
multiclass VOP2eInst<string opName,
                     VOPProfile P,
                     SDPatternOperator node = null_frag,
                     string revOp = opName,
                     bit useSGPRInput = !eq(P.NumSrcArgs, 3)>
    : VOP2eInst_Base<opName, P, /*VOPDOp=*/0, /*VOPDName=*/"", node, revOp,
                     useSGPRInput>;
324
// VOP2e instruction set with a VOPD component: forwards VOPDOp / VOPDName to
// VOP2eInst_Base unchanged.
multiclass VOP2eInst_VOPD<string opName,
                          VOPProfile P,
                          bits<5> VOPDOp,
                          string VOPDName,
                          SDPatternOperator node = null_frag,
                          string revOp = opName,
                          bit useSGPRInput = !eq(P.NumSrcArgs, 3)>
    : VOP2eInst_Base<opName, P, VOPDOp, VOPDName, node, revOp, useSGPRInput>;
330
// Assembler alias for a VOP2e instruction: the pseudo's 32-bit asm string
// followed by ", " and `opnd`, mapped to `inst` with vdst/src0/src1.
class VOP2eInstAlias <VOP2_Pseudo ps,
                      Instruction inst,
                      string opnd = ""> :
  InstAlias <ps.OpName # " " # ps.Pfl.Asm32 # ", " # opnd,
             (inst ps.Pfl.DstRC:$vdst,
                   ps.Pfl.Src0RC32:$src0,
                   ps.Pfl.Src1RC32:$src1),
             1, inst.AsmVariantName>,
  PredicateControl;
337
// Assembler alias for the 64-bit form: uses the pseudo's Asm64 string and maps
// to `inst` with vdst, sdst, src0, src1 and clamp operands.
class VOP2e64InstAlias <VOP3_Pseudo ps, Instruction inst> :
  InstAlias <ps.OpName # " " # ps.Pfl.Asm64,
             (inst ps.Pfl.DstRC:$vdst,
                   VOPDstS64orS32:$sdst,
                   ps.Pfl.Src0RC32:$src0,
                   ps.Pfl.Src1RC32:$src1,
                   Clamp:$clamp),
             1, inst.AsmVariantName>,
  PredicateControl;
344
// Emits one VOP2eInstAlias per wave size: trailing "vcc_lo" under isWave32,
// trailing "vcc" under isWave64.
multiclass VOP2eInstAliases<VOP2_Pseudo ps, VOP2_Real inst> {
  let WaveSizePredicate = isWave32 in
  def : VOP2eInstAlias<ps, inst, "vcc_lo">;

  let WaveSizePredicate = isWave64 in
  def : VOP2eInstAlias<ps, inst, "vcc">;
}
353
// Common base of the MADAK / MADMK profiles: all four value types are `vt`.
// Declares AsmVOPDXDeferred (left unset here) for the derived classes to fill.
class VOP_MADK_Base <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
  string AsmVOPDXDeferred = ?;
}
357
// Profile for the MADAK operand order: $src0, $src1, $imm (immediate last).
// The immediate operand type and the src0 operand class of the 32-bit form
// are chosen by the size of `vt` (32-bit vs. 16-bit variants).
class VOP_MADAK <ValueType vt> : VOP_MADK_Base<vt> {
  field Operand ImmOpType = !if(!eq(vt.Size, 32), KImmFP32, KImmFP16);

  // 32-bit encoding operands.
  field dag Ins32 =
      (ins !if(!eq(vt.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0,
           VGPR_32:$src1, ImmOpType:$imm);
  field string Asm32 = "$vdst, $src0, $src1, $imm";

  // VOPD X component.
  field dag InsVOPDX =
      (ins VSrc_f32_Deferred:$src0X, VGPR_32:$vsrc1X, ImmOpType:$imm);
  field string AsmVOPDX = "$vdstX, $src0X, $vsrc1X, $imm";

  // VOPD X component, deferred form.
  // Note that both src0X and imm are deferred
  let InsVOPDXDeferred =
      (ins VSrc_f32_Deferred:$src0X, VGPR_32:$vsrc1X, ImmOpType:$immDeferred);
  let AsmVOPDXDeferred = "$vdstX, $src0X, $vsrc1X, $immDeferred";

  // VOPD Y component.
  field dag InsVOPDY =
      (ins VSrc_f32_Deferred:$src0Y, VGPR_32:$vsrc1Y, ImmOpType:$imm);
  field string AsmVOPDY = "$vdstY, $src0Y, $vsrc1Y, $imm";

  field bit HasExt = 0;
  let IsSingle = 1;
}
375
// Concrete MADAK profiles.

// f16, default operand classes from VOP_MADAK.
def VOP_MADAK_F16 : VOP_MADAK<f16>;

// f16, real true16: 16-bit Lo128 source operands.
def VOP_MADAK_F16_t16 : VOP_MADAK<f16> {
  let IsTrue16     = 1;
  let IsRealTrue16 = 1;
  let DstRC =
      getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 0 /*IsVOP3Encoding*/>.ret;
  let Ins32 = (ins VSrcT_f16_Lo128_Deferred:$src0,
                   VGPRSrc_16_Lo128:$src1,
                   ImmOpType:$imm);
}

// f16, fake16: 32-bit Lo128 source operands.
def VOP_MADAK_F16_fake16 : VOP_MADAK<f16> {
  let IsTrue16 = 1;
  let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
  let Ins32 = (ins VSrcFake16_f16_Lo128_Deferred:$src0,
                   VGPRSrc_32_Lo128:$src1,
                   ImmOpType:$imm);
}

// f32.
def VOP_MADAK_F32 : VOP_MADAK<f32>;
389
// Profile for the MADMK operand order: $src0, $imm, $src1 (immediate in the
// middle). The immediate operand type and the src0 operand class of the
// 32-bit form are chosen by the size of `vt`.
class VOP_MADMK <ValueType vt> : VOP_MADK_Base<vt> {
  field Operand ImmOpType = !if(!eq(vt.Size, 32), KImmFP32, KImmFP16);

  // 32-bit encoding operands.
  field dag Ins32 =
      (ins !if(!eq(vt.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0,
           ImmOpType:$imm, VGPR_32:$src1);
  field string Asm32 = "$vdst, $src0, $imm, $src1";

  // VOPD X component.
  field dag InsVOPDX =
      (ins VSrc_f32_Deferred:$src0X, ImmOpType:$imm, VGPR_32:$vsrc1X);
  field string AsmVOPDX = "$vdstX, $src0X, $imm, $vsrc1X";

  // VOPD X component, deferred form.
  let InsVOPDXDeferred =
      (ins VSrc_f32_Deferred:$src0X, ImmOpType:$immDeferred, VGPR_32:$vsrc1X);
  let AsmVOPDXDeferred = "$vdstX, $src0X, $immDeferred, $vsrc1X";

  // VOPD Y component.
  field dag InsVOPDY =
      (ins VSrc_f32_Deferred:$src0Y, ImmOpType:$imm, VGPR_32:$vsrc1Y);
  field string AsmVOPDY = "$vdstY, $src0Y, $imm, $vsrc1Y";

  field bit HasExt = 0;
  let IsSingle = 1;
}
406
// Concrete MADMK profiles.

// f16, default operand classes from VOP_MADMK.
def VOP_MADMK_F16 : VOP_MADMK<f16>;

// f16, real true16: 16-bit Lo128 source operands.
def VOP_MADMK_F16_t16 : VOP_MADMK<f16> {
  let IsTrue16     = 1;
  let IsRealTrue16 = 1;
  let DstRC =
      getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 0 /*IsVOP3Encoding*/>.ret;
  let Ins32 = (ins VSrcT_f16_Lo128_Deferred:$src0,
                   ImmOpType:$imm,
                   VGPRSrc_16_Lo128:$src1);
}

// f16, fake16: 32-bit Lo128 source operands.
def VOP_MADMK_F16_fake16 : VOP_MADMK<f16> {
  let IsTrue16 = 1;
  let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
  let Ins32 = (ins VSrcFake16_f16_Lo128_Deferred:$src0,
                   ImmOpType:$imm,
                   VGPRSrc_32_Lo128:$src1);
}

// f32.
def VOP_MADMK_F32 : VOP_MADMK<f32>;
420
// Profile for multiply-accumulate style operations: the destination and src2
// have type vt0, src0 and src1 have type vt1 (defaulting to vt0). Every
// operand list carries a $src2 "stub argument" even though HasSrc2 is 0; see
// the s_delay_alu note on the VOPD lists below.
//
// FIXME: Remove src2_modifiers. It isn't used, so is wasting memory
// and processing time but it makes it easier to convert to mad.
class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, vt0]> {
  // --- 32-bit and 64-bit encodings -----------------------------------------
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT<Src2VT>.ret:$src2);
  // Src2 must accept the same operand types as vdst, namely VGPRs only
  let Src2RC64 = getVOP3VRegSrcForVT<Src2VT, IsTrue16, !not(IsRealTrue16)>.ret;
  let Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, 3,
                       0, HasModifiers, HasModifiers, HasOMod,
                       Src0Mod, Src1Mod, Src2Mod>.ret;

  // --- DPP / DPP16 ---------------------------------------------------------
  let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                    Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                    getVregSrcForVT<Src2VT>.ret:$src2, // stub argument
                    dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
                    DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins Dpp16FI:$fi));

  // --- VOP3 base -----------------------------------------------------------
  let InsVOP3Base = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, RegisterOperand<VGPR_32>, 3,
                       0, HasModifiers, HasModifiers, HasOMod,
                       Src0ModVOP3DPP, Src1ModVOP3DPP, Src2Mod, HasOpSel>.ret;

  // --- VOPD ----------------------------------------------------------------
  // We need a dummy src2 tied to dst to track the use of that register for s_delay_alu
  let InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X, VGPRSrc_32:$src2X);
  let InsVOPDXDeferred =
    (ins !if(!eq(Src0VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0X,
         VGPR_32:$vsrc1X, VGPRSrc_32:$src2X);
  let InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y, VGPRSrc_32:$src2Y);
  // The operand being selected is $src0Y, so key the choice on Src0VT exactly
  // as the X variant does (this used to test Src1VT). Both source slots are
  // given vt1 in the class header, so the chosen operand class is expected to
  // be unchanged.
  let InsVOPDYDeferred =
    (ins !if(!eq(Src0VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0Y,
         VGPR_32:$vsrc1Y, VGPRSrc_32:$src2Y);

  // --- DPP8 / SDWA ---------------------------------------------------------
  let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                     Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                     getVregSrcForVT<Src2VT>.ret:$src2, // stub argument
                     dpp8:$dpp8, Dpp8FI:$fi);
  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                     Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
                     getVregSrcForVT<Src2VT>.ret:$src2, // stub argument
                     Clamp:$clamp, omod:$omod,
                     dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel, src1_sel:$src1_sel);

  // --- Assembly strings: all generated for two source arguments ------------
  let Asm32 = getAsm32<1, 2, vt0>.ret;
  let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt0>.ret;
  let AsmDPP16 = getAsmDPP16<1, 2, HasModifiers, vt0>.ret;
  let AsmDPP8 = getAsmDPP8<1, 2, 0, vt0>.ret;
  let AsmSDWA = getAsmSDWA<1, 2, vt0>.ret;
  let AsmSDWA9 = getAsmSDWA9<1, 1, 2, vt0>.ret;
  let AsmVOP3Base =
      getAsmVOP3Base<2 /*NumSrcArgs*/, HasDst, HasClamp,
                        HasOpSel, HasOMod, IsVOP3P, HasModifiers,
                        HasModifiers, HasModifiers,
                        0 /*Src2HasMods*/, DstVT>.ret;

  // src2 is not treated as a real source operand.
  let HasSrc2 = 0;
  let HasSrc2Mods = 0;

  // --- Supported extensions ------------------------------------------------
  let HasExt = 1;
  let HasExtDPP = 1;
  let HasExt32BitDPP = 1;
  let HasExtSDWA = 1;
  let HasExtSDWA9 = 0;
  let TieRegDPP = "$src2";
}
480
// f16 MAC profile with the defaults from VOP_MAC.
def VOP_MAC_F16 : VOP_MAC<f16>;

// f16 MAC profile for real true16: every operand class and modifier type is
// re-derived with IsTrue16 = 1 and IsFake16 = 0.
def VOP_MAC_F16_t16 : VOP_MAC<f16> {
  let IsTrue16     = 1;
  let IsRealTrue16 = 1;
  let HasOpSel     = 1;

  // 32-bit encoding.
  let DstRC    = VOPDstOperand_t16Lo128;
  let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
  let Src1RC32 = getVregSrcForVT<Src1VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
  let Ins32 =
      (ins Src0RC32:$src0, Src1RC32:$src1,
           getVregSrcForVT<Src2VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret:$src2);

  // 64-bit (VOP3) encoding.
  let DstRC64  = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 1 /*IsVOP3Encoding*/>.ret;
  let Src0RC64 = getVOP3SrcForVT<Src0VT, 1 /*IsTrue16*/>.ret;
  let Src1RC64 = getVOP3SrcForVT<Src1VT, 1 /*IsTrue16*/>.ret;
  let Src0Mod  = getSrc0Mod<Src0VT, DstVT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
  let Src1Mod  = getSrcMod<Src1VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
  let Src2Mod  = getSrcMod<Src2VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;

  // DPP / DPP8.
  let Src0DPP    = getVregSrcForVT<Src0VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
  let Src1DPP    = getVregSrcForVT<Src1VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
  let Src2DPP    = getVregSrcForVT<Src2VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
  let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0 /*IsFake16*/>.ret;
  let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0 /*IsFake16*/>.ret;
  let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0 /*IsFake16*/>.ret;
  let InsDPP =
      (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
           Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
           // stub argument
           getVregSrcForVT<Src2VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret:$src2,
           dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
           DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
  let InsDPP8 =
      (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
           Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
           // stub argument
           getVregSrcForVT<Src2VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret:$src2,
           dpp8:$dpp8, Dpp8FI:$fi);

  // VOP3 DPP.
  let Src0VOP3DPP    = VGPRSrc_16;
  let Src1VOP3DPP    = getVOP3DPPSrcForVT<Src1VT, 0 /*IsFake16*/>.ret;
  let Src2VOP3DPP    = getVOP3DPPSrcForVT<Src2VT, 0 /*IsFake16*/>.ret;
  let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 0 /*IsFake16*/>.ret;
  let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 0 /*IsFake16*/>.ret;
  let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 0 /*IsFake16*/>.ret;
}
// f16 MAC profile for fake16: operand classes and modifier types are
// re-derived with IsTrue16 = 1 and IsFake16 = 1. IsRealTrue16 and HasOpSel
// are left at their inherited values.
def VOP_MAC_F16_fake16 : VOP_MAC<f16> {
  let IsTrue16 = 1;

  // 32-bit encoding.
  let DstRC    = getVALUDstForVT_fake16<DstVT>.ret;
  let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1 /*IsTrue16*/, 1 /*IsFake16*/>.ret;
  let Src1RC32 = getVregSrcForVT<Src1VT, 1 /*IsTrue16*/, 1 /*IsFake16*/>.ret;
  let Ins32 =
      (ins Src0RC32:$src0, Src1RC32:$src1,
           getVregSrcForVT<Src2VT, 1 /*IsTrue16*/, 1 /*IsFake16*/>.ret:$src2);

  // 64-bit (VOP3) encoding.
  let DstRC64 = getVALUDstForVT<DstVT>.ret;
  let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1 /*IsTrue16*/, 1 /*IsFake16*/>.ret;
  let Src1Mod = getSrcMod<Src1VT, 1 /*IsTrue16*/, 1 /*IsFake16*/>.ret;
  let Src2Mod = getSrcMod<Src2VT, 1 /*IsTrue16*/, 1 /*IsFake16*/>.ret;

  // DPP / DPP8.
  let Src0DPP    = getVregSrcForVT<Src0VT, 1 /*IsTrue16*/, 1 /*IsFake16*/>.ret;
  let Src1DPP    = getVregSrcForVT<Src1VT, 1 /*IsTrue16*/, 1 /*IsFake16*/>.ret;
  let Src2DPP    = getVregSrcForVT<Src2VT, 1 /*IsTrue16*/, 1 /*IsFake16*/>.ret;
  let Src0ModDPP = getSrcModDPP_t16<Src0VT, 1 /*IsFake16*/>.ret;
  let Src1ModDPP = getSrcModDPP_t16<Src1VT, 1 /*IsFake16*/>.ret;
  let Src2ModDPP = getSrcModDPP_t16<Src2VT, 1 /*IsFake16*/>.ret;
  let InsDPP =
      (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
           Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
           // stub argument
           getVregSrcForVT<Src2VT, 1 /*IsTrue16*/, 1 /*IsFake16*/>.ret:$src2,
           dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
           DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
  let InsDPP8 =
      (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
           Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
           // stub argument
           getVregSrcForVT<Src2VT, 1 /*IsTrue16*/, 1 /*IsFake16*/>.ret:$src2,
           dpp8:$dpp8, Dpp8FI:$fi);

  // VOP3 DPP.
  let Src0VOP3DPP    = VGPRSrc_32;
  let Src1VOP3DPP    = getVOP3DPPSrcForVT<Src1VT, 1 /*IsFake16*/>.ret;
  let Src2VOP3DPP    = getVOP3DPPSrcForVT<Src2VT, 1 /*IsFake16*/>.ret;
  let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 1 /*IsFake16*/>.ret;
  let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 1 /*IsFake16*/>.ret;
  let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 1 /*IsFake16*/>.ret;
}
550
// f32 MAC profile with the defaults from VOP_MAC.
def VOP_MAC_F32 : VOP_MAC<f32>;

// f32 MAC profile with DPP (and 32-bit DPP) support turned off.
def VOP_MAC_LEGACY_F32 : VOP_MAC<f32> {
  let HasExtDPP      = 0;
  let HasExt32BitDPP = 0;
}

// f64 MAC profile: no SDWA, no 32-bit DPP, 64-bit DPP enabled.
def VOP_MAC_F64 : VOP_MAC<f64> {
  let HasExtSDWA     = 0;
  let HasExt32BitDPP = 0;
  let HasExt64BitDPP = 1;
}
556
// MAC-style profile for accumulating dot products: result type vt0, source
// type vt1. Starts from VOP_MAC and switches off clamp, SDWA, op_sel and the
// packed flag; individual defs may turn some of these back on.
class VOP_DOT_ACC <ValueType vt0, ValueType vt1> : VOP_MAC<vt0, vt1> {
  let IsPacked   = 0;
  let HasOpSel   = 0;
  let HasClamp   = 0;
  let HasExtSDWA = 0;
}
563
// f32 accumulator, v2f16 sources: clamp re-enabled, DPP source modifiers are
// FPVRegInputMods.
def VOP_DOT_ACC_F32_V2F16 : VOP_DOT_ACC<f32, v2f16> {
  let HasClamp   = 1;
  let Src0ModDPP = FPVRegInputMods;
  let Src1ModDPP = FPVRegInputMods;
}

// f32 accumulator, v2bf16 sources: same overrides as the v2f16 profile.
def VOP_DOT_ACC_F32_V2BF16 : VOP_DOT_ACC<f32, v2bf16> {
  let HasClamp   = 1;
  let Src0ModDPP = FPVRegInputMods;
  let Src1ModDPP = FPVRegInputMods;
}
575
// i32 accumulator, i32 sources: clamp re-enabled, VOP3 DPP disabled, integer
// source modifiers on src0/src1, and a custom 64-bit operand list and asm
// string.
def VOP_DOT_ACC_I32_I32 : VOP_DOT_ACC<i32, i32> {
  let HasClamp      = 1;
  let HasExtVOP3DPP = 0;

  // Source modifiers.
  let HasSrc0Mods = 1;
  let HasSrc1Mods = 1;
  let Src0Mod     = Int32InputMods;
  let Src1Mod     = Int32InputMods;

  // 64-bit encoding.
  let Ins64 = getIns64<Src0RC64, Src1RC64, getVregSrcForVT<Src2VT>.ret,
                       3 /*NumSrcArgs*/, HasClamp, 1 /*HasModifiers*/,
                       1 /*HasSrc2Mods*/, HasOMod,
                       Src0Mod, Src1Mod, Src2Mod>.ret;
  let Asm64 = "$vdst, $src0, $src1$clamp";
}
590
// Write out to vcc or arbitrary SGPR.
//
// The 32-bit, SDWA and DPP asm strings spell the extra output as a literal
// "vcc"; the VOP3 forms have an explicit $sdst output operand instead.
def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], /*EnableClamp=*/1> {
  // Outputs.
  let Outs32       = (outs DstRC:$vdst);
  let Outs64       = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
  let OutsVOP3DPP  = Outs64;
  let OutsVOP3DPP8 = Outs64;

  // DPP inputs.
  let InsDPP = (ins DstRCDPP:$old,
                    Src0DPP:$src0,
                    Src1DPP:$src1,
                    dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
                    DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins Dpp16FI:$fi));
  let InsDPP8 = (ins DstRCDPP:$old,
                     Src0DPP:$src0,
                     Src1DPP:$src1,
                     dpp8:$dpp8, Dpp8FI:$fi);

  // Assembly strings.
  let Asm32 = "$vdst, vcc, $src0, $src1";
  let AsmVOP3Base = "$vdst, $sdst, $src0, $src1$clamp";
  let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmDPP = "$vdst, vcc, $src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
  let AsmDPP16 = AsmDPP#"$fi";
  let AsmDPP8 = "$vdst, vcc, $src0, $src1 $dpp8$fi";
}
615
// Write out to vcc or arbitrary SGPR and read in from vcc or
// arbitrary SGPR.
//
// The 32-bit, SDWA and DPP asm strings spell both the extra output and the
// extra input as a literal "vcc"; the VOP3 forms use explicit $sdst / $src2.
def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], /*EnableClamp=*/1> {
  let HasSrc2Mods = 0;

  // Outputs.
  let Outs32       = (outs DstRC:$vdst);
  let Outs64       = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
  let OutsVOP3DPP  = Outs64;
  let OutsVOP3DPP8 = Outs64;

  // Suppress src2 implied by type since the 32-bit encoding uses an
  // implicit VCC use.
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);

  // SDWA inputs.
  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                     Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
                     Clamp:$clamp,
                     dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel, src1_sel:$src1_sel);

  // DPP inputs.
  let InsDPP = (ins DstRCDPP:$old,
                    Src0DPP:$src0,
                    Src1DPP:$src1,
                    dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
                    DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins Dpp16FI:$fi));
  let InsDPP8 = (ins DstRCDPP:$old,
                     Src0DPP:$src0,
                     Src1DPP:$src1,
                     dpp8:$dpp8, Dpp8FI:$fi);

  // Assembly strings.
  let Asm32 = "$vdst, vcc, $src0, $src1, vcc";
  let AsmVOP3Base = "$vdst, $sdst, $src0, $src1, $src2$clamp";
  let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmDPP = "$vdst, vcc, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
  let AsmDPP16 = AsmDPP#"$fi";
  let AsmDPP8 = "$vdst, vcc, $src0, $src1, vcc $dpp8$fi";

  // Supported extensions.
  let HasExt         = 1;
  let HasExtDPP      = 1;
  let HasExt32BitDPP = 1;
  let HasExtSDWA     = 1;
  let HasExtSDWA9    = 1;
}
659
// Read in from vcc or arbitrary SGPR.
//
// Profile for VOP2 operations with an extra input that the 32-bit encoding
// takes implicitly from VCC. Floating-point source modifiers are enabled for
// src0/src1 regardless of the value types in ArgVT.
class VOP2e_SGPR<list<ValueType> ArgVT> : VOPProfile<ArgVT> {
  // Outputs.
  let Outs32 = (outs DstRC:$vdst);
  let Outs64 = (outs DstRC64:$vdst);

  // Suppress src2 implied by type since the 32-bit encoding uses an
  // implicit VCC use.
  let Ins32 = (ins VSrc_f32:$src0, Src1RC32:$src1);

  // Source modifiers: FP modifiers only, sized by src0's type for VOP3.
  let HasModifiers     = 1;
  let HasSrc0IntMods   = 0;
  let HasSrc1IntMods   = 0;
  let HasSrc0FloatMods = 1;
  let HasSrc1FloatMods = 1;
  // Select FP modifiers for VOP3
  let Src0Mod = !if(!eq(Src0VT.Size, 16), FP16InputMods, FP32InputMods);
  let Src1Mod = Src0Mod;
  let Src0ModVOP3DPP = FPVRegInputMods;
  let Src1ModVOP3DPP = FP32VCSrcInputMods;

  // SDWA inputs.
  let InsSDWA = (ins FP32SDWAInputMods:$src0_modifiers, SDWASrc_f32:$src0,
                     FP32SDWAInputMods:$src1_modifiers, SDWASrc_f32:$src1,
                     Clamp:$clamp,
                     dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel, src1_sel:$src1_sel);

  // DPP inputs.
  let InsDPP = (ins DstRCDPP:$old,
                    FPVRegInputMods:$src0_modifiers, Src0DPP:$src0,
                    FPVRegInputMods:$src1_modifiers, Src1DPP:$src1,
                    dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
                    DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins Dpp16FI:$fi));
  let InsDPP8 = (ins DstRCDPP:$old,
                     FPVRegInputMods:$src0_modifiers, Src0DPP:$src0,
                     FPVRegInputMods:$src1_modifiers, Src1DPP:$src1,
                     dpp8:$dpp8, Dpp8FI:$fi);

  // Assembly strings.
  let Asm32 = "$vdst, $src0, $src1";
  let AsmVOP3Base = "$vdst, $src0_modifiers, $src1_modifiers, $src2";
  let AsmSDWA = "$vdst, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmSDWA9 = "$vdst, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmDPP = "$vdst, $src0_modifiers, $src1_modifiers, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
  let AsmDPP16 = AsmDPP#"$fi";
  let AsmDPP8 = "$vdst, $src0, $src1, vcc $dpp8$fi";

  // Supported extensions.
  let HasExt         = 1;
  let HasExtDPP      = 1;
  let HasExt32BitDPP = 1;
  let HasExtSDWA     = 1;
  let HasExtSDWA9    = 1;
}
713
// 32-bit and 16-bit integer instantiations with the VOP2e_SGPR defaults.
def VOP2e_I32_I32_I32_I1 : VOP2e_SGPR<[i32, i32, i32, i1]>;
def VOP2e_I16_I16_I16_I1 : VOP2e_SGPR<[i16, i16, i16, i1]>;

// V_CNDMASK_B16 is VOP3 only
//
// Real true16 instantiation: 64-bit operand classes are re-derived with
// IsTrue16 = 1, and the modifier types are derived as for f16.
def VOP2e_I16_I16_I16_I1_true16 : VOP2e_SGPR<[i16, i16, i16, i1]> {
  let IsTrue16     = 1;
  let IsRealTrue16 = 1;
  let HasOpSel     = 1;
  let HasSrc2Mods  = 0;

  // 64-bit (VOP3) operand classes.
  let DstRC64  = getVALUDstForVT<DstVT, 1, 1>.ret;
  let Src0RC64 = getVOP3SrcForVT<Src0VT, 1 /*IsTrue16*/>.ret;
  let Src1RC64 = getVOP3SrcForVT<Src1VT, 1 /*IsTrue16*/>.ret;
  let Src2RC64 = getVOP3SrcForVT<Src2VT, 1 /*IsTrue16*/>.ret;

  // VOP3 source modifiers.
  let Src0Mod = getSrc0Mod<f16, DstVT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
  let Src1Mod = getSrcMod<f16, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;

  // VOP3 op_sel operand list.
  let InsVOP3OpSel =
      getInsVOP3Base<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
                     HasClamp, 1 /*HasModifiers*/, 0 /*HasSrc2Mods*/, HasOMod,
                     Src0Mod, Src1Mod, Src2Mod, 1 /*HasOpSel*/>.ret;

  // VOP3 DPP.
  let Src0VOP3DPP    = VGPRSrc_16;
  let Src1VOP3DPP    = getVOP3DPPSrcForVT<Src1VT, 0 /*IsFake16*/>.ret;
  let Src0ModVOP3DPP = getSrc0ModVOP3DPP<f16, DstVT, 0 /*IsFake16*/>.ret;
  let Src1ModVOP3DPP = getSrcModVOP3DPP<f16, 0 /*IsFake16*/>.ret;
}
// Fake-true16 flavour of the i16 VOP2e_SGPR profile: IsTrue16 is set but
// IsRealTrue16 is not, the helper classes receive IsFake16 = 1, and src0 of
// the VOP3 DPP form uses VGPRSrc_32.
737def VOP2e_I16_I16_I16_I1_fake16 : VOP2e_SGPR<[i16, i16, i16, i1]> {
738  let IsTrue16 = 1;
739  let DstRC64 = getVALUDstForVT<DstVT>.ret;
740
741  let Src0Mod = getSrc0Mod<f16, DstVT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
742  let Src1Mod = getSrcMod<f16, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
743
744  let Src0VOP3DPP = VGPRSrc_32;
745  let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1/*IsFake16*/>.ret;
746  let Src0ModVOP3DPP = getSrc0ModVOP3DPP<f16, DstVT, 1/*IsFake16*/>.ret;
747  let Src1ModVOP3DPP = getSrcModVOP3DPP<f16, 1/*IsFake16*/>.ret;
748}
749
// Profile used by V_READLANE_B32: the result is written to an SReg_32,
// src0 is a VRegOrLdsSrc_32 and src1 an SCSrc_b32. The 64-bit operand lists
// and asm string simply alias the 32-bit ones, and every extension
// (DPP / SDWA) flag is cleared.
750def VOP_READLANE : VOPProfile<[i32, i32, i32, untyped]> {
751  let Outs32 = (outs SReg_32:$vdst);
752  let Outs64 = Outs32;
753  let Ins32 = (ins VRegOrLdsSrc_32:$src0, SCSrc_b32:$src1);
754  let Ins64 = Ins32;
755  let Asm32 = " $vdst, $src0, $src1";
756  let Asm64 = Asm32;
757
758  let HasExt = 0;
759  let HasExtDPP = 0;
760  let HasExt32BitDPP = 0;
761  let HasExt64BitDPP = 0;
762  let HasExtSDWA = 0;
763  let HasExtSDWA9 = 0;
764}
765
// Profile used by V_WRITELANE_B32: a VGPR_32 result, two SCSrc_b32 sources
// and an extra VGPR_32 $vdst_in input. $vdst_in is part of the operand list
// but is not printed by the asm string. Src2 and all extension (DPP / SDWA)
// flags are disabled.
766def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> {
767  let Outs32 = (outs VGPR_32:$vdst);
768  let Outs64 = Outs32;
769  let Ins32 = (ins SCSrc_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in);
770  let Ins64 = Ins32;
771  let Asm32 = " $vdst, $src0, $src1";
772  let Asm64 = Asm32;
773  let HasSrc2 = 0;
774  let HasSrc2Mods = 0;
775
776  let HasExt = 0;
777  let HasExtDPP = 0;
778  let HasExt32BitDPP = 0;
779  let HasExt64BitDPP = 0;
780  let HasExtSDWA = 0;
781  let HasExtSDWA9 = 0;
782}
783
784//===----------------------------------------------------------------------===//
785// VOP2 Instructions
786//===----------------------------------------------------------------------===//
787
// v_cndmask family. The 16-bit true16/fake16 variants are restricted to
// isGFX11Plus and to the matching True16Predicate; the 32-bit form is also
// registered as a VOPD component with opcode 0x9.
788let SubtargetPredicate = isGFX11Plus, True16Predicate = UseRealTrue16Insts in
789defm V_CNDMASK_B16_t16 : VOP2eInst <"v_cndmask_b16_t16", VOP2e_I16_I16_I16_I1_true16>;
790let SubtargetPredicate = isGFX11Plus, True16Predicate = UseFakeTrue16Insts in
791defm V_CNDMASK_B16_fake16 : VOP2eInst <"v_cndmask_b16_fake16", VOP2e_I16_I16_I16_I1_fake16>;
792defm V_CNDMASK_B32 : VOP2eInst_VOPD <"v_cndmask_b32", VOP2e_I32_I32_I32_I1, 0x9, "v_cndmask_b32">;
// v_madmk_f32 is defined as a bare pseudo with an empty pattern list.
793let SubtargetPredicate = HasMadMacF32Insts, isReMaterializable = 1 in
794def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>;
795
// Commutable 32-bit VOP2 instructions. Nested scopes further mark groups as
// rematerializable, tie $vdst to $src2 for the MAC forms, or tag the adds
// with isAdd. VOP2Inst_VOPD entries additionally carry a VOPD opcode and
// VOPD mnemonic; a trailing string argument names the non-reversed
// counterpart of a *rev instruction.
796let isCommutable = 1 in {
797let isReMaterializable = 1 in {
798defm V_ADD_F32 : VOP2Inst_VOPD <"v_add_f32", VOP_F32_F32_F32, 0x4, "v_add_f32", any_fadd>;
799defm V_SUB_F32 : VOP2Inst_VOPD <"v_sub_f32", VOP_F32_F32_F32, 0x5, "v_sub_f32", any_fsub>;
800defm V_SUBREV_F32 : VOP2Inst_VOPD <"v_subrev_f32", VOP_F32_F32_F32, 0x6, "v_subrev_f32", null_frag, "v_sub_f32">;
801defm V_MUL_LEGACY_F32 : VOP2Inst_VOPD <"v_mul_legacy_f32", VOP_F32_F32_F32, 0x7, "v_mul_dx9_zero_f32", AMDGPUfmul_legacy>;
802defm V_MUL_F32 : VOP2Inst_VOPD <"v_mul_f32", VOP_F32_F32_F32, 0x3, "v_mul_f32", any_fmul>;
803defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32_ARITH, AMDGPUmul_i24>;
804defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_I32_I32_I32, AMDGPUmulhi_i24>;
805defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32_ARITH, AMDGPUmul_u24>;
806defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_I32_I32_I32, AMDGPUmulhi_u24>;
807defm V_MIN_F32 : VOP2Inst_VOPD <"v_min_f32", VOP_F32_F32_F32, 0xb, "v_min_f32", fminnum_like>;
808defm V_MAX_F32 : VOP2Inst_VOPD <"v_max_f32", VOP_F32_F32_F32, 0xa, "v_max_f32", fmaxnum_like>;
809defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smin>;
810defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smax>;
811defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umin>;
812defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umax>;
813defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, clshr_rev_32, "v_lshr_b32">;
814defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, cashr_rev_32, "v_ashr_i32">;
815defm V_LSHLREV_B32 : VOP2Inst_VOPD <"v_lshlrev_b32", VOP_I32_I32_I32, 0x11, "v_lshlrev_b32", clshl_rev_32, "v_lshl_b32">;
816defm V_AND_B32 : VOP2Inst_VOPD <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, 0x12, "v_and_b32", and>;
817defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, or>;
818defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, xor>;
819} // End isReMaterializable = 1
820
// MAC/MADAK forms: defined without selection patterns and marked as never
// raising FP exceptions. The MAC forms tie the destination to $src2.
821let mayRaiseFPException = 0 in {
822let OtherPredicates = [HasMadMacF32Insts] in {
823let Constraints = "$vdst = $src2", DisableEncoding="$src2",
824    isConvertibleToThreeAddress = 1 in {
825defm V_MAC_F32 : VOP2Inst <"v_mac_f32", VOP_MAC_F32>;
826
827let SubtargetPredicate = isGFX6GFX7GFX10 in
828defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_MAC_LEGACY_F32>;
829} // End Constraints = "$vdst = $src2", DisableEncoding="$src2",
830  //     isConvertibleToThreeAddress = 1
831
832let isReMaterializable = 1 in
833def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, []>;
834} // End OtherPredicates = [HasMadMacF32Insts]
835} // End mayRaiseFPException = 0
836
837// No patterns so that the scalar instructions are always selected.
838// The scalar versions will be replaced with vector when needed later.
839defm V_SUB_CO_U32 : VOP2bInst <"v_sub_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_co_u32">;
840defm V_SUBREV_CO_U32 : VOP2bInst <"v_subrev_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_co_u32">;
841defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32">;
842defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32">;
843
844
// Carry-less add/sub, only available when HasAddNoCarryInsts holds.
845let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1 in {
846  defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32">;
847  defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32">;
848}
849
850let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1, isAdd = 1 in {
851  defm V_ADD_U32 : VOP2Inst_VOPD <"v_add_u32", VOP_I32_I32_I32_ARITH, 0x10, "v_add_nc_u32", null_frag, "v_add_u32">;
852}
853
854let isAdd = 1 in {
855  defm V_ADD_CO_U32 : VOP2bInst <"v_add_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_add_co_u32">;
856  defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_addc_u32">;
857}
858
859} // End isCommutable = 1
860
861// These are special and do not read the exec mask.
// Both pseudos are convergent and have an empty implicit-use list. The
// write form additionally ties $vdst to the $vdst_in input and is flagged
// IsNeverUniform.
862let isConvergent = 1, Uses = []<Register> in {
863def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE, []>;
864let IsNeverUniform = 1, Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
865def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE, []>;
866} // End IsNeverUniform, $vdst = $vdst_in, DisableEncoding $vdst_in
867} // End isConvergent = 1
868
// For every value type in Reg32Types.types, select the readlane and
// writelane intrinsics to the pseudos defined above. The intrinsic's third
// writelane operand ($src2) feeds the pseudo's VGPR_32 input.
869foreach vt = Reg32Types.types in {
870  def : GCNPat<(vt (int_amdgcn_readlane vt:$src0, i32:$src1)),
871        (V_READLANE_B32 VRegOrLdsSrc_32:$src0, SCSrc_b32:$src1)
872  >;
873
874  def : GCNPat<(vt (int_amdgcn_writelane vt:$src0, i32:$src1, vt:$src2)),
875        (V_WRITELANE_B32 SCSrc_b32:$src0, SCSrc_b32:$src1, VGPR_32:$src2)
876  >;
877}
878
// Rematerializable bit-manipulation, conversion/packing and legacy
// min/max/shift instructions.
879let isReMaterializable = 1 in {
880defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_I32_I32_I32>;
881defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_I32_I32_I32, add_ctpop>;
882let IsNeverUniform = 1 in {
883defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_lo>;
884defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_hi>;
885} // End IsNeverUniform = 1
886defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, any_fldexp>;
887
// The pknorm conversions are marked as not reading the mode register and
// not raising FP exceptions.
888let ReadsModeReg = 0, mayRaiseFPException = 0 in {
889defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_i16_f32>;
890defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_u16_f32>;
891}
892
893defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_V2F16_F32_F32, AMDGPUpkrtz_f16_f32>;
894defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_V2I16_I32_I32, AMDGPUpk_u16_u32>;
895defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_V2I16_I32_I32, AMDGPUpk_i16_i32>;
896
897
// Instructions restricted to isGFX6GFX7.
898let SubtargetPredicate = isGFX6GFX7 in {
899defm V_MIN_LEGACY_F32 : VOP2Inst <"v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy>;
900defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy>;
901} // End SubtargetPredicate = isGFX6GFX7
902
903let isCommutable = 1 in {
904let SubtargetPredicate = isGFX6GFX7 in {
905defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, csrl_32>;
906defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, csra_32>;
907defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, cshl_32>;
908} // End SubtargetPredicate = isGFX6GFX7
909} // End isCommutable = 1
910} // End isReMaterializable = 1
911
// Defined without a selection pattern, on a VOP_NO_EXT-wrapped profile.
912defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_NO_EXT<VOP_I32_F32_I32>>; // TODO: set "Uses = dst"
913
// Pattern that selects DivergentBinFrag<Op> on Inst's Src0VT/Src1VT types
// to Inst. If Inst's Commutable_REV record has IsOrig set, the operands are
// passed through in order; otherwise they are swapped.
914class DivergentBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
915  GCNPat<
916      (DivergentBinFrag<Op> Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1),
917      !if(!cast<Commutable_REV>(Inst).IsOrig,
918        (Inst $src0, $src1),
919        (Inst $src1, $src0)
920      )
921  >;
922
// Same as DivergentBinOp, but the output instruction takes one extra
// trailing operand, which is always emitted as literal 0.
// NOTE(review): judging by the class name this is the clamp operand;
// confirm against the operand list of the instantiated instructions.
923class DivergentClampingBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
924  GCNPat<
925      (DivergentBinFrag<Op> Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1),
926      !if(!cast<Commutable_REV>(Inst).IsOrig,
927        (Inst $src0, $src1, 0),
928        (Inst $src1, $src0, 0)
929      )
930  >;
931
// Divergent shift selection onto the reversed-operand shift instructions.
932def : DivergentBinOp<csrl_32, V_LSHRREV_B32_e64>;
933def : DivergentBinOp<csra_32, V_ASHRREV_I32_e64>;
934def : DivergentBinOp<cshl_32, V_LSHLREV_B32_e64>;
935
// Divergent add/sub: carry-less forms when HasAddNoCarryInsts holds,
// carry-out forms on isGFX6GFX7GFX8GFX9.
936let SubtargetPredicate = HasAddNoCarryInsts in {
937  def : DivergentClampingBinOp<add, V_ADD_U32_e64>;
938  def : DivergentClampingBinOp<sub, V_SUB_U32_e64>;
939}
940
941let SubtargetPredicate = isGFX6GFX7GFX8GFX9, Predicates = [isGFX6GFX7GFX8GFX9] in {
942def : DivergentClampingBinOp<add, V_ADD_CO_U32_e64>;
943def : DivergentClampingBinOp<sub, V_SUB_CO_U32_e64>;
944}
945
// Divergent adde/sube use the e32 encodings.
946def : DivergentBinOp<adde, V_ADDC_U32_e32>;
947def : DivergentBinOp<sube, V_SUBB_U32_e32>;
948
// Lowers a divergent 64-bit binary operation to two applications of the
// 32-bit instruction Inst: one on the sub0 halves and one on the sub1
// halves of the sources. The two results are recombined into a VReg_64
// with REG_SEQUENCE.
949class divergent_i64_BinOp <SDPatternOperator Op, Instruction Inst> :
950  GCNPat<
951      (DivergentBinFrag<Op> i64:$src0, i64:$src1),
952      (REG_SEQUENCE VReg_64,
953        (Inst
954          (i32 (EXTRACT_SUBREG $src0, sub0)),
955          (i32 (EXTRACT_SUBREG $src1, sub0))
956        ), sub0,
957        (Inst
958          (i32 (EXTRACT_SUBREG $src0, sub1)),
959          (i32 (EXTRACT_SUBREG $src1, sub1))
960        ), sub1
961      )
962  >;
963
// Divergent 64-bit and/or/xor are split into pairs of 32-bit e64 operations.
964def :  divergent_i64_BinOp <and, V_AND_B32_e64>;
965def :  divergent_i64_BinOp <or,  V_OR_B32_e64>;
966def :  divergent_i64_BinOp <xor, V_XOR_B32_e64>;
967
968// mul24 w/ 64 bit output.
// Matches Op producing an i64 from two i32 sources. The result is assembled
// with REG_SEQUENCE: InstLo supplies sub0 and InstHi supplies sub1, both
// fed the same two sources.
969class mul24_64_Pat<SDPatternOperator Op, Instruction InstLo, Instruction InstHi> : GCNPat<
970  (i64 (Op i32:$src0, i32:$src1)),
971  (REG_SEQUENCE VReg_64,
972    (InstLo $src0, $src1), sub0,
973    (InstHi $src0, $src1), sub1)
974>;
975
// Signed and unsigned instances: the mul instruction gives the low half,
// the mul_hi instruction the high half.
976def : mul24_64_Pat<AMDGPUmul_i24, V_MUL_I32_I24_e64, V_MUL_HI_I32_I24_e64>;
977def : mul24_64_Pat<AMDGPUmul_u24, V_MUL_U32_U24_e64, V_MUL_HI_U32_U24_e64>;
978
979//===----------------------------------------------------------------------===//
980// 16-Bit Operand Instructions
981//===----------------------------------------------------------------------===//
982
983// The ldexp.f16 intrinsic expects an integer src1 operand, though the hardware
984// encoding treats src1 as an f16
// Profile for v_ldexp_f16: all value types are f16, but src1 is switched
// from float modifiers to integer modifiers in the VOP3, DPP, VOP3 DPP
// and SDWA forms.
985def LDEXP_F16_VOPProfile : VOPProfile <[f16, f16, f16, untyped]> {
986  let Src1Mod = Int32InputMods;
987  let Src1ModDPP = IntVRegInputMods;
988  let Src1ModVOP3DPP = IntVRegInputMods;
989  // SDWA sext is the only modifier allowed.
990  let HasSrc1IntMods = 1;
991  let HasSrc1FloatMods = 0;
992  let Src1ModSDWA = Int16SDWAInputMods;
993}
// Real-true16 ldexp profile: src1 uses the IntT16* modifier operand classes
// instantiated with IsFake16 = 0.
994def LDEXP_F16_VOPProfile_True16 : VOPProfile_True16<VOP_F16_F16_F16> {
995  let Src1Mod = IntT16InputMods<0/*IsFake16*/>;
996  let Src1ModDPP = IntT16_Lo128VRegInputMods<0/*IsFake16*/>;
997  let Src1ModVOP3DPP = IntT16VCSrcInputMods<0/*IsFake16*/>;
998}
// Fake-true16 ldexp profile: src1 uses Int32InputMods for VOP3, and the
// IntT16* classes instantiated with IsFake16 = 1 for the DPP forms.
999def LDEXP_F16_VOPProfile_Fake16 : VOPProfile_Fake16<VOP_F16_F16_F16> {
1000  let Src1Mod = Int32InputMods;
1001  let Src1ModDPP = IntT16_Lo128VRegInputMods<1/*IsFake16*/>;
1002  let Src1ModVOP3DPP = IntT16VCSrcInputMods<1/*IsFake16*/>;
1003}
1004
// Rematerializable 16-bit VOP2 instructions. The FP arithmetic forms are
// wrapped in FPDPRounding = 1; the ldexp variants are defined without
// selection patterns.
1005let isReMaterializable = 1 in {
1006let FPDPRounding = 1 in {
1007  let OtherPredicates = [Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in
1008    defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", LDEXP_F16_VOPProfile>;
1009  let SubtargetPredicate = UseRealTrue16Insts in
1010    defm V_LDEXP_F16_t16 : VOP2Inst <"v_ldexp_f16_t16", LDEXP_F16_VOPProfile_True16>;
1011  let SubtargetPredicate = UseFakeTrue16Insts in
1012    defm V_LDEXP_F16_fake16 : VOP2Inst <"v_ldexp_f16_fake16", LDEXP_F16_VOPProfile_Fake16, null_frag, "v_ldexp_f16_fake16">;
1013} // End FPDPRounding = 1
// Reversed-operand 16-bit shifts (not inside the commutable scope below).
1014defm V_LSHLREV_B16 : VOP2Inst_e64_t16 <"v_lshlrev_b16", VOP_I16_I16_I16, clshl_rev_16>;
1015defm V_LSHRREV_B16 : VOP2Inst_e64_t16 <"v_lshrrev_b16", VOP_I16_I16_I16, clshr_rev_16>;
1016defm V_ASHRREV_I16 : VOP2Inst_e64_t16 <"v_ashrrev_i16", VOP_I16_I16_I16, cashr_rev_16>;
1017let isCommutable = 1 in {
1018let FPDPRounding = 1 in {
1019defm V_ADD_F16 : VOP2Inst_t16 <"v_add_f16", VOP_F16_F16_F16, any_fadd>;
1020defm V_SUB_F16 : VOP2Inst_t16 <"v_sub_f16", VOP_F16_F16_F16, any_fsub>;
1021defm V_SUBREV_F16 : VOP2Inst_t16 <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub_f16">;
1022defm V_MUL_F16 : VOP2Inst_t16 <"v_mul_f16", VOP_F16_F16_F16, any_fmul>;
1023} // End FPDPRounding = 1
1024defm V_MUL_LO_U16 : VOP2Inst_e64_t16 <"v_mul_lo_u16", VOP_I16_I16_I16, mul>;
1025defm V_MAX_F16 : VOP2Inst_t16 <"v_max_f16", VOP_F16_F16_F16, fmaxnum_like>;
1026defm V_MIN_F16 : VOP2Inst_t16 <"v_min_f16", VOP_F16_F16_F16, fminnum_like>;
1027defm V_MAX_U16 : VOP2Inst_e64_t16 <"v_max_u16", VOP_I16_I16_I16, umax>;
1028defm V_MAX_I16 : VOP2Inst_e64_t16 <"v_max_i16", VOP_I16_I16_I16, smax>;
1029defm V_MIN_U16 : VOP2Inst_e64_t16 <"v_min_u16", VOP_I16_I16_I16, umin>;
1030defm V_MIN_I16 : VOP2Inst_e64_t16 <"v_min_i16", VOP_I16_I16_I16, smin>;
1031} // End isCommutable = 1
1032} // End isReMaterializable = 1
1033
// Selects op(src0, src1) to inst in its VOP3 form. src0 is matched through
// VOP3Mods0 (yielding modifiers, clamp and omod); src1 is matched as an i16
// through VOP3Mods0 with modifiers only. The output operand order is
// src0_modifiers, src0, src1_modifiers, src1, clamp, omod.
1034class LDEXP_F16_Pat <SDPatternOperator op, VOP_Pseudo inst, VOPProfile P = inst.Pfl> : GCNPat <
1035  (P.DstVT (op (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
1036               (i16 (VOP3Mods0 P.Src1VT:$src1, i32:$src1_modifiers)))),
1037  (inst $src0_modifiers, $src0,
1038        $src1_modifiers, $src1,
1039        $clamp, /* clamp */
1040        $omod /* omod */)
1041>;
1042
// ldexp selection for the non-true16 instruction, guarded by
// NotHasTrue16BitInsts.
1043let OtherPredicates = [NotHasTrue16BitInsts] in
1044def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_e64>;
1045
// Variant of LDEXP_F16_Pat whose output instruction takes one more trailing
// operand, op_sel, which is always emitted as 0. The matched input DAG is
// the same as in LDEXP_F16_Pat.
1046class LDEXP_F16_t16_Pat <SDPatternOperator op, VOP_Pseudo inst, VOPProfile P = inst.Pfl> : GCNPat <
1047  (P.DstVT (op (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
1048               (i16 (VOP3Mods0 P.Src1VT:$src1, i32:$src1_modifiers)))),
1049  (inst $src0_modifiers, $src0,
1050        $src1_modifiers, $src1,
1051        $clamp, /* clamp */
1052        $omod, /* omod */
1053        0) /* op_sel */
1054>;
1055
// The real-true16 instruction uses the op_sel-carrying pattern; the fake16
// instruction reuses the plain LDEXP_F16_Pat.
1056let OtherPredicates = [UseRealTrue16Insts] in
1057def : LDEXP_F16_t16_Pat<any_fldexp, V_LDEXP_F16_t16_e64>;
1058
1059let OtherPredicates = [UseFakeTrue16Insts] in
1060def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_fake16_e64>;
1061
// 16-bit bitwise and/or/xor, each in a true16 and a fake16 flavour. They are
// defined through VOP2Inst_e64, commutable, and restricted to isGFX11Plus.
1062let SubtargetPredicate = isGFX11Plus in {
1063  let isCommutable = 1 in {
1064    defm V_AND_B16_t16 : VOP2Inst_e64 <"v_and_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, and>;
1065    defm V_AND_B16_fake16 : VOP2Inst_e64 <"v_and_b16_fake16", VOPProfile_Fake16<VOP_I16_I16_I16>, and>;
1066    defm V_OR_B16_t16  : VOP2Inst_e64 <"v_or_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, or>;
1067    defm V_OR_B16_fake16  : VOP2Inst_e64 <"v_or_b16_fake16", VOPProfile_Fake16<VOP_I16_I16_I16>, or>;
1068    defm V_XOR_B16_t16 : VOP2Inst_e64 <"v_xor_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, xor>;
1069    defm V_XOR_B16_fake16 : VOP2Inst_e64 <"v_xor_b16_fake16", VOPProfile_Fake16<VOP_I16_I16_I16>, xor>;
1070  } // End isCommutable = 1
1071} // End SubtargetPredicate = isGFX11Plus
1072
// v_fmamk_f16 / v_fmaak_f16 pseudos in non-true16, real-true16 and
// fake-true16 flavours. All are fixed-size, rematerializable, defined with
// an empty pattern list, and only the fmaak forms are commutable. Only the
// non-true16 definitions carry the isGFX10Plus subtarget predicate here.
1073let FPDPRounding = 1, isReMaterializable = 1, FixedSize = 1 in {
1074let SubtargetPredicate = isGFX10Plus, True16Predicate = NotHasTrue16BitInsts in {
1075  def V_FMAMK_F16 : VOP2_Pseudo <"v_fmamk_f16", VOP_MADMK_F16, [], "">;
1076}
1077let True16Predicate = UseRealTrue16Insts in {
1078  def V_FMAMK_F16_t16 : VOP2_Pseudo <"v_fmamk_f16_t16", VOP_MADMK_F16_t16, [], "">;
1079}
1080let True16Predicate = UseFakeTrue16Insts in {
1081  def V_FMAMK_F16_fake16 : VOP2_Pseudo <"v_fmamk_f16_fake16", VOP_MADMK_F16_fake16, [], "">;
1082}
1083
1084let isCommutable = 1 in {
1085let SubtargetPredicate = isGFX10Plus, True16Predicate = NotHasTrue16BitInsts in {
1086  def V_FMAAK_F16 : VOP2_Pseudo <"v_fmaak_f16", VOP_MADAK_F16, [], "">;
1087}
1088let True16Predicate = UseRealTrue16Insts in {
1089  def V_FMAAK_F16_t16 : VOP2_Pseudo <"v_fmaak_f16_t16", VOP_MADAK_F16_t16, [], "">;
1090}
1091let True16Predicate = UseFakeTrue16Insts in {
1092  def V_FMAAK_F16_fake16 : VOP2_Pseudo <"v_fmaak_f16_fake16", VOP_MADAK_F16_fake16, [], "">;
1093}
1094} // End isCommutable = 1
1095} // End FPDPRounding  = 1, isReMaterializable = 1, FixedSize = 1
1096
// v_fmac_f16 in its three true16 flavours, all under isGFX10Plus. The
// destination is tied to $src2, and the instructions are commutable and
// convertible to three-address form. No selection pattern is attached.
1097let Constraints = "$vdst = $src2",
1098    DisableEncoding="$src2",
1099    isConvertibleToThreeAddress = 1,
1100    isCommutable = 1 in {
1101let SubtargetPredicate = isGFX10Plus in {
1102let True16Predicate = NotHasTrue16BitInsts in {
1103  defm V_FMAC_F16 : VOP2Inst <"v_fmac_f16", VOP_MAC_F16>;
1104}
1105let True16Predicate = UseRealTrue16Insts in {
1106  defm V_FMAC_F16_t16 : VOP2Inst <"v_fmac_f16_t16", VOP_MAC_F16_t16>;
1107}
1108let True16Predicate = UseFakeTrue16Insts in {
1109  defm V_FMAC_F16_fake16 : VOP2Inst <"v_fmac_f16_fake16", VOP_MAC_F16_fake16>;
1110}
1111} // End SubtargetPredicate = isGFX10Plus
1112} // End FMAC Constraints
1113
// Instructions gated on Has16BitInsts: the madmk/madak f16 pseudos, the
// 16-bit integer add/sub family (further restricted to isGFX8GFX9 by an
// inner SubtargetPredicate), and v_mac_f16.
1114let SubtargetPredicate = Has16BitInsts in {
1115let isReMaterializable = 1 in {
1116let FPDPRounding = 1 in {
1117  def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">;
1118} // End FPDPRounding = 1
1119let isCommutable = 1 in {
1120let mayRaiseFPException = 0 in {
1121  def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">;
1122}
1123let SubtargetPredicate = isGFX8GFX9 in {
1124  defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16_ARITH, add>;
1125  defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16_ARITH, sub>;
1126  defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16_ARITH, null_frag, "v_sub_u16">;
1127}
1128} // End isCommutable = 1
1129} // End isReMaterializable = 1
1130
1131// FIXME: Missing FPDPRounding
1132let Constraints = "$vdst = $src2", DisableEncoding="$src2",
1133    isConvertibleToThreeAddress = 1, isCommutable = 1 in {
1134defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_MAC_F16>;
1135}
1136} // End SubtargetPredicate = Has16BitInsts
1137
1138
// Instructions and patterns gated on HasDLInsts: v_xnor_b32, its divergent
// selection patterns, and v_fmac_f32.
1139let SubtargetPredicate = HasDLInsts in {
1140
1141let isReMaterializable = 1 in
1142defm V_XNOR_B32 : VOP2Inst <"v_xnor_b32", VOP_I32_I32_I32, xnor>;
1143
// Divergent not(xor(a, b)) with a single-use xor -> v_xnor_b32.
1144def : GCNPat<
1145  (i32 (DivergentUnaryFrag<not> (xor_oneuse i32:$src0, i32:$src1))),
1146  (i32 (V_XNOR_B32_e64 $src0, $src1))
1147>;
1148
// Divergent xor(not(a), b) -> v_xnor_b32.
1149def : GCNPat<
1150  (i32 (DivergentBinFrag<xor_oneuse> (not i32:$src0), i32:$src1)),
1151  (i32 (V_XNOR_B32_e64 $src0, $src1))
1152>;
1153
// 64-bit versions of the two patterns above: one v_xnor_b32 per 32-bit half
// (sub0 / sub1), recombined with REG_SEQUENCE.
1154def : GCNPat<
1155  (i64 (DivergentUnaryFrag<not> (xor_oneuse i64:$src0, i64:$src1))),
1156  (REG_SEQUENCE VReg_64, (i32 (V_XNOR_B32_e64
1157                            (i32 (EXTRACT_SUBREG $src0, sub0)),
1158                            (i32 (EXTRACT_SUBREG $src1, sub0)))), sub0,
1159                     (i32 (V_XNOR_B32_e64
1160                            (i32 (EXTRACT_SUBREG $src0, sub1)),
1161                            (i32 (EXTRACT_SUBREG $src1, sub1)))), sub1)
1162>;
1163
1164def : GCNPat<
1165  (i64 (DivergentBinFrag<xor_oneuse> (not i64:$src0), i64:$src1)),
1166  (REG_SEQUENCE VReg_64, (i32 (V_XNOR_B32_e64
1167                            (i32 (EXTRACT_SUBREG $src0, sub0)),
1168                            (i32 (EXTRACT_SUBREG $src1, sub0)))), sub0,
1169                     (i32 (V_XNOR_B32_e64
1170                            (i32 (EXTRACT_SUBREG $src0, sub1)),
1171                            (i32 (EXTRACT_SUBREG $src1, sub1)))), sub1)
1172>;
1173
// v_fmac_f32: destination tied to $src2; also a VOPD component (opcode 0x0).
1174let Constraints = "$vdst = $src2",
1175    DisableEncoding = "$src2",
1176    isConvertibleToThreeAddress = 1,
1177    isCommutable = 1 in
1178defm V_FMAC_F32 : VOP2Inst_VOPD <"v_fmac_f32", VOP_MAC_F32, 0x0, "v_fmac_f32">;
1179} // End SubtargetPredicate = HasDLInsts
1180
// v_fmac_legacy_f32, gated on HasFmaLegacy32. The destination is tied to
// $src2; no selection pattern is attached.
1181let SubtargetPredicate = HasFmaLegacy32 in {
1182
1183let Constraints = "$vdst = $src2",
1184    DisableEncoding = "$src2",
1185    isConvertibleToThreeAddress = 1,
1186    isCommutable = 1 in
1187defm V_FMAC_LEGACY_F32 : VOP2Inst <"v_fmac_legacy_f32", VOP_MAC_LEGACY_F32>;
1188
1189} // End SubtargetPredicate = HasFmaLegacy32
1190
// v_fmac_f64, gated on HasFmacF64Inst and scheduled as WriteDoubleAdd.
// The destination is tied to $src2.
1191let SubtargetPredicate = HasFmacF64Inst,
1192    Constraints = "$vdst = $src2",
1193    DisableEncoding="$src2",
1194    isConvertibleToThreeAddress = 1,
1195    isCommutable = 1,
1196    SchedRW = [WriteDoubleAdd] in
1197defm V_FMAC_F64 : VOP2Inst <"v_fmac_f64", VOP_MAC_F64>;
1198
// Accumulating dot-product VOP2 instructions. All tie $vdst to $src2 and set
// IsDOT; each is gated on its own HasDotNInsts predicate. The two f32 forms
// are also VOPD components (opcodes 0xc and 0xd).
1199let Constraints = "$vdst = $src2",
1200      DisableEncoding="$src2",
1201      isConvertibleToThreeAddress = 1,
1202      isCommutable = 1,
1203      IsDOT = 1 in {
1204  let SubtargetPredicate = HasDot5Insts in
1205    defm V_DOT2C_F32_F16 : VOP2Inst_VOPD<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16, 0xc, "v_dot2acc_f32_f16">;
1206  let SubtargetPredicate = HasDot6Insts in
1207    defm V_DOT4C_I32_I8  : VOP2Inst<"v_dot4c_i32_i8",  VOP_DOT_ACC_I32_I32>;
1208
1209  let SubtargetPredicate = HasDot4Insts in
1210    defm V_DOT2C_I32_I16 : VOP2Inst<"v_dot2c_i32_i16", VOP_DOT_ACC_I32_I32>;
1211  let SubtargetPredicate = HasDot3Insts in
1212    defm V_DOT8C_I32_I4  : VOP2Inst<"v_dot8c_i32_i4",  VOP_DOT_ACC_I32_I32>;
1213
1214  let SubtargetPredicate = HasDot13Insts in
1215    defm V_DOT2C_F32_BF16 : VOP2Inst_VOPD<"v_dot2c_f32_bf16", VOP_DOT_ACC_F32_V2BF16, 0xd, "v_dot2acc_f32_bf16">;
1216}
1217
// Selection patterns for the accumulating dot-product instructions. Each
// matches only when the clamp operand is DSTCLAMP.NONE, selects the e32
// encoding, and uses the same subtarget predicate as the instruction it
// targets. AddedComplexity = 30 raises their priority.
1218let AddedComplexity = 30 in {
1219  def : GCNPat<
1220    (f32 (AMDGPUfdot2 v2f16:$src0, v2f16:$src1, f32:$src2, (i1 DSTCLAMP.NONE))),
1221    (f32 (V_DOT2C_F32_F16_e32 $src0, $src1, $src2))
1222  > {
1223    let SubtargetPredicate = HasDot5Insts;
1224  }
1225  def : GCNPat<
1226    (f32 (int_amdgcn_fdot2_f32_bf16 v2bf16:$src0, v2bf16:$src1, f32:$src2, (i1 DSTCLAMP.NONE))),
1227    (f32 (V_DOT2C_F32_BF16_e32 $src0, $src1, $src2))
1228  > {
1229    let SubtargetPredicate = HasDot13Insts;
1230  }
1231  def : GCNPat<
1232    (i32 (int_amdgcn_sdot4 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
1233    (i32 (V_DOT4C_I32_I8_e32 $src0, $src1, $src2))
1234  > {
1235    let SubtargetPredicate = HasDot6Insts;
1236  }
1237  def : GCNPat<
1238    (i32 (int_amdgcn_sdot2 v2i16:$src0, v2i16:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
1239    (i32 (V_DOT2C_I32_I16_e32 $src0, $src1, $src2))
1240  > {
1241    let SubtargetPredicate = HasDot4Insts;
1242  }
1243  def : GCNPat<
1244    (i32 (int_amdgcn_sdot8 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
1245    (i32 (V_DOT8C_I32_I4_e32 $src0, $src1, $src2))
1246  > {
1247    let SubtargetPredicate = HasDot3Insts;
1248  }
1249} // End AddedComplexity = 30
1250
// v_fmamk_f32 / v_fmaak_f32 pseudos: fixed-size, rematerializable, and also
// VOPD components (opcodes 0x2 and 0x1). Only the fmaak form is commutable.
1251let SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1, FixedSize = 1 in {
1252def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">, VOPD_Component<0x2, "v_fmamk_f32">;
1253
1254let isCommutable = 1 in
1255def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">, VOPD_Component<0x1, "v_fmaak_f32">;
1256} // End SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1, FixedSize = 1
1257
// Packed f16 fmac, gated on HasPkFmacF16Inst; no selection pattern attached.
1258let SubtargetPredicate = HasPkFmacF16Inst in {
1259defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>;
1260} // End SubtargetPredicate = HasPkFmacF16Inst
1261
1262// Note: 16-bit instructions produce a 0 result in the high 16-bits
1263// on GFX8 and GFX9 and preserve high 16 bits on GFX10+
// Folds a zero-extension of a 16-bit op into the op itself. The i32 form
// selects inst directly; the i64 form puts inst's result in sub0 and a
// zero-valued V_MOV_B32 in sub1.
1264multiclass Arithmetic_i16_0Hi_Pats <SDPatternOperator op, Instruction inst> {
1265
1266def : GCNPat<
1267  (i32 (zext (op i16:$src0, i16:$src1))),
1268  (inst VSrc_b16:$src0, VSrc_b16:$src1)
1269>;
1270
1271def : GCNPat<
1272  (i64 (zext (op i16:$src0, i16:$src1))),
1273   (REG_SEQUENCE VReg_64,
1274     (inst $src0, $src1), sub0,
1275     (V_MOV_B32_e32 (i32 0)), sub1)
1276>;
1277}
1278
// Extends an i1 to i16 with V_CNDMASK_B32_e64: both modifier operands are 0,
// src0 is 0, src1 is 1, and $src is the select condition.
1279class ZExt_i16_i1_Pat <SDNode ext> : GCNPat <
1280  (i16 (ext i1:$src)),
1281  (V_CNDMASK_B32_e64 (i32 0/*src0mod*/), (i32 0/*src0*/),
1282                     (i32 0/*src1mod*/), (i32 1/*src1*/),
1283                     $src)
1284>;
1285
// Under both the NotHasTrue16BitInsts and UseFakeTrue16Insts predicates,
// i16 and/or/xor are selected to the 32-bit e64 instructions.
1286foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
1287let True16Predicate = p in {
1288def : GCNPat <
1289  (and i16:$src0, i16:$src1),
1290  (V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
1291>;
1292
1293def : GCNPat <
1294  (or i16:$src0, i16:$src1),
1295  (V_OR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
1296>;
1297
1298def : GCNPat <
1299  (xor i16:$src0, i16:$src1),
1300  (V_XOR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
1301>;
1302}
1303
// v2i16 and/or/xor are selected to the 32-bit e64 bitwise instructions,
// with no extra predicate.
1304def : GCNPat <
1305  (and v2i16:$src0, v2i16:$src1),
1306  (V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
1307>;
1308
1309def : GCNPat <
1310  (or v2i16:$src0, v2i16:$src1),
1311  (V_OR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
1312>;
1313
1314def : GCNPat <
1315  (xor v2i16:$src0, v2i16:$src1),
1316  (V_XOR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
1317>;
1318
// i16 patterns that apply only when both Has16BitInsts and isGFX8GFX9 hold.
1319let Predicates = [Has16BitInsts, isGFX8GFX9] in {
1320
1321// Undo sub x, c -> add x, -c canonicalization since c is more likely
1322// an inline immediate than -c.
1323// TODO: Also do for 64-bit.
1324def : GCNPat<
1325  (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
1326  (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1)
1327>;
1328
// Same rewrite when the add is wrapped in a zext to i32.
1329def : GCNPat<
1330  (i32 (zext (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)))),
1331  (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1)
1332>;
1333
// Fold zext of each listed 16-bit operation into the operation itself.
1334defm : Arithmetic_i16_0Hi_Pats<add, V_ADD_U16_e64>;
1335defm : Arithmetic_i16_0Hi_Pats<mul, V_MUL_LO_U16_e64>;
1336defm : Arithmetic_i16_0Hi_Pats<sub, V_SUB_U16_e64>;
1337defm : Arithmetic_i16_0Hi_Pats<smin, V_MIN_I16_e64>;
1338defm : Arithmetic_i16_0Hi_Pats<smax, V_MAX_I16_e64>;
1339defm : Arithmetic_i16_0Hi_Pats<umin, V_MIN_U16_e64>;
1340defm : Arithmetic_i16_0Hi_Pats<umax, V_MAX_U16_e64>;
1341defm : Arithmetic_i16_0Hi_Pats<clshl_rev_16, V_LSHLREV_B16_e64>;
1342defm : Arithmetic_i16_0Hi_Pats<clshr_rev_16, V_LSHRREV_B16_e64>;
1343defm : Arithmetic_i16_0Hi_Pats<cashr_rev_16, V_ASHRREV_I16_e64>;
1344
1345}  // End Predicates = [Has16BitInsts, isGFX8GFX9]
1346
// i1 -> i16 extensions via V_CNDMASK_B32_e64. zext and anyext use the
// 0-or-1 select from ZExt_i16_i1_Pat; sext selects between 0 and -1.
1347let Predicates = [Has16BitInsts] in {
1348
1349def : ZExt_i16_i1_Pat<zext>;
1350def : ZExt_i16_i1_Pat<anyext>;
1351
1352def : GCNPat <
1353  (i16 (sext i1:$src)),
1354  (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
1355                     /*src1mod*/(i32 0), /*src1*/(i32 -1), $src)
1356>;
1357
1358} // End Predicates = [Has16BitInsts]
1359
1360
// Saturating add/sub selection via VOPBinOpClampPat, in three groups:
// carry-out i32 forms, carry-less i32 forms (given AddedComplexity = 1),
// and the 16-bit forms. Every group requires HasIntClamp.
1361let SubtargetPredicate = HasIntClamp in {
1362// Set clamp bit for saturation.
1363def : VOPBinOpClampPat<uaddsat, V_ADD_CO_U32_e64, i32>;
1364def : VOPBinOpClampPat<usubsat, V_SUB_CO_U32_e64, i32>;
1365}
1366
1367let SubtargetPredicate = HasAddNoCarryInsts, OtherPredicates = [HasIntClamp] in {
1368let AddedComplexity = 1 in { // Prefer over form with carry-out.
1369def : VOPBinOpClampPat<uaddsat, V_ADD_U32_e64, i32>;
1370def : VOPBinOpClampPat<usubsat, V_SUB_U32_e64, i32>;
1371}
1372}
1373
1374let SubtargetPredicate = Has16BitInsts, OtherPredicates = [HasIntClamp] in {
1375def : VOPBinOpClampPat<uaddsat, V_ADD_U16_e64, i16>;
1376def : VOPBinOpClampPat<usubsat, V_SUB_U16_e64, i16>;
1377}
1378
// GFX12+ VOP2 pseudos for 64-bit add/mul/min/max and the reversed 64-bit
// left shift. All are rematerializable; the FP forms are commutable and
// scheduled as WriteDoubleAdd, the shift as Write64Bit.
//
// Both rounding arithmetic pseudos are selected through the any_* pattern
// operators, matching V_ADD_F64_pseudo's any_fadd and the any_fmul used by
// V_MUL_F32 / V_MUL_F16 in this file. Using plain fmul for the multiply
// would leave the strict (constrained) f64 multiply without a pattern here.
1379let SubtargetPredicate = isGFX12Plus, isReMaterializable = 1 in {
1380  let SchedRW = [WriteDoubleAdd], isCommutable = 1 in {
1381    let FPDPRounding = 1 in {
1382      defm V_ADD_F64_pseudo : VOP2Inst <"v_add_f64_pseudo", VOP_F64_F64_F64, any_fadd>;
1383      defm V_MUL_F64_pseudo : VOP2Inst <"v_mul_f64_pseudo", VOP_F64_F64_F64, any_fmul>;
1384    } // End FPDPRounding = 1
1385    defm V_MIN_NUM_F64 : VOP2Inst <"v_min_num_f64", VOP_F64_F64_F64, fminnum_like>;
1386    defm V_MAX_NUM_F64 : VOP2Inst <"v_max_num_f64", VOP_F64_F64_F64, fmaxnum_like>;
1387  } // End SchedRW = [WriteDoubleAdd], isCommutable = 1
1388  let SchedRW = [Write64Bit] in {
1389    defm V_LSHLREV_B64_pseudo : VOP2Inst <"v_lshlrev_b64_pseudo", VOP_I64_I32_I64, clshl_rev_64>;
1390  } // End SchedRW = [Write64Bit]
1391} // End SubtargetPredicate = isGFX12Plus, isReMaterializable = 1
1392
1393//===----------------------------------------------------------------------===//
1394// DPP Encodings
1395//===----------------------------------------------------------------------===//
1396
// Encoding of a VOP2 instruction in DPP form. Side-effect, Defs, Uses and
// scheduling information are copied from the pseudo `ps`. Bits 8-0 are the
// fixed value 0xfa, bits 16-9 and 24-17 hold the low 8 bits of src1 / vdst
// (or 0 when the profile has no src1 / emits no dst), bits 30-25 hold the
// opcode and bit 31 is 0.
1397class VOP2_DPP<bits<6> op, VOP2_DPP_Pseudo ps,
1398               string opName = ps.OpName, VOPProfile p = ps.Pfl,
1399               bit IsDPP16 = 0> :
1400    VOP_DPP<opName, p, IsDPP16> {
1401  let hasSideEffects = ps.hasSideEffects;
1402  let Defs = ps.Defs;
1403  let SchedRW = ps.SchedRW;
1404  let Uses = ps.Uses;
1405
1406  bits<8> vdst;
1407  bits<8> src1;
1408  let Inst{8-0}   = 0xfa;
1409  let Inst{16-9}  = !if(p.HasSrc1, src1{7-0}, 0);
1410  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
1411  let Inst{30-25} = op;
1412  let Inst{31}    = 0x0;
1413}
1414
// VOP2_DPP instantiated with IsDPP16 = 1. The assembler predicate is
// HasDPP16; the subtarget and other predicates are inherited from the
// pseudo.
1415class Base_VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps,
1416                 string opName = ps.OpName, VOPProfile p = ps.Pfl> :
1417    VOP2_DPP<op, ps, opName, p, 1> {
1418  let AssemblerPredicate = HasDPP16;
1419  let SubtargetPredicate = ps.SubtargetPredicate;
1420  let OtherPredicates = ps.OtherPredicates;
1421}
1422
// Base_VOP2_DPP16 combined with a SIMCInstr record keyed on the pseudo's
// PseudoInstr name and the given subtarget id.
1423class VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps, int subtarget,
1424                 string opName = ps.OpName, VOPProfile p = ps.Pfl> :
1425    Base_VOP2_DPP16<op, ps, opName, p>,
1426    SIMCInstr <ps.PseudoInstr, subtarget>;
1427
// Generation-parameterised DPP16 real instruction. Subtarget id, assembler
// predicate and decoder namespace come from `Gen`. The decoder namespace
// gets a "_FAKE16" suffix unless the pseudo's profile is IsRealTrue16, in
// which case True16Predicate is also set to UseRealTrue16Insts.
1428class VOP2_DPP16_Gen<bits<6> op, VOP2_DPP_Pseudo ps, GFXGen Gen,
1429                 string opName = ps.OpName, VOPProfile p = ps.Pfl> :
1430    VOP2_DPP16<op, ps, Gen.Subtarget, opName, p> {
1431  let AssemblerPredicate = Gen.AssemblerPredicate;
1432  let True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate);
1433  let DecoderNamespace = Gen.DecoderNamespace#
1434                         !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
1435}
1436
// Encoding of a VOP2 instruction in DPP8 form. It mirrors VOP2_DPP, except
// that bits 8-0 are taken from `fi` (not declared in this class) instead of
// a fixed constant. Predicates and side-effect/scheduling information are
// copied from the pseudo.
1437class VOP2_DPP8<bits<6> op, VOP2_Pseudo ps,
1438                VOPProfile p = ps.Pfl> :
1439    VOP_DPP8<ps.OpName, p> {
1440  let hasSideEffects = ps.hasSideEffects;
1441  let Defs = ps.Defs;
1442  let SchedRW = ps.SchedRW;
1443  let Uses = ps.Uses;
1444
1445  bits<8> vdst;
1446  bits<8> src1;
1447
1448  let Inst{8-0}   = fi;
1449  let Inst{16-9}  = !if(p.HasSrc1, src1{7-0}, 0);
1450  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
1451  let Inst{30-25} = op;
1452  let Inst{31}    = 0x0;
1453
1454  let SubtargetPredicate = ps.SubtargetPredicate;
1455  let OtherPredicates = ps.OtherPredicates;
1456}
1457
// VOP2_DPP8 parameterised by a GFXGen record. The assembler predicate and the
// decoder namespace come from `Gen`; profiles that are not real True16 get the
// "_FAKE16" decoder-namespace suffix and no True16 predicate.
class VOP2_DPP8_Gen<bits<6> op, VOP2_Pseudo ps, GFXGen Gen,
                    VOPProfile p = ps.Pfl>
    : VOP2_DPP8<op, ps, p> {
  let AssemblerPredicate = Gen.AssemblerPredicate;
  let DecoderNamespace =
      Gen.DecoderNamespace # !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
  let True16Predicate =
      !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate);
}
1466
1467//===----------------------------------------------------------------------===//
1468// GFX11, GFX12
1469//===----------------------------------------------------------------------===//
1470
1471//===------------------------------- VOP2 -------------------------------===//
// Real MADK encoding (VOP2_MADKe) for the VOP2 pseudo named NAME.
multiclass VOP2Only_Real_MADK<GFXGen Gen, bits<6> op> {
  defvar MadkPseudo = !cast<VOP2_Pseudo>(NAME);
  def Gen.Suffix : VOP2_Real_Gen<MadkPseudo, Gen>,
                   VOP2_MADKe<op{5-0}, MadkPseudo.Pfl>;
}
1477
// As VOP2Only_Real_MADK, but the pseudo is looked up by `opName` and the
// assembly string is rebuilt from `asmName` plus the pseudo's operands.
multiclass VOP2Only_Real_MADK_with_name<GFXGen Gen, bits<6> op, string asmName,
                                        string opName = NAME> {
  defvar MadkPseudo = !cast<VOP2_Pseudo>(opName);
  def Gen.Suffix : VOP2_Real_Gen<MadkPseudo, Gen>,
                   VOP2_MADKe<op{5-0}, MadkPseudo.Pfl> {
    VOP2_Pseudo ps = MadkPseudo;
    let AsmString = asmName # ps.AsmOperands;
  }
}
1487
// Real 32-bit (VOP2e) encoding for the pseudo NAME#"_e32".
multiclass VOP2_Real_e32<GFXGen Gen, bits<6> op> {
  defvar E32Pseudo = !cast<VOP2_Pseudo>(NAME#"_e32");
  def _e32#Gen.Suffix : VOP2_Real_Gen<E32Pseudo, Gen>,
                        VOP2e<op{5-0}, E32Pseudo.Pfl>;
}
1493
// VOP2_Real_e32 with IsSingle forced to 1 on the generated record.
multiclass VOP2Only_Real_e32<GFXGen Gen, bits<6> op> {
  let IsSingle = 1 in {
    defm NAME : VOP2_Real_e32<Gen, op>;
  }
}
1498
// Real VOP3 (e64) encoding for the pseudo NAME#"_e64". The VOP3 opcode is the
// bit pattern 0,1,0,0 followed by the six VOP2 opcode bits.
multiclass VOP2_Real_e64<GFXGen Gen, bits<6> op> {
  defvar E64Pseudo = !cast<VOP3_Pseudo>(NAME#"_e64");
  def _e64#Gen.Suffix
      : VOP3_Real_Gen<E64Pseudo, Gen>,
        VOP3e_gfx11_gfx12<{0, 1, 0, 0, op{5-0}}, E64Pseudo.Pfl>;
}
1504
// Real DPP16 encoding, emitted only when the e32 pseudo's profile has
// HasExtDPP set.
multiclass VOP2_Real_dpp<GFXGen Gen, bits<6> op> {
  defvar E32Pseudo = !cast<VOP2_Pseudo>(NAME#"_e32");
  if E32Pseudo.Pfl.HasExtDPP then {
    def _dpp#Gen.Suffix
        : VOP2_DPP16_Gen<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), Gen>;
  }
}
1509
// Real DPP8 encoding built from the e32 pseudo, emitted only when that
// pseudo's profile has HasExtDPP set.
multiclass VOP2_Real_dpp8<GFXGen Gen, bits<6> op> {
  defvar E32Pseudo = !cast<VOP2_Pseudo>(NAME#"_e32");
  if E32Pseudo.Pfl.HasExtDPP then {
    def _dpp8#Gen.Suffix : VOP2_DPP8_Gen<op, E32Pseudo, Gen>;
  }
}
1514
1515//===------------------------- VOP2 (with name) -------------------------===//
// Real e32 encoding for the pseudo opName#"_e32", printed under `asmName`.
// `single` is copied into IsSingle.
multiclass VOP2_Real_e32_with_name<GFXGen Gen, bits<6> op, string opName,
                                   string asmName, bit single = 0> {
  defvar E32Pseudo = !cast<VOP2_Pseudo>(opName#"_e32");
  def _e32#Gen.Suffix : VOP2_Real_Gen<E32Pseudo, Gen, asmName>,
                        VOP2e<op{5-0}, E32Pseudo.Pfl> {
    let IsSingle  = single;
    let AsmString = asmName # E32Pseudo.AsmOperands;
  }
}
// Real VOP3 (e64) encoding for the pseudo opName#"_e64", printed under
// `asmName`.
multiclass VOP2_Real_e64_with_name<GFXGen Gen, bits<6> op, string opName,
                                   string asmName> {
  defvar E64Pseudo = !cast<VOP3_Pseudo>(opName#"_e64");
  def _e64#Gen.Suffix
      : VOP3_Real_Gen<E64Pseudo, Gen>,
        VOP3e_gfx11_gfx12<{0, 1, 0, 0, op{5-0}}, E64Pseudo.Pfl> {
    let AsmString = asmName # E64Pseudo.AsmOperands;
  }
}
1535
// Real DPP16 encoding for opName#"_dpp", printed under `asmName` with the
// e32 profile's AsmDPP16 operand string. Guarded by HasExtDPP.
multiclass VOP2_Real_dpp_with_name<GFXGen Gen, bits<6> op, string opName,
                                   string asmName> {
  defvar E32Pseudo = !cast<VOP2_Pseudo>(opName#"_e32");
  if E32Pseudo.Pfl.HasExtDPP then {
    def _dpp#Gen.Suffix
        : VOP2_DPP16_Gen<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), Gen> {
      let AsmString = asmName # E32Pseudo.Pfl.AsmDPP16;
    }
  }
}
// Real DPP8 encoding for opName#"_e32", printed under `asmName` with the
// profile's AsmDPP8 operand string. Guarded by HasExtDPP.
multiclass VOP2_Real_dpp8_with_name<GFXGen Gen, bits<6> op, string opName,
                                    string asmName> {
  defvar E32Pseudo = !cast<VOP2_Pseudo>(opName#"_e32");
  if E32Pseudo.Pfl.HasExtDPP then {
    def _dpp8#Gen.Suffix : VOP2_DPP8_Gen<op, E32Pseudo, Gen> {
      let AsmString = asmName # E32Pseudo.Pfl.AsmDPP8;
    }
  }
}
1552
1553//===------------------------------ VOP2be ------------------------------===//
// Real e32 encoding for a VOP2be pseudo. The assembly string is `asmName`
// followed by the pseudo's operands with every ", vcc" removed.
multiclass VOP2be_Real_e32<GFXGen Gen, bits<6> op, string opName,
                           string asmName> {
  defvar E32Pseudo = !cast<VOP2_Pseudo>(opName#"_e32");
  def _e32#Gen.Suffix : VOP2_Real_Gen<E32Pseudo, Gen>,
                        VOP2e<op{5-0}, E32Pseudo.Pfl> {
    let AsmString = asmName # !subst(", vcc", "", E32Pseudo.AsmOperands);
  }
}
// DPP16 reals for a VOP2be pseudo; all three are emitted only when the e32
// profile has HasExtDPP set.
//   _dpp     - full real; ", vcc" is stripped from the operand string.
//   _dpp_w32 - assembler-only wave32 form; "vcc" is spelled "vcc_lo".
//   _dpp_w64 - assembler-only wave64 form; operand string used as-is.
multiclass VOP2be_Real_dpp<GFXGen Gen, bits<6> op, string opName,
                           string asmName> {
  defvar E32Pseudo = !cast<VOP2_Pseudo>(opName#"_e32");
  if E32Pseudo.Pfl.HasExtDPP then {
    def _dpp#Gen.Suffix
        : VOP2_DPP16_Gen<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), Gen,
                         asmName> {
      string AsmDPP = E32Pseudo.Pfl.AsmDPP16;
      let AsmString = asmName # !subst(", vcc", "", AsmDPP);
    }

    def _dpp_w32#Gen.Suffix
        : Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
      string AsmDPP = E32Pseudo.Pfl.AsmDPP16;
      let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP);
      let isAsmParserOnly = 1;
      let WaveSizePredicate = isWave32;
      let AssemblerPredicate = Gen.AssemblerPredicate;
      let DecoderNamespace = Gen.DecoderNamespace;
    }

    def _dpp_w64#Gen.Suffix
        : Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
      string AsmDPP = E32Pseudo.Pfl.AsmDPP16;
      let AsmString = asmName # AsmDPP;
      let isAsmParserOnly = 1;
      let WaveSizePredicate = isWave64;
      let AssemblerPredicate = Gen.AssemblerPredicate;
      let DecoderNamespace = Gen.DecoderNamespace;
    }
  }
}
// DPP8 reals for a VOP2be pseudo; all three are emitted only when the e32
// profile has HasExtDPP set.
//   _dpp8     - generation-specific real; ", vcc" is stripped.
//   _dpp8_w32 - assembler-only wave32 form; "vcc" is spelled "vcc_lo".
//   _dpp8_w64 - assembler-only wave64 form; operand string used as-is.
multiclass VOP2be_Real_dpp8<GFXGen Gen, bits<6> op, string opName,
                            string asmName> {
  defvar E32Pseudo = !cast<VOP2_Pseudo>(opName#"_e32");
  if E32Pseudo.Pfl.HasExtDPP then {
    def _dpp8#Gen.Suffix : VOP2_DPP8_Gen<op, E32Pseudo, Gen> {
      string AsmDPP8 = E32Pseudo.Pfl.AsmDPP8;
      let AsmString = asmName # !subst(", vcc", "", AsmDPP8);
    }

    def _dpp8_w32#Gen.Suffix : VOP2_DPP8<op, E32Pseudo> {
      string AsmDPP8 = E32Pseudo.Pfl.AsmDPP8;
      let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8);
      let isAsmParserOnly = 1;
      let WaveSizePredicate = isWave32;
      let AssemblerPredicate = Gen.AssemblerPredicate;
      let DecoderNamespace = Gen.DecoderNamespace;
    }

    def _dpp8_w64#Gen.Suffix : VOP2_DPP8<op, E32Pseudo> {
      string AsmDPP8 = E32Pseudo.Pfl.AsmDPP8;
      let AsmString = asmName # AsmDPP8;
      let isAsmParserOnly = 1;
      let WaveSizePredicate = isWave64;
      let AssemblerPredicate = Gen.AssemblerPredicate;
      let DecoderNamespace = Gen.DecoderNamespace;
    }
  }
}
1618
// VOP3 real triple for a VOP2 opcode (VOP3 opcode = 0,1,0,0 followed by the
// VOP2 opcode bits). This is a plain forwarder because the separate decoder
// namespaces chosen inside VOP3_Realtriple must not be overridden here.
multiclass VOP2_Realtriple_e64<GFXGen Gen, bits<6> op>
    : VOP3_Realtriple<Gen, {0, 1, 0, 0, op{5-0}}, /*isSingle=*/ 0, NAME>;
1622
// Named variant of VOP2_Realtriple_e64: forwards to VOP3_Realtriple_with_name
// with the VOP2 opcode widened by the 0,1,0,0 prefix.
multiclass VOP2_Realtriple_e64_with_name<GFXGen Gen, bits<6> op,
                                         string opName, string asmName> {
  defm NAME
      : VOP3_Realtriple_with_name<Gen, {0, 1, 0, 0, op{5-0}}, opName, asmName>;
}
1627
// Full set of reals for a VOP2be instruction: e32, the VOP3be triple, DPP16
// and DPP8.
multiclass VOP2be_Real<GFXGen Gen, bits<6> op, string opName, string asmName>
    : VOP2be_Real_e32<Gen, op, opName, asmName>,
      VOP3be_Realtriple<Gen, {0, 1, 0, 0, op{5-0}}, /*isSingle=*/ 0, opName,
                        asmName>,
      VOP2be_Real_dpp<Gen, op, opName, asmName>,
      VOP2be_Real_dpp8<Gen, op, opName, asmName>;
1633
// Only for CNDMASK: the plain e32 real and VOP3 triple, combined with the
// VOP2be flavours of the DPP16/DPP8 reals.
multiclass VOP2e_Real<GFXGen Gen, bits<6> op, string opName, string asmName>
    : VOP2_Real_e32<Gen, op>,
      VOP2_Realtriple_e64<Gen, op>,
      VOP2be_Real_dpp<Gen, op, opName, asmName>,
      VOP2be_Real_dpp8<Gen, op, opName, asmName>;
1640
// Reals for an instruction with no VOP3 form: single e32 plus DPP16/DPP8.
multiclass VOP2Only_Real<GFXGen Gen, bits<6> op>
    : VOP2Only_Real_e32<Gen, op>,
      VOP2_Real_dpp<Gen, op>,
      VOP2_Real_dpp8<Gen, op>;
1645
// Every real form of a VOP2 instruction: VOP3 triple, e32, DPP16 and DPP8.
multiclass VOP2_Real_FULL<GFXGen Gen, bits<6> op>
    : VOP2_Realtriple_e64<Gen, op>,
      VOP2_Real_e32<Gen, op>,
      VOP2_Real_dpp<Gen, op>,
      VOP2_Real_dpp8<Gen, op>;
1651
// Renamed e32/DPP16/DPP8 reals (no VOP3 form), plus a mnemonic alias from the
// pseudo's mnemonic to `asmName` that is gated on the generation's assembler
// predicate.
multiclass VOP2_Real_NO_VOP3_with_name<GFXGen Gen, bits<6> op, string opName,
                                       string asmName, bit isSingle = 0> {
  defm NAME : VOP2_Real_e32_with_name<Gen, op, opName, asmName, isSingle>,
              VOP2_Real_dpp_with_name<Gen, op, opName, asmName>,
              VOP2_Real_dpp8_with_name<Gen, op, opName, asmName>;

  def Gen.Suffix#"_alias"
      : AMDGPUMnemonicAlias<!cast<VOP2_Pseudo>(opName#"_e32").Mnemonic,
                            asmName> {
    let AssemblerPredicate = Gen.AssemblerPredicate;
  }
}
1662
// Renamed VOP3 triple combined with the renamed e32/DPP reals and alias.
multiclass VOP2_Real_FULL_with_name<GFXGen Gen, bits<6> op, string opName,
                                    string asmName>
    : VOP2_Realtriple_e64_with_name<Gen, op, opName, asmName>,
      VOP2_Real_NO_VOP3_with_name<Gen, op, opName, asmName>;
1667
// Renamed e32 and e64 reals (no DPP forms), plus a mnemonic alias from the
// pseudo's mnemonic to `asmName`.
multiclass VOP2_Real_NO_DPP_with_name<GFXGen Gen, bits<6> op, string opName,
                                      string asmName> {
  defm NAME : VOP2_Real_e32_with_name<Gen, op, opName, asmName>,
              VOP2_Real_e64_with_name<Gen, op, opName, asmName>;

  def Gen.Suffix#"_alias"
      : AMDGPUMnemonicAlias<!cast<VOP2_Pseudo>(opName#"_e32").Mnemonic,
                            asmName> {
    let AssemblerPredicate = Gen.AssemblerPredicate;
  }
}
1677
// e32 and e64 reals (no DPP forms) under the instruction's own name, plus a
// mnemonic alias mapping `alias` to NAME.
multiclass VOP2_Real_NO_DPP_with_alias<GFXGen Gen, bits<6> op, string alias> {
  defm NAME : VOP2_Real_e32<Gen, op>,
              VOP2_Real_e64<Gen, op>;

  def Gen.Suffix#"_alias" : AMDGPUMnemonicAlias<alias, NAME> {
    let AssemblerPredicate = Gen.AssemblerPredicate;
  }
}
1685
1686//===----------------------------------------------------------------------===//
1687// GFX12.
1688//===----------------------------------------------------------------------===//
1689
// GFX12 instantiation of VOP2be_Real.
multiclass VOP2be_Real_gfx12<bits<6> op, string opName, string asmName>
    : VOP2be_Real<GFX12Gen, op, opName, asmName>;
1692
// Only for CNDMASK: GFX12 instantiation of VOP2e_Real.
multiclass VOP2e_Real_gfx12<bits<6> op, string opName, string asmName>
    : VOP2e_Real<GFX12Gen, op, opName, asmName>;
1696
// GFX12 instantiation of VOP2_Real_FULL_with_name.
multiclass VOP2_Real_FULL_with_name_gfx12<bits<6> op, string opName,
                                          string asmName>
    : VOP2_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;
1700
// GFX12 renamed full real set, plus a second mnemonic alias (`alias` ->
// `asmName`) that is only accepted under isGFX12Only.
multiclass VOP2_Real_FULL_t16_gfx12<bits<6> op, string opName, string asmName,
                                    string alias> {
  defm NAME : VOP2_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;

  def _gfx12_2nd_alias : AMDGPUMnemonicAlias<alias, asmName> {
    let AssemblerPredicate = isGFX12Only;
  }
}
1708
// Instantiates VOP2_Real_FULL_t16_gfx12 twice: once for the opName#"_t16"
// pseudo and once for the opName#"_fake16" pseudo.
multiclass VOP2_Real_FULL_t16_and_fake16_gfx12<bits<6> op, string opName,
                                               string asmName, string alias> {
  defm _t16
      : VOP2_Real_FULL_t16_gfx12<op, opName#"_t16", asmName, alias>;
  defm _fake16
      : VOP2_Real_FULL_t16_gfx12<op, opName#"_fake16", asmName, alias>;
}
1714
// GFX12 instantiation of VOP2_Real_NO_DPP_with_name.
multiclass VOP2_Real_NO_DPP_with_name_gfx12<bits<6> op, string opName,
                                            string asmName>
    : VOP2_Real_NO_DPP_with_name<GFX12Gen, op, opName, asmName>;
1718
// GFX12 instantiation of VOP2_Real_NO_DPP_with_alias.
multiclass VOP2_Real_NO_DPP_with_alias_gfx12<bits<6> op, string alias>
    : VOP2_Real_NO_DPP_with_alias<GFX12Gen, op, alias>;
1721
// F64/B64 operations: e32 and e64 reals only (no DPP forms).
defm V_ADD_F64
    : VOP2_Real_NO_DPP_with_name_gfx12<0x002, "V_ADD_F64_pseudo", "v_add_f64">;
defm V_MUL_F64
    : VOP2_Real_NO_DPP_with_name_gfx12<0x006, "V_MUL_F64_pseudo", "v_mul_f64">;
defm V_LSHLREV_B64
    : VOP2_Real_NO_DPP_with_name_gfx12<0x01f, "V_LSHLREV_B64_pseudo",
                                       "v_lshlrev_b64">;
defm V_MIN_NUM_F64 : VOP2_Real_NO_DPP_with_alias_gfx12<0x00d, "v_min_f64">;
defm V_MAX_NUM_F64 : VOP2_Real_NO_DPP_with_alias_gfx12<0x00e, "v_max_f64">;

// CNDMASK and the carry-in add/sub family.
defm V_CNDMASK_B32
    : VOP2e_Real_gfx12<0x001, "V_CNDMASK_B32", "v_cndmask_b32">;
defm V_ADD_CO_CI_U32
    : VOP2be_Real_gfx12<0x020, "V_ADDC_U32", "v_add_co_ci_u32">;
defm V_SUB_CO_CI_U32
    : VOP2be_Real_gfx12<0x021, "V_SUBB_U32", "v_sub_co_ci_u32">;
defm V_SUBREV_CO_CI_U32
    : VOP2be_Real_gfx12<0x022, "V_SUBBREV_U32", "v_subrev_co_ci_u32">;

// min/max printed with the *_num_* mnemonics on GFX12.
defm V_MIN_NUM_F32
    : VOP2_Real_FULL_with_name_gfx12<0x015, "V_MIN_F32", "v_min_num_f32">;
defm V_MAX_NUM_F32
    : VOP2_Real_FULL_with_name_gfx12<0x016, "V_MAX_F32", "v_max_num_f32">;
defm V_MIN_NUM_F16
    : VOP2_Real_FULL_t16_and_fake16_gfx12<0x030, "V_MIN_F16", "v_min_num_f16",
                                          "v_min_f16">;
defm V_MAX_NUM_F16
    : VOP2_Real_FULL_t16_and_fake16_gfx12<0x031, "V_MAX_F16", "v_max_num_f16",
                                          "v_max_f16">;
1740
// Instruction aliases for the GFX12 e32 reals of CNDMASK and the carry-in
// add/sub family.
let SubtargetPredicate = isGFX12Plus in {
  defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx12>;

  defm : VOP2bInstAliases<V_ADDC_U32_e32, V_ADD_CO_CI_U32_e32_gfx12,
                          "v_add_co_ci_u32">;
  defm : VOP2bInstAliases<V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx12,
                          "v_sub_co_ci_u32">;
  defm : VOP2bInstAliases<V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx12,
                          "v_subrev_co_ci_u32">;
} // End SubtargetPredicate = isGFX12Plus
1751
1752//===----------------------------------------------------------------------===//
1753// GFX11.
1754//===----------------------------------------------------------------------===//
1755
// GFX11 instantiation of VOP2be_Real.
multiclass VOP2be_Real_gfx11<bits<6> op, string opName, string asmName>
    : VOP2be_Real<GFX11Gen, op, opName, asmName>;
1758
// Only for CNDMASK: GFX11 instantiation of VOP2e_Real.
multiclass VOP2e_Real_gfx11<bits<6> op, string opName, string asmName>
    : VOP2e_Real<GFX11Gen, op, opName, asmName>;
1762
// GFX11 instantiation of VOP2_Real_NO_VOP3_with_name: renamed e32/DPP16/DPP8
// reals plus the mnemonic alias from the pseudo's mnemonic to `asmName`.
//
// This used to repeat the generic multiclass body with GFX11Gen spelled out;
// it now forwards to the generic so the two cannot drift apart.
// NOTE(review): the alias record keeps its `_gfx11_alias` name and its
// isGFX11Only predicate only if GFX11Gen.Suffix is "_gfx11" and
// GFX11Gen.AssemblerPredicate is isGFX11Only - confirm against the GFXGen
// definition.
multiclass VOP2_Real_NO_VOP3_with_name_gfx11<bits<6> op, string opName,
                                             string asmName, bit isSingle = 0>
    : VOP2_Real_NO_VOP3_with_name<GFX11Gen, op, opName, asmName, isSingle>;
1773
// GFX11 renamed full real set; note that `asmName` precedes `opName` here.
multiclass VOP2_Real_FULL_t16_gfx11<bits<6> op, string asmName,
                                    string opName = NAME>
    : VOP2_Real_FULL_with_name<GFX11Gen, op, opName, asmName>;
1776
// Instantiates VOP2_Real_FULL_t16_gfx11 for both the opName#"_t16" and the
// opName#"_fake16" pseudos.
// NOTE(review): unlike the sibling *_t16_and_fake16_* multiclasses, the defm
// names here are built from opName rather than a bare "_t16"/"_fake16"
// suffix - confirm the resulting record names are the intended ones.
multiclass VOP2_Real_FULL_t16_and_fake16_gfx11<bits<6> op, string asmName,
                                               string opName = NAME> {
  defm opName#"_t16"
      : VOP2_Real_FULL_t16_gfx11<op, asmName, opName#"_t16">;
  defm opName#"_fake16"
      : VOP2_Real_FULL_t16_gfx11<op, asmName, opName#"_fake16">;
}
1781
// GFX11 instantiation of VOP2_Real_NO_DPP_with_name.
multiclass VOP2_Real_NO_DPP_with_name_gfx11<bits<6> op, string opName,
                                            string asmName>
    : VOP2_Real_NO_DPP_with_name<GFX11Gen, op, opName, asmName>;
1785
// VOP2_Real_FULL for both GFX11 and GFX12 with the same opcode.
multiclass VOP2_Real_FULL_gfx11_gfx12<bits<6> op>
    : VOP2_Real_FULL<GFX11Gen, op>,
      VOP2_Real_FULL<GFX12Gen, op>;
1788
// VOP2_Real_FULL_with_name for both GFX11 and GFX12 with the same opcode.
multiclass VOP2_Real_FULL_with_name_gfx11_gfx12<bits<6> op, string opName,
                                                string asmName>
    : VOP2_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
      VOP2_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;
1793
// VOP2Only_Real (single e32 plus DPP forms) for both GFX11 and GFX12.
multiclass VOP2_Real_e32_gfx11_gfx12<bits<6> op>
    : VOP2Only_Real<GFX11Gen, op>,
      VOP2Only_Real<GFX12Gen, op>;
1796
// VOP3Only_Realtriple for both GFX11 and GFX12 with the same 10-bit opcode.
multiclass VOP3Only_Realtriple_gfx11_gfx12<bits<10> op>
    : VOP3Only_Realtriple<GFX11Gen, op>,
      VOP3Only_Realtriple<GFX12Gen, op>;
1799
// VOP3-only t16 real triple for both GFX11 and GFX12; both instantiations
// pass an empty fourth argument and IsSingle = 1.
multiclass VOP3Only_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName,
                                               string OpName = NAME>
    : VOP3_Realtriple_t16_gfx11<op, asmName, OpName, "", /*IsSingle*/1>,
      VOP3_Realtriple_t16_gfx12<op, asmName, OpName, "", /*IsSingle*/1>;
1803
// Instantiates VOP3Only_Realtriple_t16_gfx11_gfx12 for both the
// OpName#"_t16" and the OpName#"_fake16" pseudos.
multiclass VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<
    bits<10> op, string asmName, string OpName = NAME> {
  defm _t16
      : VOP3Only_Realtriple_t16_gfx11_gfx12<op, asmName, OpName#"_t16">;
  defm _fake16
      : VOP3Only_Realtriple_t16_gfx11_gfx12<op, asmName, OpName#"_fake16">;
}
1808
// VOP3beOnly_Realtriple for both GFX11 and GFX12 with the same opcode.
multiclass VOP3beOnly_Realtriple_gfx11_gfx12<bits<10> op>
    : VOP3beOnly_Realtriple<GFX11Gen, op>,
      VOP3beOnly_Realtriple<GFX12Gen, op>;
1811
// Renamed MADK real for both GFX11 and GFX12.
multiclass VOP2Only_Real_MADK_t16_gfx11_gfx12<bits<6> op, string asmName,
                                              string opName = NAME>
    : VOP2Only_Real_MADK_with_name<GFX11Gen, op, asmName, opName>,
      VOP2Only_Real_MADK_with_name<GFX12Gen, op, asmName, opName>;
1816
// Instantiates VOP2Only_Real_MADK_t16_gfx11_gfx12 for both the opName#"_t16"
// and the opName#"_fake16" pseudos.
multiclass VOP2Only_Real_MADK_t16_and_fake16_gfx11_gfx12<
    bits<6> op, string asmName, string opName = NAME> {
  defm _t16
      : VOP2Only_Real_MADK_t16_gfx11_gfx12<op, asmName, opName#"_t16">;
  defm _fake16
      : VOP2Only_Real_MADK_t16_gfx11_gfx12<op, asmName, opName#"_fake16">;
}
1822
// Renamed full real set for both GFX11 and GFX12; `asmName` precedes `opName`.
multiclass VOP2_Real_FULL_t16_gfx11_gfx12<bits<6> op, string asmName,
                                          string opName = NAME>
    : VOP2_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
      VOP2_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;
1827
// Instantiates VOP2_Real_FULL_t16_gfx11_gfx12 for both the opName#"_t16" and
// the opName#"_fake16" pseudos.
multiclass VOP2_Real_FULL_t16_and_fake16_gfx11_gfx12<
    bits<6> op, string asmName, string opName = NAME> {
  defm _t16
      : VOP2_Real_FULL_t16_gfx11_gfx12<op, asmName, opName#"_t16">;
  defm _fake16
      : VOP2_Real_FULL_t16_gfx11_gfx12<op, asmName, opName#"_fake16">;
}
1833
// GFX11 instantiation of VOP2_Real_FULL.
multiclass VOP2_Real_FULL_gfx11<bits<6> op>
    : VOP2_Real_FULL<GFX11Gen, op>;
1836
// CNDMASK, renamed opcodes, shifts and the carry-in add/sub family.
defm V_CNDMASK_B32
    : VOP2e_Real_gfx11<0x001, "V_CNDMASK_B32", "v_cndmask_b32">;
defm V_DOT2ACC_F32_F16
    : VOP2_Real_NO_VOP3_with_name_gfx11<0x002, "V_DOT2C_F32_F16",
                                        "v_dot2acc_f32_f16", 1>;
defm V_FMAC_DX9_ZERO_F32
    : VOP2_Real_NO_DPP_with_name_gfx11<0x006, "V_FMAC_LEGACY_F32",
                                       "v_fmac_dx9_zero_f32">;
defm V_MUL_DX9_ZERO_F32
    : VOP2_Real_FULL_with_name_gfx11_gfx12<0x007, "V_MUL_LEGACY_F32",
                                           "v_mul_dx9_zero_f32">;
defm V_LSHLREV_B32 : VOP2_Real_FULL_gfx11_gfx12<0x018>;
defm V_LSHRREV_B32 : VOP2_Real_FULL_gfx11_gfx12<0x019>;
defm V_ASHRREV_I32 : VOP2_Real_FULL_gfx11_gfx12<0x01a>;
defm V_ADD_CO_CI_U32
    : VOP2be_Real_gfx11<0x020, "V_ADDC_U32", "v_add_co_ci_u32">;
defm V_SUB_CO_CI_U32
    : VOP2be_Real_gfx11<0x021, "V_SUBB_U32", "v_sub_co_ci_u32">;
defm V_SUBREV_CO_CI_U32
    : VOP2be_Real_gfx11<0x022, "V_SUBBREV_U32", "v_subrev_co_ci_u32">;

defm V_CVT_PK_RTZ_F16_F32
    : VOP2_Real_FULL_with_name_gfx11_gfx12<0x02f, "V_CVT_PKRTZ_F16_F32",
                                           "v_cvt_pk_rtz_f16_f32">;
defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx11_gfx12<0x03c>;

// F16 operations, each instantiated for the _t16 and _fake16 pseudos.
defm V_ADD_F16
    : VOP2_Real_FULL_t16_and_fake16_gfx11_gfx12<0x032, "v_add_f16">;
defm V_SUB_F16
    : VOP2_Real_FULL_t16_and_fake16_gfx11_gfx12<0x033, "v_sub_f16">;
defm V_SUBREV_F16
    : VOP2_Real_FULL_t16_and_fake16_gfx11_gfx12<0x034, "v_subrev_f16">;
defm V_MUL_F16
    : VOP2_Real_FULL_t16_and_fake16_gfx11_gfx12<0x035, "v_mul_f16">;
defm V_FMAC_F16
    : VOP2_Real_FULL_t16_and_fake16_gfx11_gfx12<0x036, "v_fmac_f16">;
defm V_LDEXP_F16
    : VOP2_Real_FULL_t16_and_fake16_gfx11_gfx12<0x03b, "v_ldexp_f16">;
defm V_MAX_F16
    : VOP2_Real_FULL_t16_and_fake16_gfx11<0x039, "v_max_f16">;
defm V_MIN_F16
    : VOP2_Real_FULL_t16_and_fake16_gfx11<0x03a, "v_min_f16">;
defm V_FMAMK_F16
    : VOP2Only_Real_MADK_t16_and_fake16_gfx11_gfx12<0x037, "v_fmamk_f16">;
defm V_FMAAK_F16
    : VOP2Only_Real_MADK_t16_and_fake16_gfx11_gfx12<0x038, "v_fmaak_f16">;
1869
// Instructions that only have a VOP3 encoding on GFX11 and GFX12.
defm V_CNDMASK_B16
    : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x25d, "v_cndmask_b16">;
defm V_LDEXP_F32        : VOP3Only_Realtriple_gfx11_gfx12<0x31c>;
defm V_BFM_B32          : VOP3Only_Realtriple_gfx11_gfx12<0x31d>;
defm V_BCNT_U32_B32     : VOP3Only_Realtriple_gfx11_gfx12<0x31e>;
defm V_MBCNT_LO_U32_B32 : VOP3Only_Realtriple_gfx11_gfx12<0x31f>;
defm V_MBCNT_HI_U32_B32 : VOP3Only_Realtriple_gfx11_gfx12<0x320>;
defm V_CVT_PK_NORM_I16_F32
    : VOP3Only_Realtriple_with_name_gfx11_gfx12<0x321, "V_CVT_PKNORM_I16_F32",
                                                "v_cvt_pk_norm_i16_f32">;
defm V_CVT_PK_NORM_U16_F32
    : VOP3Only_Realtriple_with_name_gfx11_gfx12<0x322, "V_CVT_PKNORM_U16_F32",
                                                "v_cvt_pk_norm_u16_f32">;
defm V_CVT_PK_U16_U32   : VOP3Only_Realtriple_gfx11_gfx12<0x323>;
defm V_CVT_PK_I16_I32   : VOP3Only_Realtriple_gfx11_gfx12<0x324>;
defm V_ADD_CO_U32       : VOP3beOnly_Realtriple_gfx11_gfx12<0x300>;
defm V_SUB_CO_U32       : VOP3beOnly_Realtriple_gfx11_gfx12<0x301>;
defm V_SUBREV_CO_U32    : VOP3beOnly_Realtriple_gfx11_gfx12<0x302>;
1884
// Instruction aliases for the GFX11 e32 reals of CNDMASK and the carry-in
// add/sub family.
let SubtargetPredicate = isGFX11Only in {
  defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx11>;

  defm : VOP2bInstAliases<V_ADDC_U32_e32, V_ADD_CO_CI_U32_e32_gfx11,
                          "v_add_co_ci_u32">;
  defm : VOP2bInstAliases<V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx11,
                          "v_sub_co_ci_u32">;
  defm : VOP2bInstAliases<V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx11,
                          "v_subrev_co_ci_u32">;
} // End SubtargetPredicate = isGFX11Only
1895
1896//===----------------------------------------------------------------------===//
1897// GFX10.
1898//===----------------------------------------------------------------------===//
1899
1900let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
1901  //===------------------------------- VOP2 -------------------------------===//
1902  multiclass VOP2Only_Real_MADK_gfx10<bits<6> op> {
1903    def _gfx10 :
1904      VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.GFX10>,
1905      VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
1906  }
1907  multiclass VOP2Only_Real_MADK_gfx10_with_name<bits<6> op, string opName,
1908                                                string asmName> {
1909    def _gfx10 :
1910        VOP2_Real<!cast<VOP2_Pseudo>(opName), SIEncodingFamily.GFX10>,
1911        VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(opName).Pfl> {
1912      VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName);
1913      let AsmString = asmName # ps.AsmOperands;
1914    }
1915  }
1916  multiclass VOP2_Real_e32_gfx10<bits<6> op> {
1917    def _e32_gfx10 :
1918      VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX10>,
1919      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
1920  }
1921  multiclass VOP2_Real_e64_gfx10<bits<6> op> {
1922    def _e64_gfx10 :
1923      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
1924      VOP3e_gfx10<{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
1925  }
1926  multiclass VOP2_Real_sdwa_gfx10<bits<6> op> {
1927    if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
1928    def _sdwa_gfx10 :
1929      VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
1930      VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
1931  }
1932  multiclass VOP2_Real_dpp_gfx10<bits<6> op> {
1933    if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then
1934    def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX10>;
1935  }
1936  multiclass VOP2_Real_dpp8_gfx10<bits<6> op> {
1937    if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then
1938    def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(NAME#"_e32")>;
1939  }
1940
1941  //===------------------------- VOP2 (with name) -------------------------===//
1942  multiclass VOP2_Real_e32_gfx10_with_name<bits<6> op, string opName,
1943                                           string asmName> {
1944    def _e32_gfx10 :
1945      VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.GFX10>,
1946      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl> {
1947        VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
1948        let AsmString = asmName # ps.AsmOperands;
1949      }
1950  }
1951  multiclass VOP2_Real_e64_gfx10_with_name<bits<6> op, string opName,
1952                                           string asmName> {
1953    def _e64_gfx10 :
1954      VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>,
1955      VOP3e_gfx10<{0, 1, 0, 0, op{5-0}},
1956                  !cast<VOP3_Pseudo>(opName#"_e64").Pfl> {
1957        VOP3_Pseudo ps = !cast<VOP3_Pseudo>(opName#"_e64");
1958        let AsmString = asmName # ps.AsmOperands;
1959      }
1960  }
1961  multiclass VOP2_Real_sdwa_gfx10_with_name<bits<6> op, string opName,
1962                                            string asmName> {
1963    if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9 then
1964    def _sdwa_gfx10 :
1965      VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
1966      VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
1967        VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
1968        let AsmString = asmName # ps.AsmOperands;
1969      }
1970  }
1971  multiclass VOP2_Real_dpp_gfx10_with_name<bits<6> op, string opName,
1972                                           string asmName> {
1973    if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
1974    def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX10> {
1975      VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
1976      let AsmString = asmName # ps.Pfl.AsmDPP16;
1977    }
1978  }
1979  multiclass VOP2_Real_dpp8_gfx10_with_name<bits<6> op, string opName,
1980                                            string asmName> {
1981    if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
1982    def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
1983      VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
1984      let AsmString = asmName # ps.Pfl.AsmDPP8;
1985    }
1986  }
1987
1988  //===------------------------------ VOP2be ------------------------------===//
1989  multiclass VOP2be_Real_e32_gfx10<bits<6> op, string opName, string asmName> {
1990    def _e32_gfx10 :
1991      VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.GFX10>,
1992      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl> {
1993        VOP2_Pseudo Ps = !cast<VOP2_Pseudo>(opName#"_e32");
1994        let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands);
1995      }
1996  }
1997  multiclass VOP2be_Real_e64_gfx10<bits<6> op, string opName, string asmName> {
1998    def _e64_gfx10 :
1999      VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>,
2000      VOP3be_gfx10<{0, 1, 0, 0, op{5-0}},
2001                   !cast<VOP3_Pseudo>(opName#"_e64").Pfl> {
2002        VOP3_Pseudo Ps = !cast<VOP3_Pseudo>(opName#"_e64");
2003        let AsmString = asmName # Ps.AsmOperands;
2004      }
2005  }
2006  multiclass VOP2be_Real_sdwa_gfx10<bits<6> op, string opName, string asmName> {
2007    if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9 then
2008    def _sdwa_gfx10 :
2009      VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
2010      VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
2011        VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
2012        let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands);
2013      }
2014    if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9 then
2015    def _sdwa_w32_gfx10 :
2016      Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
2017      VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
2018        VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
2019        let AsmString = asmName # !subst("vcc", "vcc_lo", Ps.AsmOperands);
2020        let isAsmParserOnly = 1;
2021        let WaveSizePredicate = isWave32;
2022     }
2023    if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9 then
2024    def _sdwa_w64_gfx10 :
2025      Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
2026      VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
2027        VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
2028        let AsmString = asmName # Ps.AsmOperands;
2029        let isAsmParserOnly = 1;
2030        let WaveSizePredicate = isWave64;
2031      }
2032  }
2033  multiclass VOP2be_Real_dpp_gfx10<bits<6> op, string opName, string asmName> {
2034    if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
2035    def _dpp_gfx10 :
2036      VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX10, asmName> {
2037        string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
2038        let AsmString = asmName # !subst(", vcc", "", AsmDPP);
2039      }
2040    if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
2041    def _dpp_w32_gfx10 :
2042      Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
2043        string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
2044        let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP);
2045        let isAsmParserOnly = 1;
2046        let WaveSizePredicate = isWave32;
2047      }
2048    if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
2049    def _dpp_w64_gfx10 :
2050      Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
2051        string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
2052        let AsmString = asmName # AsmDPP;
2053        let isAsmParserOnly = 1;
2054        let WaveSizePredicate = isWave64;
2055      }
2056  }
2057  multiclass VOP2be_Real_dpp8_gfx10<bits<6> op, string opName, string asmName> {
2058    if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
2059    def _dpp8_gfx10 :
2060      VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
2061        string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
2062        let AsmString = asmName # !subst(", vcc", "", AsmDPP8);
2063      }
2064    if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
2065    def _dpp8_w32_gfx10 :
2066      VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
2067        string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
2068        let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8);
2069        let isAsmParserOnly = 1;
2070        let WaveSizePredicate = isWave32;
2071      }
2072    if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
2073    def _dpp8_w64_gfx10 :
2074      VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
2075        string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
2076        let AsmString = asmName # AsmDPP8;
2077        let isAsmParserOnly = 1;
2078        let WaveSizePredicate = isWave64;
2079      }
2080  }
2081
2082  //===----------------------------- VOP3Only -----------------------------===//
2083  multiclass VOP3Only_Real_gfx10<bits<10> op> {
2084    def _e64_gfx10 :
2085      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
2086      VOP3e_gfx10<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
2087        let IsSingle = 1;
2088      }
2089  }
2090
2091  //===---------------------------- VOP3beOnly ----------------------------===//
2092  multiclass VOP3beOnly_Real_gfx10<bits<10> op> {
2093    def _e64_gfx10 :
2094      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
2095      VOP3be_gfx10<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
2096        let IsSingle = 1;
2097      }
2098  }
2099} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
2100
// MADK reals for GFX10 plus the GFX11 generation, sharing one opcode.
multiclass VOP2Only_Real_MADK_gfx10_gfx11<bits<6> op> :
  VOP2Only_Real_MADK_gfx10<op>,
  VOP2Only_Real_MADK<GFX11Gen, op>;
2103
// MADK reals for GFX10 and GFX11, extended with the GFX12 generation.
multiclass VOP2Only_Real_MADK_gfx10_gfx11_gfx12<bits<6> op> :
  VOP2Only_Real_MADK_gfx10_gfx11<op>,
  VOP2Only_Real_MADK<GFX12Gen, op>;
2106
// Every GFX10 real of a VOP2 "be" instruction: e32, e64, SDWA, DPP and DPP8.
// All five parents receive the same op, opName and asmName.
2107multiclass VOP2be_Real_gfx10<bits<6> op, string opName, string asmName> :
2108  VOP2be_Real_e32_gfx10<op, opName, asmName>,
2109  VOP2be_Real_e64_gfx10<op, opName, asmName>,
2110  VOP2be_Real_sdwa_gfx10<op, opName, asmName>,
2111  VOP2be_Real_dpp_gfx10<op, opName, asmName>,
2112  VOP2be_Real_dpp8_gfx10<op, opName, asmName>;
2113
// Mixed variant: the e32 and e64 reals come from the plain VOP2_Real_*_gfx10
// multiclasses (which take only op), while SDWA, DPP and DPP8 reuse the "be"
// multiclasses and therefore take opName/asmName.
2114multiclass VOP2e_Real_gfx10<bits<6> op, string opName, string asmName> :
2115  VOP2_Real_e32_gfx10<op>,
2116  VOP2_Real_e64_gfx10<op>,
2117  VOP2be_Real_sdwa_gfx10<op, opName, asmName>,
2118  VOP2be_Real_dpp_gfx10<op, opName, asmName>,
2119  VOP2be_Real_dpp8_gfx10<op, opName, asmName>;
2120
// Full set of GFX10 reals for an ordinary VOP2 instruction:
// e32, e64, SDWA, DPP and DPP8, all with the same opcode.
multiclass VOP2_Real_gfx10<bits<6> op> :
  VOP2_Real_e32_gfx10<op>,
  VOP2_Real_e64_gfx10<op>,
  VOP2_Real_sdwa_gfx10<op>,
  VOP2_Real_dpp_gfx10<op>,
  VOP2_Real_dpp8_gfx10<op>;
2124
// GFX10 reals plus the full GFX11 set, sharing one opcode.
multiclass VOP2_Real_gfx10_gfx11<bits<6> op> :
  VOP2_Real_gfx10<op>,
  VOP2_Real_FULL<GFX11Gen, op>;
2127
// GFX10 and GFX11 reals plus the full GFX12 set, sharing one opcode.
multiclass VOP2_Real_gfx10_gfx11_gfx12<bits<6> op> :
  VOP2_Real_gfx10_gfx11<op>,
  VOP2_Real_FULL<GFX12Gen, op>;
2130
// GFX10 reals (e32, e64, SDWA, DPP, DPP8) built through the *_with_name
// multiclasses; each parent receives opName and asmName in addition to op.
2131multiclass VOP2_Real_with_name_gfx10<bits<6> op, string opName,
2132                                     string asmName> :
2133  VOP2_Real_e32_gfx10_with_name<op, opName, asmName>,
2134  VOP2_Real_e64_gfx10_with_name<op, opName, asmName>,
2135  VOP2_Real_sdwa_gfx10_with_name<op, opName, asmName>,
2136  VOP2_Real_dpp_gfx10_with_name<op, opName, asmName>,
2137  VOP2_Real_dpp8_gfx10_with_name<op, opName, asmName>;
2138
// Named reals for GFX10, GFX11 and GFX12 with one shared opcode, opName and
// asmName.
2139multiclass VOP2_Real_with_name_gfx10_gfx11_gfx12<bits<6> op, string opName,
2140                                                 string asmName> :
2141  VOP2_Real_with_name_gfx10<op, opName, asmName>,
2142  VOP2_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
2143  VOP2_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;
2144
2145// NB: Same opcode as v_mac_legacy_f32
// (V_MAC_LEGACY_F32 is also instantiated with 0x006 for GFX10 elsewhere in
// this file), so this real goes into the separate "GFX10_B" decoder namespace.
2146let DecoderNamespace = "GFX10_B" in
2147defm V_FMAC_LEGACY_F32 : VOP2_Real_gfx10<0x006>;
2148
// Opcodes introduced with GFX10. The multiclass name lists the generations
// that receive a real with this opcode.
2149defm V_XNOR_B32        : VOP2_Real_gfx10_gfx11_gfx12<0x01e>;
2150defm V_FMAC_F32        : VOP2_Real_gfx10_gfx11_gfx12<0x02b>;
2151defm V_FMAMK_F32       : VOP2Only_Real_MADK_gfx10_gfx11_gfx12<0x02c>;
2152defm V_FMAAK_F32       : VOP2Only_Real_MADK_gfx10_gfx11_gfx12<0x02d>;
2153defm V_ADD_F16         : VOP2_Real_gfx10<0x032>;
2154defm V_SUB_F16         : VOP2_Real_gfx10<0x033>;
2155defm V_SUBREV_F16      : VOP2_Real_gfx10<0x034>;
2156defm V_MUL_F16         : VOP2_Real_gfx10<0x035>;
2157defm V_FMAC_F16        : VOP2_Real_gfx10<0x036>;
2158defm V_FMAMK_F16       : VOP2Only_Real_MADK_gfx10<0x037>;
2159defm V_FMAAK_F16       : VOP2Only_Real_MADK_gfx10<0x038>;
2160defm V_MAX_F16         : VOP2_Real_gfx10<0x039>;
2161defm V_MIN_F16         : VOP2_Real_gfx10<0x03a>;
2162defm V_LDEXP_F16       : VOP2_Real_gfx10<0x03b>;
2163
// Only the e32 real is instantiated for V_PK_FMAC_F16, and it is marked
// IsSingle.
2164let IsSingle = 1 in {
2165  defm V_PK_FMAC_F16     : VOP2_Real_e32_gfx10<0x03c>;
2166}
2167
2168// VOP2 no carry-in, carry-out.
// The reals reuse the V_*_U32 pseudos (second argument) and print the
// v_*_nc_u32 mnemonics (third argument).
2169defm V_ADD_NC_U32 :
2170  VOP2_Real_with_name_gfx10_gfx11_gfx12<0x025, "V_ADD_U32", "v_add_nc_u32">;
2171defm V_SUB_NC_U32 :
2172  VOP2_Real_with_name_gfx10_gfx11_gfx12<0x026, "V_SUB_U32", "v_sub_nc_u32">;
2173defm V_SUBREV_NC_U32 :
2174  VOP2_Real_with_name_gfx10_gfx11_gfx12<0x027, "V_SUBREV_U32", "v_subrev_nc_u32">;
2175
2176// VOP2 carry-in, carry-out.
// The reals reuse the V_ADDC/V_SUBB/V_SUBBREV pseudos and print the
// v_*_co_ci_u32 mnemonics.
2177defm V_ADD_CO_CI_U32 :
2178  VOP2be_Real_gfx10<0x028, "V_ADDC_U32", "v_add_co_ci_u32">;
2179defm V_SUB_CO_CI_U32 :
2180  VOP2be_Real_gfx10<0x029, "V_SUBB_U32", "v_sub_co_ci_u32">;
2181defm V_SUBREV_CO_CI_U32 :
2182  VOP2be_Real_gfx10<0x02a, "V_SUBBREV_U32", "v_subrev_co_ci_u32">;
2183
// V_CNDMASK_B32 keeps its own name; it uses the VOP2e variant, which takes
// e32/e64 from the plain multiclasses and SDWA/DPP/DPP8 from the "be" ones.
2184defm V_CNDMASK_B32 :
2185  VOP2e_Real_gfx10<0x001, "V_CNDMASK_B32", "v_cndmask_b32">;
2186
2187// VOP3 only.
// Each defm emits a single _e64_gfx10 real; the argument is the 10-bit VOP3
// opcode.
2188defm V_BFM_B32            : VOP3Only_Real_gfx10<0x363>;
2189defm V_BCNT_U32_B32       : VOP3Only_Real_gfx10<0x364>;
2190defm V_MBCNT_LO_U32_B32   : VOP3Only_Real_gfx10<0x365>;
2191defm V_MBCNT_HI_U32_B32   : VOP3Only_Real_gfx10<0x366>;
2192defm V_LDEXP_F32          : VOP3Only_Real_gfx10<0x362>;
2193defm V_CVT_PKNORM_I16_F32 : VOP3Only_Real_gfx10<0x368>;
2194defm V_CVT_PKNORM_U16_F32 : VOP3Only_Real_gfx10<0x369>;
2195defm V_CVT_PK_U16_U32     : VOP3Only_Real_gfx10<0x36a>;
2196defm V_CVT_PK_I16_I32     : VOP3Only_Real_gfx10<0x36b>;
2197
2198// VOP3 carry-out.
// Same as above, but with the VOP3be encoding class.
2199defm V_ADD_CO_U32 : VOP3beOnly_Real_gfx10<0x30f>;
2200defm V_SUB_CO_U32 : VOP3beOnly_Real_gfx10<0x310>;
2201defm V_SUBREV_CO_U32 : VOP3beOnly_Real_gfx10<0x319>;
2202
// Assembler aliases, GFX10 only. Each pairs an e32 pseudo with its e32 GFX10
// real; the VOP2bInstAliases entries additionally pass the GFX10 mnemonic.
2203let SubtargetPredicate = isGFX10Only in {
2204  defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx10>;
2205
2206  defm : VOP2bInstAliases<
2207    V_ADDC_U32_e32, V_ADD_CO_CI_U32_e32_gfx10, "v_add_co_ci_u32">;
2208  defm : VOP2bInstAliases<
2209    V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx10, "v_sub_co_ci_u32">;
2210  defm : VOP2bInstAliases<
2211    V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx10, "v_subrev_co_ci_u32">;
2212} // End SubtargetPredicate = isGFX10Only
2213
2214//===----------------------------------------------------------------------===//
2215// GFX6, GFX7, GFX10, GFX11, GFX12
2216//===----------------------------------------------------------------------===//
2217
// Low 32-bit word of a VOP2 DPP encoding. Bits 8-0, where the plain VOP2e
// layout places src0, hold the fixed value 0xfa. src1 and vdst are emitted
// only when the profile has HasSrc1 / EmitDst set, and are zero otherwise.
class VOP2_DPPe <bits<6> op, VOP2_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
  VOP_DPPe <P> {
  bits<8> vdst;
  bits<8> src1;

  // Fields listed from the most significant bit down.
  let Inst{31}    = 0x0; // encoding
  let Inst{30-25} = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{16-9}  = !if(P.HasSrc1, src1{7-0}, 0);
  let Inst{8-0}   = 0xfa; // dpp
}
2228
let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
  // Real for a lane instruction. The pseudo is NAME itself (no _e32 suffix).
  multiclass VOP2_Lane_Real_gfx6_gfx7<bits<6> op> {
    defvar LanePseudo = !cast<VOP2_Pseudo>(NAME);
    def _gfx6_gfx7 : VOP2_Real<LanePseudo, SIEncodingFamily.SI>,
                     VOP2e<op{5-0}, LanePseudo.Pfl>;
  }

  // Real for a MADK instruction, using the VOP2_MADKe encoding. The pseudo is
  // NAME itself.
  multiclass VOP2Only_Real_MADK_gfx6_gfx7<bits<6> op> {
    defvar MADKPseudo = !cast<VOP2_Pseudo>(NAME);
    def _gfx6_gfx7 : VOP2_Real<MADKPseudo, SIEncodingFamily.SI>,
                     VOP2_MADKe<op{5-0}, MADKPseudo.Pfl>;
  }

  // 32-bit (e32) real built from the opName#"_e32" pseudo.
  multiclass VOP2_Real_e32_gfx6_gfx7<bits<6> op, string opName = NAME> {
    defvar Pseudo32 = !cast<VOP2_Pseudo>(opName#"_e32");
    def _e32_gfx6_gfx7 : VOP2_Real<Pseudo32, SIEncodingFamily.SI>,
                         VOP2e<op{5-0}, Pseudo32.Pfl>;
  }

  // 64-bit (e64) real built from the opName#"_e64" pseudo. The VOP3 opcode is
  // the VOP2 opcode with the bits 1, 0, 0 prepended.
  multiclass VOP2_Real_e64_gfx6_gfx7<bits<6> op, string opName = NAME> {
    defvar Pseudo64 = !cast<VOP3_Pseudo>(opName#"_e64");
    def _e64_gfx6_gfx7 : VOP3_Real<Pseudo64, SIEncodingFamily.SI>,
                         VOP3e_gfx6_gfx7<{1, 0, 0, op{5-0}}, Pseudo64.Pfl>;
  }

  // As VOP2_Real_e64_gfx6_gfx7, but with the VOP3be encoding class.
  multiclass VOP2be_Real_e64_gfx6_gfx7<bits<6> op, string opName = NAME> {
    defvar Pseudo64 = !cast<VOP3_Pseudo>(opName#"_e64");
    def _e64_gfx6_gfx7 : VOP3_Real<Pseudo64, SIEncodingFamily.SI>,
                         VOP3be_gfx6_gfx7<{1, 0, 0, op{5-0}}, Pseudo64.Pfl>;
  }
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
2256
// MADK reals for GFX6/GFX7 and GFX10, sharing one opcode.
multiclass VOP2Only_Real_MADK_gfx6_gfx7_gfx10<bits<6> op> :
  VOP2Only_Real_MADK_gfx6_gfx7<op>,
  VOP2Only_Real_MADK_gfx10<op>;
2259
// e32 and e64 reals for GFX6/GFX7.
multiclass VOP2_Real_gfx6_gfx7<bits<6> op> :
  VOP2_Real_e32_gfx6_gfx7<op>,
  VOP2_Real_e64_gfx6_gfx7<op>;
2262
// GFX6/GFX7 reals plus the full GFX10 set, sharing one opcode.
multiclass VOP2_Real_gfx6_gfx7_gfx10<bits<6> op> :
  VOP2_Real_gfx6_gfx7<op>,
  VOP2_Real_gfx10<op>;
2265
// GFX6/GFX7 and GFX10 reals plus the full GFX11 set, sharing one opcode.
multiclass VOP2_Real_gfx6_gfx7_gfx10_gfx11<bits<6> op> :
  VOP2_Real_gfx6_gfx7_gfx10<op>,
  VOP2_Real_FULL<GFX11Gen, op>;
2268
// GFX6/GFX7, GFX10 and GFX11 reals plus the full GFX12 set, one shared opcode.
multiclass VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<bits<6> op> :
  VOP2_Real_gfx6_gfx7_gfx10_gfx11<op>,
  VOP2_Real_FULL<GFX12Gen, op>;
2271
// GFX6/GFX7 reals for a "be" instruction: the ordinary e32 real paired with
// the VOP3be-encoded e64 real.
multiclass VOP2be_Real_gfx6_gfx7<bits<6> op> :
  VOP2_Real_e32_gfx6_gfx7<op>,
  VOP2be_Real_e64_gfx6_gfx7<op>;
2274
// GFX6/GFX7 "be" reals whose pseudo is selected by opName and whose assembly
// string is asmName followed by the pseudo's AsmOperands. The e32 real takes
// its operands from opName#"_e32", the e64 real from opName#"_e64".
multiclass VOP2be_Real_gfx6_gfx7_with_name<bits<6> op,
  string opName, string asmName> {
  let AsmString = asmName # !cast<VOP2_Pseudo>(opName#"_e32").AsmOperands in
  defm "" : VOP2_Real_e32_gfx6_gfx7<op, opName>;

  let AsmString = asmName # !cast<VOP3_Pseudo>(opName#"_e64").AsmOperands in
  defm "" : VOP2be_Real_e64_gfx6_gfx7<op, opName>;
}
2288
// Opcodes that receive only GFX6/GFX7 reals from this table.
2289defm V_CNDMASK_B32        : VOP2_Real_gfx6_gfx7<0x000>;
2290defm V_MIN_LEGACY_F32     : VOP2_Real_gfx6_gfx7<0x00d>;
2291defm V_MAX_LEGACY_F32     : VOP2_Real_gfx6_gfx7<0x00e>;
2292defm V_LSHR_B32           : VOP2_Real_gfx6_gfx7<0x015>;
2293defm V_ASHR_I32           : VOP2_Real_gfx6_gfx7<0x017>;
2294defm V_LSHL_B32           : VOP2_Real_gfx6_gfx7<0x019>;
2295defm V_BFM_B32            : VOP2_Real_gfx6_gfx7<0x01e>;
2296defm V_BCNT_U32_B32       : VOP2_Real_gfx6_gfx7<0x022>;
2297defm V_MBCNT_LO_U32_B32   : VOP2_Real_gfx6_gfx7<0x023>;
2298defm V_MBCNT_HI_U32_B32   : VOP2_Real_gfx6_gfx7<0x024>;
2299defm V_LDEXP_F32          : VOP2_Real_gfx6_gfx7<0x02b>;
2300defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_gfx6_gfx7<0x02c>;
2301defm V_CVT_PKNORM_I16_F32 : VOP2_Real_gfx6_gfx7<0x02d>;
2302defm V_CVT_PKNORM_U16_F32 : VOP2_Real_gfx6_gfx7<0x02e>;
2303defm V_CVT_PK_U16_U32     : VOP2_Real_gfx6_gfx7<0x030>;
2304defm V_CVT_PK_I16_I32     : VOP2_Real_gfx6_gfx7<0x031>;
2305
2306// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 were renamed to *_U32 in
2307// VI, but the VI instructions behave the same as the SI versions.
// The first three reals therefore reuse the V_*_CO_U32 pseudos and only
// override the printed mnemonic.
2308defm V_ADD_I32            : VOP2be_Real_gfx6_gfx7_with_name<0x025, "V_ADD_CO_U32", "v_add_i32">;
2309defm V_SUB_I32            : VOP2be_Real_gfx6_gfx7_with_name<0x026, "V_SUB_CO_U32", "v_sub_i32">;
2310defm V_SUBREV_I32         : VOP2be_Real_gfx6_gfx7_with_name<0x027, "V_SUBREV_CO_U32", "v_subrev_i32">;
2311defm V_ADDC_U32           : VOP2be_Real_gfx6_gfx7<0x028>;
2312defm V_SUBB_U32           : VOP2be_Real_gfx6_gfx7<0x029>;
2313defm V_SUBBREV_U32        : VOP2be_Real_gfx6_gfx7<0x02a>;
2314
// Lane instructions: a single real each, taken from the pseudo named NAME.
2315defm V_READLANE_B32 : VOP2_Lane_Real_gfx6_gfx7<0x001>;
2316
// The V_WRITELANE_B32 real is instantiated with an explicit InOperandList.
2317let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in {
2318  defm V_WRITELANE_B32 : VOP2_Lane_Real_gfx6_gfx7<0x002>;
2319} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in)
2320
// Assembler aliases, GFX6/GFX7 only. Each pairs a pseudo with the
// corresponding GFX6/GFX7 real: e32 forms through VOP2eInstAliases, e64 forms
// through VOP2e64InstAlias.
2321let SubtargetPredicate = isGFX6GFX7 in {
2322  defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx6_gfx7>;
2323  defm : VOP2eInstAliases<V_ADD_CO_U32_e32, V_ADD_I32_e32_gfx6_gfx7>;
2324  defm : VOP2eInstAliases<V_SUB_CO_U32_e32, V_SUB_I32_e32_gfx6_gfx7>;
2325  defm : VOP2eInstAliases<V_SUBREV_CO_U32_e32, V_SUBREV_I32_e32_gfx6_gfx7>;
2326
2327  def : VOP2e64InstAlias<V_ADD_CO_U32_e64, V_ADD_I32_e64_gfx6_gfx7>;
2328  def : VOP2e64InstAlias<V_SUB_CO_U32_e64, V_SUB_I32_e64_gfx6_gfx7>;
2329  def : VOP2e64InstAlias<V_SUBREV_CO_U32_e64, V_SUBREV_I32_e64_gfx6_gfx7>;
2330} // End SubtargetPredicate = isGFX6GFX7
2331
// Opcodes shared between GFX6/GFX7 and GFX10. Entries whose multiclass name
// ends in _gfx11 or _gfx11_gfx12 also get reals for those generations with
// the same opcode.
2332defm V_ADD_F32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x003>;
2333defm V_SUB_F32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x004>;
2334defm V_SUBREV_F32         : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x005>;
2335defm V_MAC_LEGACY_F32     : VOP2_Real_gfx6_gfx7_gfx10<0x006>;
2336defm V_MUL_LEGACY_F32     : VOP2_Real_gfx6_gfx7_gfx10<0x007>;
2337defm V_MUL_F32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x008>;
2338defm V_MUL_I32_I24        : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x009>;
2339defm V_MUL_HI_I32_I24     : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00a>;
2340defm V_MUL_U32_U24        : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00b>;
2341defm V_MUL_HI_U32_U24     : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00c>;
2342defm V_MIN_F32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x00f>;
2343defm V_MAX_F32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x010>;
2344defm V_MIN_I32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x011>;
2345defm V_MAX_I32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x012>;
2346defm V_MIN_U32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x013>;
2347defm V_MAX_U32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x014>;
2348defm V_LSHRREV_B32        : VOP2_Real_gfx6_gfx7_gfx10<0x016>;
2349defm V_ASHRREV_I32        : VOP2_Real_gfx6_gfx7_gfx10<0x018>;
2350defm V_LSHLREV_B32        : VOP2_Real_gfx6_gfx7_gfx10<0x01a>;
2351defm V_AND_B32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x01b>;
2352defm V_OR_B32             : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x01c>;
2353defm V_XOR_B32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x01d>;
2354defm V_MAC_F32            : VOP2_Real_gfx6_gfx7_gfx10<0x01f>;
2355defm V_CVT_PKRTZ_F16_F32  : VOP2_Real_gfx6_gfx7_gfx10<0x02f>;
// MADK forms (VOP2_MADKe encoding).
2356defm V_MADMK_F32          : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x020>;
2357defm V_MADAK_F32          : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x021>;
2358
2359//===----------------------------------------------------------------------===//
2360// GFX8, GFX9 (VI).
2361//===----------------------------------------------------------------------===//
2362
let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {

// VI real for a MADK instruction. The pseudo is NAME itself.
multiclass VOP2_Real_MADK_vi <bits<6> op> {
  defvar MADKPseudo = !cast<VOP2_Pseudo>(NAME);
  def _vi : VOP2_Real<MADKPseudo, SIEncodingFamily.VI>,
            VOP2_MADKe<op{5-0}, MADKPseudo.Pfl>;
}

// GFX940 real for a MADK instruction. The record overrides the surrounding
// DecoderNamespace with "GFX9".
multiclass VOP2_Real_MADK_gfx940 <bits<6> op> {
  defvar MADKPseudo = !cast<VOP2_Pseudo>(NAME);
  def _gfx940 : VOP2_Real<MADKPseudo, SIEncodingFamily.GFX940>,
                VOP2_MADKe<op{5-0}, MADKPseudo.Pfl> {
    let DecoderNamespace = "GFX9";
  }
}

// 32-bit (e32) VI real built from the NAME#"_e32" pseudo.
multiclass VOP2_Real_e32_vi <bits<6> op> {
  defvar Pseudo32 = !cast<VOP2_Pseudo>(NAME#"_e32");
  def _e32_vi : VOP2_Real<Pseudo32, SIEncodingFamily.VI>,
                VOP2e<op{5-0}, Pseudo32.Pfl>;
}

// 64-bit (e64) VI real built from the NAME#"_e64" pseudo; op is the full
// 10-bit VOP3 opcode.
multiclass VOP2_Real_e64_vi <bits<10> op> {
  defvar Pseudo64 = !cast<VOP3_Pseudo>(NAME#"_e64");
  def _e64_vi : VOP3_Real<Pseudo64, SIEncodingFamily.VI>,
                VOP3e_vi <op, Pseudo64.Pfl>;
}

// As VOP2_Real_e64_vi, but the record is additionally marked IsSingle.
multiclass VOP2_Real_e64only_vi <bits<10> op> {
  defvar Pseudo64 = !cast<VOP3_Pseudo>(NAME#"_e64");
  def _e64_vi : VOP3_Real<Pseudo64, SIEncodingFamily.VI>,
                VOP3e_vi <op, Pseudo64.Pfl> {
    let IsSingle = 1;
  }
}

// e32 + e64 pair. The 10-bit VOP3 opcode is the 6-bit VOP2 opcode with the
// bits 0, 1, 0, 0 prepended.
multiclass Base_VOP2_Real_e32e64_vi <bits<6> op> :
  VOP2_Real_e32_vi<op>,
  VOP2_Real_e64_vi<{0, 1, 0, 0, op{5-0}}>;

} // End AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8"
2402
// SDWA real (_sdwa_vi), emitted only when the NAME#"_e32" pseudo's profile
// sets HasExtSDWA. The record is built from the NAME#"_sdwa" pseudo.
multiclass VOP2_SDWA8_Real <bits<6> op> {
  defvar Pseudo32 = !cast<VOP2_Pseudo>(NAME#"_e32");

  if Pseudo32.Pfl.HasExtSDWA then {
    def _sdwa_vi :
      VOP_SDWA8_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
  }
}
2409
// SDWA9 real (_sdwa_gfx9), emitted only when the NAME#"_e32" pseudo's profile
// sets HasExtSDWA9. The record is built from the NAME#"_sdwa" pseudo.
multiclass VOP2_SDWA9_Real <bits<6> op> {
  defvar Pseudo32 = !cast<VOP2_Pseudo>(NAME#"_e32");

  if Pseudo32.Pfl.HasExtSDWA9 then {
    def _sdwa_gfx9 :
      VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
  }
}
2416
let AssemblerPredicate = isGFX8Only, DecoderNamespace = "GFX8" in {

// GFX8-only reals for a "be" instruction whose pseudo is selected by OpName.
// Every record prints AsmName followed by the AsmOperands of its own pseudo.
// The SDWA and DPP records exist only when the _e32 pseudo's profile sets
// HasExtSDWA / HasExtDPP respectively.
multiclass VOP2be_Real_e32e64_vi_only <bits<6> op, string OpName, string AsmName> {
  defvar Pseudo32 = !cast<VOP2_Pseudo>(OpName#"_e32");
  defvar Pseudo64 = !cast<VOP3_Pseudo>(OpName#"_e64");

  def _e32_vi : VOP2_Real<Pseudo32, SIEncodingFamily.VI>,
                VOP2e<op{5-0}, Pseudo32.Pfl> {
    VOP2_Pseudo ps = Pseudo32;
    let AsmString = AsmName # ps.AsmOperands;
  }

  def _e64_vi : VOP3_Real<Pseudo64, SIEncodingFamily.VI>,
                VOP3be_vi <{0, 1, 0, 0, op{5-0}}, Pseudo64.Pfl> {
    VOP3_Pseudo ps = Pseudo64;
    let AsmString = AsmName # ps.AsmOperands;
  }

  if Pseudo32.Pfl.HasExtSDWA then {
    def _sdwa_vi :
      VOP_SDWA8_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>,
      VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> {
        VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
        let AsmString = AsmName # ps.AsmOperands;
      }
  }

  if Pseudo32.Pfl.HasExtDPP then {
    def _dpp_vi :
      VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.VI>,
      VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> {
        VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp");
        let AsmString = AsmName # ps.AsmOperands;
      }
  }
}

} // End AssemblerPredicate = isGFX8Only, DecoderNamespace = "GFX8"
2449
let DecoderNamespace = "GFX9" in {

// GFX9 reals for a "be" instruction whose pseudo is selected by OpName.
// Every record prints AsmName followed by the AsmOperands of its own pseudo.
// The SDWA and DPP records exist only when the _e32 pseudo's profile sets
// HasExtSDWA9 / HasExtDPP respectively.
multiclass VOP2be_Real_e32e64_gfx9 <bits<6> op, string OpName, string AsmName> {
  defvar Pseudo32 = !cast<VOP2_Pseudo>(OpName#"_e32");
  defvar Pseudo64 = !cast<VOP3_Pseudo>(OpName#"_e64");

  def _e32_gfx9 : VOP2_Real<Pseudo32, SIEncodingFamily.GFX9>,
                  VOP2e<op{5-0}, Pseudo32.Pfl> {
    VOP2_Pseudo ps = Pseudo32;
    let AsmString = AsmName # ps.AsmOperands;
  }

  def _e64_gfx9 : VOP3_Real<Pseudo64, SIEncodingFamily.GFX9>,
                  VOP3be_vi <{0, 1, 0, 0, op{5-0}}, Pseudo64.Pfl> {
    VOP3_Pseudo ps = Pseudo64;
    let AsmString = AsmName # ps.AsmOperands;
  }

  if Pseudo32.Pfl.HasExtSDWA9 then {
    def _sdwa_gfx9 :
      VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>,
      VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> {
        VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
        let AsmString = AsmName # ps.AsmOperands;
      }
  }

  if Pseudo32.Pfl.HasExtDPP then {
    def _dpp_gfx9 :
      VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.GFX9>,
      VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> {
        VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp");
        let AsmString = AsmName # ps.AsmOperands;
      }
  }
}

// GFX9 reals keyed on NAME, with no assembly-string override: e32, e64, and,
// when the _e32 pseudo's profile allows, SDWA9 and DPP.
multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> {
  defvar Pseudo32 = !cast<VOP2_Pseudo>(NAME#"_e32");
  defvar Pseudo64 = !cast<VOP3_Pseudo>(NAME#"_e64");

  def _e32_gfx9 : VOP2_Real<Pseudo32, SIEncodingFamily.GFX9>,
                  VOP2e<op{5-0}, Pseudo32.Pfl>;

  def _e64_gfx9 : VOP3_Real<Pseudo64, SIEncodingFamily.GFX9>,
                  VOP3e_vi <{0, 1, 0, 0, op{5-0}}, Pseudo64.Pfl>;

  if Pseudo32.Pfl.HasExtSDWA9 then {
    def _sdwa_gfx9 :
      VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
  }

  if Pseudo32.Pfl.HasExtDPP then {
    def _dpp_gfx9 :
      VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
      VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>;
  }
}

} // End DecoderNamespace = "GFX9"
2500
// Complete VI-style real set for an ordinary VOP2 instruction: e32 and e64
// (Base_VOP2_Real_e32e64_vi), SDWA, SDWA9, and a _dpp_vi record when the
// NAME#"_e32" pseudo's profile sets HasExtDPP.
multiclass VOP2_Real_e32e64_vi <bits<6> op> :
  Base_VOP2_Real_e32e64_vi<op>,
  VOP2_SDWA8_Real<op>,
  VOP2_SDWA9_Real<op> {
  defvar Pseudo32 = !cast<VOP2_Pseudo>(NAME#"_e32");

  if Pseudo32.Pfl.HasExtDPP then {
    def _dpp_vi :
      VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>,
      VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>;
  }
}
2509
// VI-encoded VOP2 opcodes. Each VOP2_Real_e32e64_vi defm emits e32 and e64
// reals plus SDWA/SDWA9/DPP reals where the pseudo's profile allows.
2510defm V_CNDMASK_B32        : VOP2_Real_e32e64_vi <0x0>;
2511defm V_ADD_F32            : VOP2_Real_e32e64_vi <0x1>;
2512defm V_SUB_F32            : VOP2_Real_e32e64_vi <0x2>;
2513defm V_SUBREV_F32         : VOP2_Real_e32e64_vi <0x3>;
// The predicate applies to the next defm only. A separate single e64 real for
// V_MUL_LEGACY_F32 is instantiated through VOP2_Real_e64_gfx90a elsewhere in
// this file.
2514let OtherPredicates = [isGCN3ExcludingGFX90A] in
2515defm V_MUL_LEGACY_F32     : VOP2_Real_e32e64_vi <0x4>;
2516defm V_MUL_F32            : VOP2_Real_e32e64_vi <0x5>;
2517defm V_MUL_I32_I24        : VOP2_Real_e32e64_vi <0x6>;
2518defm V_MUL_HI_I32_I24     : VOP2_Real_e32e64_vi <0x7>;
2519defm V_MUL_U32_U24        : VOP2_Real_e32e64_vi <0x8>;
2520defm V_MUL_HI_U32_U24     : VOP2_Real_e32e64_vi <0x9>;
2521defm V_MIN_F32            : VOP2_Real_e32e64_vi <0xa>;
2522defm V_MAX_F32            : VOP2_Real_e32e64_vi <0xb>;
2523defm V_MIN_I32            : VOP2_Real_e32e64_vi <0xc>;
2524defm V_MAX_I32            : VOP2_Real_e32e64_vi <0xd>;
2525defm V_MIN_U32            : VOP2_Real_e32e64_vi <0xe>;
2526defm V_MAX_U32            : VOP2_Real_e32e64_vi <0xf>;
2527defm V_LSHRREV_B32        : VOP2_Real_e32e64_vi <0x10>;
2528defm V_ASHRREV_I32        : VOP2_Real_e32e64_vi <0x11>;
2529defm V_LSHLREV_B32        : VOP2_Real_e32e64_vi <0x12>;
2530defm V_AND_B32            : VOP2_Real_e32e64_vi <0x13>;
2531defm V_OR_B32             : VOP2_Real_e32e64_vi <0x14>;
2532defm V_XOR_B32            : VOP2_Real_e32e64_vi <0x15>;
2533defm V_MAC_F32            : VOP2_Real_e32e64_vi <0x16>;
2534defm V_MADMK_F32          : VOP2_Real_MADK_vi <0x17>;
2535defm V_MADAK_F32          : VOP2_Real_MADK_vi <0x18>;
2536
// GFX8-only carry instructions (the multiclass sits under isGFX8Only). The
// second argument names the pseudo, the third the printed mnemonic.
2537defm V_ADD_U32            : VOP2be_Real_e32e64_vi_only <0x19, "V_ADD_CO_U32",     "v_add_u32">;
2538defm V_SUB_U32            : VOP2be_Real_e32e64_vi_only <0x1a, "V_SUB_CO_U32",     "v_sub_u32">;
2539defm V_SUBREV_U32         : VOP2be_Real_e32e64_vi_only <0x1b, "V_SUBREV_CO_U32",  "v_subrev_u32">;
2540defm V_ADDC_U32           : VOP2be_Real_e32e64_vi_only <0x1c, "V_ADDC_U32",    "v_addc_u32">;
2541defm V_SUBB_U32           : VOP2be_Real_e32e64_vi_only <0x1d, "V_SUBB_U32",    "v_subb_u32">;
2542defm V_SUBBREV_U32        : VOP2be_Real_e32e64_vi_only <0x1e, "V_SUBBREV_U32", "v_subbrev_u32">;
2543
// GFX9-only reals. Opcodes 0x19-0x1e are the same as the GFX8-only carry
// instructions but print *_co_u32 mnemonics; opcodes 0x34-0x36 are the V_*_U32
// reals built from the pseudos of the same name.
2544let AssemblerPredicate = isGFX9Only in {
2545defm V_ADD_CO_U32         : VOP2be_Real_e32e64_gfx9 <0x19, "V_ADD_CO_U32",     "v_add_co_u32">;
2546defm V_SUB_CO_U32         : VOP2be_Real_e32e64_gfx9 <0x1a, "V_SUB_CO_U32",     "v_sub_co_u32">;
2547defm V_SUBREV_CO_U32      : VOP2be_Real_e32e64_gfx9 <0x1b, "V_SUBREV_CO_U32",  "v_subrev_co_u32">;
2548defm V_ADDC_CO_U32        : VOP2be_Real_e32e64_gfx9 <0x1c, "V_ADDC_U32",    "v_addc_co_u32">;
2549defm V_SUBB_CO_U32        : VOP2be_Real_e32e64_gfx9 <0x1d, "V_SUBB_U32",    "v_subb_co_u32">;
2550defm V_SUBBREV_CO_U32     : VOP2be_Real_e32e64_gfx9 <0x1e, "V_SUBBREV_U32", "v_subbrev_co_u32">;
2551
2552defm V_ADD_U32            : VOP2_Real_e32e64_gfx9 <0x34>;
2553defm V_SUB_U32            : VOP2_Real_e32e64_gfx9 <0x35>;
2554defm V_SUBREV_U32         : VOP2_Real_e32e64_gfx9 <0x36>;
2555} // End AssemblerPredicate = isGFX9Only
2556
// Instructions that get only a single e64 real here; the argument is the full
// 10-bit VOP3 opcode.
2557defm V_BFM_B32            : VOP2_Real_e64only_vi <0x293>;
2558defm V_BCNT_U32_B32       : VOP2_Real_e64only_vi <0x28b>;
2559defm V_MBCNT_LO_U32_B32   : VOP2_Real_e64only_vi <0x28c>;
2560defm V_MBCNT_HI_U32_B32   : VOP2_Real_e64only_vi <0x28d>;
2561defm V_LDEXP_F32          : VOP2_Real_e64only_vi <0x288>;
2562defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e64only_vi <0x1f0>;
2563defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e64only_vi <0x294>;
2564defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e64only_vi <0x295>;
2565defm V_CVT_PKRTZ_F16_F32  : VOP2_Real_e64only_vi <0x296>;
2566defm V_CVT_PK_U16_U32     : VOP2_Real_e64only_vi <0x297>;
2567defm V_CVT_PK_I16_I32     : VOP2_Real_e64only_vi <0x298>;
2568
// 16-bit VOP2 instructions (6-bit VOP2 opcodes 0x1f-0x33).
2569defm V_ADD_F16            : VOP2_Real_e32e64_vi <0x1f>;
2570defm V_SUB_F16            : VOP2_Real_e32e64_vi <0x20>;
2571defm V_SUBREV_F16         : VOP2_Real_e32e64_vi <0x21>;
2572defm V_MUL_F16            : VOP2_Real_e32e64_vi <0x22>;
2573defm V_MAC_F16            : VOP2_Real_e32e64_vi <0x23>;
2574defm V_MADMK_F16          : VOP2_Real_MADK_vi <0x24>;
2575defm V_MADAK_F16          : VOP2_Real_MADK_vi <0x25>;
2576defm V_ADD_U16            : VOP2_Real_e32e64_vi <0x26>;
2577defm V_SUB_U16            : VOP2_Real_e32e64_vi <0x27>;
2578defm V_SUBREV_U16         : VOP2_Real_e32e64_vi <0x28>;
2579defm V_MUL_LO_U16         : VOP2_Real_e32e64_vi <0x29>;
2580defm V_LSHLREV_B16        : VOP2_Real_e32e64_vi <0x2a>;
2581defm V_LSHRREV_B16        : VOP2_Real_e32e64_vi <0x2b>;
2582defm V_ASHRREV_I16        : VOP2_Real_e32e64_vi <0x2c>;
2583defm V_MAX_F16            : VOP2_Real_e32e64_vi <0x2d>;
2584defm V_MIN_F16            : VOP2_Real_e32e64_vi <0x2e>;
2585defm V_MAX_U16            : VOP2_Real_e32e64_vi <0x2f>;
2586defm V_MAX_I16            : VOP2_Real_e32e64_vi <0x30>;
2587defm V_MIN_U16            : VOP2_Real_e32e64_vi <0x31>;
2588defm V_MIN_I16            : VOP2_Real_e32e64_vi <0x32>;
2589defm V_LDEXP_F16          : VOP2_Real_e32e64_vi <0x33>;
2590
2591let SubtargetPredicate = isGFX8GFX9 in {
2592
2593// Aliases to simplify matching of floating-point instructions that
2594// are VOP2 on SI and VOP3 on VI.
// The alias matches "name $dst, $src0, $src1" and expands to the VOP3 real
// `inst`. When inst.Pfl.HasOMod is set, the result carries one extra trailing
// 0 operand. NOTE(review): the literal 0 operands are presumably the default
// modifier/clamp/omod values - confirm against the VOP3 operand lists.
2595class SI2_VI3Alias <string name, VOP3_Real inst> : InstAlias <
2596  name#" $dst, $src0, $src1",
2597  !if(inst.Pfl.HasOMod,
2598      (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0),
2599      (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0))
2600>, PredicateControl {
2601  let UseInstAsmMatchConverter = 0;
2602  let AsmVariantName = AMDGPUAsmVariants.VOP3;
2603}
2604
// One alias per instruction that has only an e64 real in the VI tables.
2605def : SI2_VI3Alias <"v_ldexp_f32", V_LDEXP_F32_e64_vi>;
2606def : SI2_VI3Alias <"v_cvt_pkaccum_u8_f32", V_CVT_PKACCUM_U8_F32_e64_vi>;
2607def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>;
2608def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>;
2609def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>;
2610
2611defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_vi>;
2612
2613} // End SubtargetPredicate = isGFX8GFX9
2614
// Assembler aliases, GFX9 only. Each pairs an e32 pseudo with the GFX9
// carry-out real and passes the *_co_u32 mnemonic.
2615let SubtargetPredicate = isGFX9Only in {
2616
2617defm : VOP2bInstAliases<V_ADD_U32_e32,     V_ADD_CO_U32_e32_gfx9,     "v_add_co_u32">;
2618defm : VOP2bInstAliases<V_ADDC_U32_e32,    V_ADDC_CO_U32_e32_gfx9,    "v_addc_co_u32">;
2619defm : VOP2bInstAliases<V_SUB_U32_e32,     V_SUB_CO_U32_e32_gfx9,     "v_sub_co_u32">;
2620defm : VOP2bInstAliases<V_SUBB_U32_e32,    V_SUBB_CO_U32_e32_gfx9,    "v_subb_co_u32">;
2621defm : VOP2bInstAliases<V_SUBREV_U32_e32,  V_SUBREV_CO_U32_e32_gfx9,  "v_subrev_co_u32">;
2622defm : VOP2bInstAliases<V_SUBBREV_U32_e32, V_SUBBREV_CO_U32_e32_gfx9, "v_subbrev_co_u32">;
2623
2624} // End SubtargetPredicate = isGFX9Only
2625
// VI-encoded reals that are additionally gated on the HasDLInsts subtarget
// predicate.
2626let SubtargetPredicate = HasDLInsts in {
2627
2628defm V_FMAC_F32 : VOP2_Real_e32e64_vi <0x3b>;
2629defm V_XNOR_B32 : VOP2_Real_e32e64_vi <0x3d>;
2630
2631} // End SubtargetPredicate = HasDLInsts
2632
// GFX90A reals. Only DecoderNamespace is set by this block; no
// AssemblerPredicate is applied here.
2633let DecoderNamespace = "GFX90A" in {
  // 32-bit (e32) real built from the NAME#"_e32" pseudo.
2634  multiclass VOP2_Real_e32_gfx90a <bits<6> op> {
2635    def _e32_gfx90a :
2636      VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX90A>,
2637      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
2638  }
2639
  // 64-bit (e64) real built from the NAME#"_e64" pseudo; op is the full 10-bit
  // VOP3 opcode and the VI VOP3 encoding class is reused.
2640  multiclass VOP2_Real_e64_gfx90a <bits<10> op> {
2641    def _e64_gfx90a :
2642      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX90A>,
2643      VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
2644  }
2645
  // e32 + e64 pair; the VOP3 opcode is the VOP2 opcode with the bits
  // 0, 1, 0, 0 prepended.
2646  multiclass Base_VOP2_Real_e32e64_gfx90a <bits<6> op> :
2647    VOP2_Real_e32_gfx90a<op>,
2648    VOP2_Real_e64_gfx90a<{0, 1, 0, 0, op{5-0}}>;
2649
  // The pair above plus a DPP real when the _e32 pseudo's profile sets
  // HasExtDPP. The DPP record overrides DecoderNamespace with "GFX9".
2650  multiclass VOP2_Real_e32e64_gfx90a <bits<6> op> :
2651    Base_VOP2_Real_e32e64_gfx90a<op> {
2652
2653    if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
2654      def _dpp_gfx90a :
2655        VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX90A>,
2656        VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> {
2657          let DecoderNamespace = "GFX9";
2658        }
2659  }
2660} // End DecoderNamespace = "GFX90A"
2661
// GFX90A reals for V_FMAC_F64, gated on HasFmacF64Inst.
2662let SubtargetPredicate = HasFmacF64Inst in {
2663  defm V_FMAC_F64       : VOP2_Real_e32e64_gfx90a <0x4>;
2664} // End SubtargetPredicate = HasFmacF64Inst
2665
// V_MUL_LEGACY_F32 gets only an e64 real in the GFX90A encoding family, marked
// IsSingle. The argument is the 10-bit VOP3 opcode.
2666let IsSingle = 1 in {
2667  defm V_MUL_LEGACY_F32 : VOP2_Real_e64_gfx90a <0x2a1>;
2668}
2669
// GFX940 MADK reals; these reuse opcodes 0x17/0x18, which the VI table
// assigns to V_MADMK_F32/V_MADAK_F32.
2670let SubtargetPredicate = HasFmaakFmamkF32Insts in {
2671defm V_FMAMK_F32        : VOP2_Real_MADK_gfx940 <0x17>;
2672defm V_FMAAK_F32        : VOP2_Real_MADK_gfx940 <0x18>;
2673}
2674
// Reals for an accumulating dot-product instruction in the VI encoding: the
// e32/e64 pair from Base_VOP2_Real_e32e64_vi plus a _dpp_vi record whose
// SubtargetPredicate is isGFX9Only.
multiclass VOP2_Real_DOT_ACC_gfx9<bits<6> op> : Base_VOP2_Real_e32e64_vi<op> {
  let SubtargetPredicate = isGFX9Only in {
    def _dpp_vi : VOP2_DPP<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>;
  }
}
2679
// GFX10 reals for an accumulating dot-product instruction: e32, DPP and DPP8
// (no e64 or SDWA reals are instantiated).
multiclass VOP2_Real_DOT_ACC_gfx10<bits<6> op> :
  VOP2_Real_e32_gfx10<op>, VOP2_Real_dpp_gfx10<op>, VOP2_Real_dpp8_gfx10<op>;
2684
// Same real set as VOP2_Real_DOT_ACC_gfx10 (e32, DPP, DPP8), except that the
// e32 real is instantiated with IsSingle = 1.
multiclass VOP2Only_Real_DOT_ACC_gfx10<bits<6> op> :
  VOP2_Real_dpp_gfx10<op>,
  VOP2_Real_dpp8_gfx10<op> {
  let IsSingle = 1 in {
    defm NAME : VOP2_Real_e32_gfx10<op>;
  }
}
2690
// Accumulating dot-product instructions. Each group is gated on the HasDot*
// predicate named in its `let`.
2691let OtherPredicates = [HasDot5Insts] in {
2692  defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx9<0x37>;
2693  // NB: Opcode conflicts with V_DOT8C_I32_I4
2694  // This opcode exists in gfx 10.1* only
2695  defm V_DOT2C_F32_F16 : VOP2Only_Real_DOT_ACC_gfx10<0x02>;
2696}
2697
2698let OtherPredicates = [HasDot6Insts] in {
2699  defm V_DOT4C_I32_I8  : VOP2_Real_DOT_ACC_gfx9<0x39>;
2700  defm V_DOT4C_I32_I8  : VOP2Only_Real_DOT_ACC_gfx10<0x0d>;
2701}
2702
2703let OtherPredicates = [HasDot4Insts] in {
2704  defm V_DOT2C_I32_I16 : VOP2_Real_DOT_ACC_gfx9<0x38>;
2705}
2706let OtherPredicates = [HasDot3Insts] in {
2707  defm V_DOT8C_I32_I4  : VOP2_Real_DOT_ACC_gfx9<0x3a>;
2708}
2709
// Only the e32 VI real is instantiated for V_PK_FMAC_F16.
2710let SubtargetPredicate = HasPkFmacF16Inst in {
2711defm V_PK_FMAC_F16 : VOP2_Real_e32_vi<0x3c>;
2712} // End SubtargetPredicate = HasPkFmacF16Inst
2713
2714let SubtargetPredicate = HasDot3Insts in {
2715  // NB: Opcode conflicts with V_DOT2C_F32_F16
  // Both use GFX10 opcode 0x02, so this one is placed in the "GFX10_B"
  // decoder namespace.
2716  let DecoderNamespace = "GFX10_B" in
2717  defm V_DOT8C_I32_I4 : VOP2_Real_DOT_ACC_gfx10<0x02>;
2718}
2719
// Opcode 0x16 is also assigned to V_MAC_F32 in the VI table; this real is
// placed in the "GFX950" decoder namespace.
2720let OtherPredicates = [HasDot13Insts] in {
2721  let DecoderNamespace = "GFX950" in
2722  defm V_DOT2C_F32_BF16 : VOP2_Real_DOT_ACC_gfx9<0x16>;
2723}
2724