xref: /llvm-project/llvm/lib/Target/AMDGPU/VOP3Instructions.td (revision 5d1c596ab47b9412bb36bdfb0520d9af1343a5ce)
//===-- VOP3Instructions.td - Vector Instruction Definitions --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Complex patterns handled by the C++ selector routine "SelectBITOP3", one for
// i32 results and one for i16 results. Each produces four operands and is
// rooted at and/or/xor nodes.
def BITOP3_32 : ComplexPattern<i32, 4, "SelectBITOP3", [and, or, xor]>;
def BITOP3_16 : ComplexPattern<i16, 4, "SelectBITOP3", [and, or, xor]>;

// Special case for v_div_fmas_{f32|f64}, since it seems to be the
// only VOP instruction that implicitly reads VCC.
//
// Both profiles share the same 64-bit assembly string (three modified sources
// plus clamp and omod) and are marked IsSingle.
let Asm64 = " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod" in {
// f32 variant: the DPP and VOP3 DPP extensions are explicitly disabled.
def VOP_F32_F32_F32_F32_VCC : VOPProfile<[f32, f32, f32, f32]> {
  let Outs64 = (outs DstRC.RegClass:$vdst);
  let HasExtVOP3DPP = 0;
  let HasExtDPP = 0;
  let IsSingle = 1;
}
// f64 variant: DPP settings are left at the VOPProfile defaults.
def VOP_F64_F64_F64_F64_VCC : VOPProfile<[f64, f64, f64, f64]> {
  let Outs64 = (outs DstRC.RegClass:$vdst);
  let IsSingle = 1;
}
} // End Asm64 = " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod"

// Profile for VOP3b-style instructions whose result and three sources all
// have type `vt`. Besides $vdst, the instruction writes a second, scalar
// destination $sdst. DPP and VOP3 DPP extensions are disabled.
class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
  let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
  let Asm64 = "$vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod";
  let IsSingle = 1;
  let HasExtVOP3DPP = 0;
  let HasExtDPP = 0;
}

// Concrete f32 and f64 instantiations of VOP3b_Profile.
def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile<f32>;
def VOP3b_F64_I1_F64_F64_F64 : VOP3b_Profile<f64>;

// Integer VOP3b profile: i64 result from (i32, i32, i64) sources, with an
// additional scalar destination $sdst. Sources are printed without modifiers;
// only clamp is available.
def VOP3b_I64_I1_I32_I32_I64 : VOPProfile<[i64, i32, i32, i64]> {
  let HasClamp = 1;

  let IsSingle = 1;
  let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
  let Asm64 = "$vdst, $sdst, $src0, $src1, $src2$clamp";
}

// VOP3 profile wrapper that disables the DPP and VOP3 DPP extensions; used
// below by the 32-bit integer multiply instructions.
class V_MUL_PROF<VOPProfile P> : VOP3_Profile<P> {
  let HasExtVOP3DPP = 0;
  let HasExtDPP = 0;
}

// Three-source f32 VOP3 profile with the DPP and VOP3 DPP extensions disabled;
// used below by V_DIV_FIXUP_F32.
def DIV_FIXUP_F32_PROF : VOP3_Profile<VOP_F32_F32_F32_F32> {
  let HasExtVOP3DPP = 0;
  let HasExtDPP = 0;
}

//===----------------------------------------------------------------------===//
// VOP3 INTERP
//===----------------------------------------------------------------------===//

// Base class for VOP3-encoded interpolation pseudos. Assembly parsing is
// routed through the "cvtVOP3Interp" converter, and the instructions are
// marked as never raising FP exceptions.
class VOP3Interp<string OpName, VOPProfile P, list<dag> pattern = []> :
                 VOP3_Pseudo<OpName, P, pattern> {
  let AsmMatchConverter = "cvtVOP3Interp";
  let mayRaiseFPException = 0;
}

// Operand profile for the 32-bit interpolation instructions that take one
// modified VGPR source plus the attribute / attribute-channel selectors,
// followed by clamp and omod.
def VOP3_INTERP : VOPProfile<[f32, f32, i32, untyped]> {
  let Src0Mod = FPVRegInputMods;
  let Ins64 = (ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0,
                   InterpAttr:$attr, InterpAttrChan:$attrchan,
                   Clamp0:$clamp, omod0:$omod);

  let Asm64 = "$vdst, $src0_modifiers, $attr$attrchan$clamp$omod";
}

// Operand profile for v_interp_mov_f32: src0 is an InterpSlot operand without
// source modifiers, followed by the attribute / attribute-channel selectors,
// clamp and omod.
def VOP3_INTERP_MOV : VOPProfile<[f32, i32, i32, untyped]> {
  let Ins64 = (ins InterpSlot:$src0,
                   InterpAttr:$attr, InterpAttrChan:$attrchan,
                   Clamp0:$clamp, omod0:$omod);

  let Asm64 = "$vdst, $src0, $attr$attrchan$clamp$omod";

  let HasClamp = 1;
  let HasSrc0Mods = 0;
}

// Builds the assembly operand string for the 16-bit interpolation profiles.
//   HasSrc2 - append ", $src2_modifiers" after the attribute selectors.
//   HasOMod - append "$omod" after "$high$clamp".
// The result is available in `ret`.
class getInterp16Asm <bit HasSrc2, bit HasOMod> {
  string src2 = !if(HasSrc2, ", $src2_modifiers", "");
  string omod = !if(HasOMod, "$omod", "");
  string ret =
    " $vdst, $src0_modifiers, $attr$attrchan"#src2#"$high$clamp"#omod;
}

// Builds the input operand list for the 16-bit interpolation profiles.
//   HasSrc2 - include a modified $src2 operand after the attribute selectors.
//   HasOMod - include the trailing $omod operand (only consulted when
//             HasSrc2 is set).
//   Src0Mod / Src2Mod - operand classes used for the source modifiers.
// The result is available in `ret`.
//
// NOTE(review): the form without src2 always carries $omod, regardless of
// HasOMod. The only no-src2 user visible in this file has an f32 destination
// (so HasOMod is 1 there) - confirm before adding an f16 no-src2 user.
class getInterp16Ins <bit HasSrc2, bit HasOMod,
                      Operand Src0Mod, Operand Src2Mod> {
  dag ret = !if(HasSrc2,
                !if(HasOMod,
                    (ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0,
                         InterpAttr:$attr, InterpAttrChan:$attrchan,
                         Src2Mod:$src2_modifiers, VRegSrc_32:$src2,
                         highmod:$high, Clamp0:$clamp, omod0:$omod),
                    (ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0,
                         InterpAttr:$attr, InterpAttrChan:$attrchan,
                         Src2Mod:$src2_modifiers, VRegSrc_32:$src2,
                         highmod:$high, Clamp0:$clamp)
                ),
                (ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0,
                     InterpAttr:$attr, InterpAttrChan:$attrchan,
                     highmod:$high, Clamp0:$clamp, omod0:$omod)
            );
}

// Profile for the 16-bit interpolation instructions. The operand list and
// assembly string are derived from getInterp16Ins / getInterp16Asm.
class VOP3_INTERP16 <list<ValueType> ArgVT> : VOPProfile<ArgVT> {
  let IsSingle = 1;
  // omod is only available when the destination type is not f16.
  let HasOMod = !ne(DstVT.Value, f16.Value);
  let HasHigh = 1;

  let Src0Mod = FPVRegInputMods;
  let Src2Mod = FPVRegInputMods;

  let Outs64 = (outs DstRC.RegClass:$vdst);
  let Ins64 = getInterp16Ins<HasSrc2, HasOMod, Src0Mod, Src2Mod>.ret;
  let Asm64 = getInterp16Asm<HasSrc2, HasOMod>.ret;
}

//===----------------------------------------------------------------------===//
// VOP3 Instructions
//===----------------------------------------------------------------------===//

// Commutable VOP3 instructions. Everything up to the matching
// "End isReMaterializable = 1" is additionally rematerializable; the
// v_div_fmas_* pair at the end is commutable only.
let isCommutable = 1 in {

let isReMaterializable = 1 in {
let mayRaiseFPException = 0 in {
let SubtargetPredicate = HasMadMacF32Insts in {
defm V_MAD_LEGACY_F32 : VOP3Inst <"v_mad_legacy_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
defm V_MAD_F32 : VOP3Inst <"v_mad_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, any_fmad>;
} // End SubtargetPredicate = HasMadMacF32Insts

let SubtargetPredicate = HasFmaLegacy32 in
defm V_FMA_LEGACY_F32 : VOP3Inst <"v_fma_legacy_f32",
                                 VOP3_Profile<VOP_F32_F32_F32_F32>,
                                 int_amdgcn_fma_legacy>;
} // End mayRaiseFPException = 0

defm V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, any_fma>;
defm V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_lerp>;

let SchedRW = [WriteDoubleAdd] in {
let FPDPRounding = 1 in {
defm V_FMA_F64 : VOP3Inst <"v_fma_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, any_fma>;
let SubtargetPredicate = isNotGFX12Plus in {
defm V_ADD_F64 : VOP3Inst <"v_add_f64", VOP3_Profile<VOP_F64_F64_F64>, any_fadd>;
defm V_MUL_F64 : VOP3Inst <"v_mul_f64", VOP3_Profile<VOP_F64_F64_F64>, any_fmul>;
} // End SubtargetPredicate = isNotGFX12Plus
} // End FPDPRounding = 1
let SubtargetPredicate = isNotGFX12Plus in {
defm V_MIN_F64 : VOP3Inst <"v_min_f64", VOP3_Profile<VOP_F64_F64_F64>, fminnum_like>;
defm V_MAX_F64 : VOP3Inst <"v_max_f64", VOP3_Profile<VOP_F64_F64_F64>, fmaxnum_like>;
} // End SubtargetPredicate = isNotGFX12Plus
} // End SchedRW = [WriteDoubleAdd]

let SchedRW = [WriteIntMul] in {
defm V_MUL_LO_U32 : VOP3Inst <"v_mul_lo_u32", V_MUL_PROF<VOP_I32_I32_I32>, DivergentBinFrag<mul>>;
defm V_MUL_HI_U32 : VOP3Inst <"v_mul_hi_u32", V_MUL_PROF<VOP_I32_I32_I32>, mulhu>;
defm V_MUL_LO_I32 : VOP3Inst <"v_mul_lo_i32", V_MUL_PROF<VOP_I32_I32_I32>>;
defm V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", V_MUL_PROF<VOP_I32_I32_I32>, mulhs>;
} // End SchedRW = [WriteIntMul]

let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0, AddedComplexity = 1 in {
defm V_MINIMUM_F32 : VOP3Inst <"v_minimum_f32", VOP3_Profile<VOP_F32_F32_F32>, DivergentBinFrag<fminimum>>;
defm V_MAXIMUM_F32 : VOP3Inst <"v_maximum_f32", VOP3_Profile<VOP_F32_F32_F32>, DivergentBinFrag<fmaximum>>;
defm V_MINIMUM_F16 : VOP3Inst_t16 <"v_minimum_f16", VOP_F16_F16_F16, DivergentBinFrag<fminimum>>;
defm V_MAXIMUM_F16 : VOP3Inst_t16 <"v_maximum_f16", VOP_F16_F16_F16, DivergentBinFrag<fmaximum>>;

let SchedRW = [WriteDoubleAdd] in {
defm V_MINIMUM_F64 : VOP3Inst <"v_minimum_f64", VOP3_Profile<VOP_F64_F64_F64>, fminimum>;
defm V_MAXIMUM_F64 : VOP3Inst <"v_maximum_f64", VOP3_Profile<VOP_F64_F64_F64>, fmaximum>;
} // End SchedRW = [WriteDoubleAdd]
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0, AddedComplexity = 1

} // End isReMaterializable = 1

let Uses = [MODE, VCC, EXEC] in {
// v_div_fmas_f32:
//   result = src0 * src1 + src2
//   if (vcc)
//     result *= 2^32
//
let SchedRW = [WriteFloatFMA] in
defm V_DIV_FMAS_F32 : VOP3Inst_Pseudo_Wrapper <"v_div_fmas_f32", VOP_F32_F32_F32_F32_VCC, []>;
// v_div_fmas_f64:
//   result = src0 * src1 + src2
//   if (vcc)
//     result *= 2^64
//
let SchedRW = [WriteDouble], FPDPRounding = 1 in
defm V_DIV_FMAS_F64 : VOP3Inst_Pseudo_Wrapper  <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC, []>;
} // End Uses = [MODE, VCC, EXEC]

} // End isCommutable = 1

// Rematerializable three-operand VOP3 instructions: cube-map helpers, bit
// field ops, min3/max3/med3, SAD, and the div_fixup / ldexp helpers.
let isReMaterializable = 1 in {
let mayRaiseFPException = 0 in {
defm V_CUBEID_F32 : VOP3Inst <"v_cubeid_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubeid>;
defm V_CUBESC_F32 : VOP3Inst <"v_cubesc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubesc>;
defm V_CUBETC_F32 : VOP3Inst <"v_cubetc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubetc>;
defm V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubema>;
} // End mayRaiseFPException = 0

defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_u32>;
defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_i32>;
defm V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfi>;
defm V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, fshr>;
defm V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_alignbyte>;

// XXX - No FPException seems suspect but manual doesn't say it does
let mayRaiseFPException = 0 in {
  let isCommutable = 1 in {
    defm V_MIN3_I32 : VOP3Inst <"v_min3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmin3>;
    defm V_MIN3_U32 : VOP3Inst <"v_min3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumin3>;
    defm V_MAX3_I32 : VOP3Inst <"v_max3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmax3>;
    defm V_MAX3_U32 : VOP3Inst <"v_max3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumax3>;
    defm V_MED3_I32 : VOP3Inst <"v_med3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmed3>;
    defm V_MED3_U32 : VOP3Inst <"v_med3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumed3>;
  } // End isCommutable = 1
  defm V_MIN3_F32 : VOP3Inst <"v_min3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmin3>;
  defm V_MAX3_F32 : VOP3Inst <"v_max3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmax3>;
  defm V_MED3_F32 : VOP3Inst <"v_med3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmed3>;
} // End mayRaiseFPException = 0

let SubtargetPredicate = HasMinimum3Maximum3F32, ReadsModeReg = 0 in {
  defm V_MINIMUM3_F32 : VOP3Inst <"v_minimum3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfminimum3>;
  defm V_MAXIMUM3_F32 : VOP3Inst <"v_maximum3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmaximum3>;
} // End SubtargetPredicate = HasMinimum3Maximum3F32, ReadsModeReg = 0

let isCommutable = 1 in {
  defm V_SAD_U8 : VOP3Inst <"v_sad_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
  defm V_SAD_HI_U8 : VOP3Inst <"v_sad_hi_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
  defm V_SAD_U16 : VOP3Inst <"v_sad_u16", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
  defm V_SAD_U32 : VOP3Inst <"v_sad_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
} // End isCommutable = 1
defm V_CVT_PK_U8_F32 : VOP3Inst<"v_cvt_pk_u8_f32", VOP3_Profile<VOP_I32_F32_I32_I32>, int_amdgcn_cvt_pk_u8_f32>;

defm V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", DIV_FIXUP_F32_PROF, AMDGPUdiv_fixup>;

let SchedRW = [WriteDoubleAdd], FPDPRounding = 1 in {
  defm V_DIV_FIXUP_F64 : VOP3Inst <"v_div_fixup_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, AMDGPUdiv_fixup>;
  defm V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, any_fldexp>;
} // End SchedRW = [WriteDoubleAdd], FPDPRounding = 1
} // End isReMaterializable = 1


// Division pre-scale instructions. Both use the VOP3b profiles (vector result
// plus a scalar destination), hence the extra WriteSALU in SchedRW.
let mayRaiseFPException = 0 in { // Seems suspicious but manual doesn't say it does.
  let SchedRW = [WriteFloatFMA, WriteSALU] in
  defm V_DIV_SCALE_F32 : VOP3Inst_Pseudo_Wrapper <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32> ;

  // Double precision division pre-scale.
  let SchedRW = [WriteDouble, WriteSALU], FPDPRounding = 1 in
  defm V_DIV_SCALE_F64 : VOP3Inst_Pseudo_Wrapper <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64>;
} // End mayRaiseFPException = 0

// v_msad_u8: three i32 sources with clamp; no selection pattern attached.
let isReMaterializable = 1 in
defm V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;

// v_mqsad_pk_u16_u8: the destination is marked early-clobber so that it is
// not allocated on top of a source register.
let Constraints = "@earlyclobber $vdst" in {
defm V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>;
} // End Constraints = "@earlyclobber $vdst"


// Rematerializable v_trig_preop_f64 and the 64-bit shifts. The non-reversed
// shift forms are GFX6/GFX7 only; the reversed-operand forms exist on GFX8+
// (v_lshlrev_b64 only on GFX8 through GFX11).
let isReMaterializable = 1 in {
let SchedRW = [WriteDouble] in {
defm V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I32>, int_amdgcn_trig_preop>;
} // End SchedRW = [WriteDouble]

let SchedRW = [Write64Bit] in {
  let SubtargetPredicate = isGFX6GFX7 in {
  defm V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile<VOP_I64_I64_I32>, cshl_64>;
  defm V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile<VOP_I64_I64_I32>, csrl_64>;
  defm V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile<VOP_I64_I64_I32>, csra_64>;
  } // End SubtargetPredicate = isGFX6GFX7

  let SubtargetPredicate = isGFX8Plus in {
  defm V_LSHRREV_B64 : VOP3Inst <"v_lshrrev_b64", VOP3_Profile<VOP_I64_I32_I64>, clshr_rev_64>;
  defm V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>, cashr_rev_64>;
  } // End SubtargetPredicate = isGFX8Plus

  let SubtargetPredicate = isGFX8GFX9GFX10GFX11 in {
  defm V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>, clshl_rev_64>;
  } // End SubtargetPredicate = isGFX8GFX9GFX10GFX11
} // End SchedRW = [Write64Bit]
} // End isReMaterializable = 1

// Divergent sign-extension i16 -> i32: signed bit-field extract of 16 (0x10)
// bits starting at bit 0.
def : GCNPat<
  (i32 (DivergentUnaryFrag<sext> i16:$src)),
  (i32 (V_BFE_I32_e64 i16:$src, (i32 0), (i32 0x10)))
>;

// Rematerializable v_mullit_f32 (GFX6, GFX7 and GFX10+) and v_perm_b32
// (GFX8+).
let isReMaterializable = 1 in {
let SubtargetPredicate = isGFX6GFX7GFX10Plus in {
defm V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
} // End SubtargetPredicate = isGFX6GFX7GFX10Plus

let SchedRW = [Write32Bit] in {
let SubtargetPredicate = isGFX8Plus in {
defm V_PERM_B32 : VOP3Inst <"v_perm_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUperm>;
} // End SubtargetPredicate = isGFX8Plus
} // End SchedRW = [Write32Bit]
} // End isReMaterializable = 1

// Profile for v_mqsad_u32_u8: v4i32 result from (i64, i32, v4i32) sources,
// with clamp and with source modifiers disabled.
def VOPProfileMQSAD : VOP3_Profile<VOP_V4I32_I64_I32_V4I32, VOP3_CLAMP> {
  let HasModifiers = 0;
}

// GFX7+ quad-SAD instructions. Destinations are early-clobber and the
// instructions are scheduled as quarter-rate 32-bit ops.
let SubtargetPredicate = isGFX7Plus in {
let Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] in {
defm V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>;
defm V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOPProfileMQSAD>;
} // End Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32]
} // End SubtargetPredicate = isGFX7Plus

// 64-bit multiply-add with a scalar second destination. Two sets of pseudos
// share the same mnemonics:
//  - the regular ones, for GFX7+ subtargets without the MAD intra-forwarding
//    bug;
//  - the *_gfx11 ones, for GFX11 subtargets with that bug, which mark $vdst
//    early-clobber.
let isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU] in {
  let SubtargetPredicate = isGFX7Plus, OtherPredicates = [HasNotMADIntraFwdBug] in {
    defm V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>;
    defm V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>;
  } // End SubtargetPredicate = isGFX7Plus, OtherPredicates = [HasNotMADIntraFwdBug]
  let SubtargetPredicate = isGFX11Only, OtherPredicates = [HasMADIntraFwdBug],
      Constraints = "@earlyclobber $vdst" in {
    defm V_MAD_U64_U32_gfx11 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>;
    defm V_MAD_I64_I32_gfx11 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>;
  } // End SubtargetPredicate = isGFX11Only, OtherPredicates = [HasMADIntraFwdBug]
} // End isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU]


// f16 div_fixup and fma. The plain names are the GFX8-only definitions; the
// *_gfx9 names are the GFX9+ definitions built through VOP3Inst_t16.
let FPDPRounding = 1 in {
  let Predicates = [Has16BitInsts, isGFX8Only] in {
    defm V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUdiv_fixup>;
    let isCommutable = 1 in {
      defm V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, any_fma>;
    } // End isCommutable = 1
  } // End Predicates = [Has16BitInsts, isGFX8Only]

  let SubtargetPredicate = isGFX9Plus in {
    defm V_DIV_FIXUP_F16_gfx9 : VOP3Inst_t16 <"v_div_fixup_f16_gfx9", VOP_F16_F16_F16_F16, AMDGPUdiv_fixup>;
    defm V_FMA_F16_gfx9 : VOP3Inst_t16 <"v_fma_f16_gfx9", VOP_F16_F16_F16_F16, any_fma>;
  } // End SubtargetPredicate = isGFX9Plus
} // End FPDPRounding = 1

// 16-bit multiply-add and 16-bit interpolation instructions. Everything in
// this group requires Has16BitInsts and is marked commutable.
let SubtargetPredicate = Has16BitInsts, isCommutable = 1 in {

defm V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CLAMP>>;
defm V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CLAMP>>;
let FPDPRounding = 1 in {
  defm V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, any_fmad>;
  let Uses = [MODE, M0, EXEC] in {
  let OtherPredicates = [isNotGFX90APlus] in
  // For some reason the intrinsic operands are in a different order
  // from the instruction operands.
  def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>,
          [(set f16:$vdst,
            (int_amdgcn_interp_p2_f16 (VOP3Mods f32:$src2, i32:$src2_modifiers),
                                      (VOP3Mods f32:$src0, i32:$src0_modifiers),
                                      (i32 timm:$attrchan),
                                      (i32 timm:$attr),
                                      (i1 timm:$high),
                                      M0))]>;
  } // End Uses = [MODE, M0, EXEC]
} // End FPDPRounding = 1

let SubtargetPredicate = isGFX9Only, FPDPRounding = 1 in {
  defm V_MAD_F16_gfx9   : VOP3Inst <"v_mad_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>> ;
} // End SubtargetPredicate = isGFX9Only, FPDPRounding = 1

let SubtargetPredicate = isGFX9Plus in {
defm V_MAD_U16_gfx9   : VOP3Inst_t16 <"v_mad_u16_gfx9", VOP_I16_I16_I16_I16>;
defm V_MAD_I16_gfx9   : VOP3Inst_t16 <"v_mad_i16_gfx9", VOP_I16_I16_I16_I16>;
let OtherPredicates = [isNotGFX90APlus] in
def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f16, f32, i32, f32]>>;
} // End SubtargetPredicate = isGFX9Plus

// This predicate should only apply to the selection pattern. The
// instruction still exists and should decode on subtargets with
// other bank counts.
let OtherPredicates = [isNotGFX90APlus, has32BankLDS], Uses = [MODE, M0, EXEC], FPDPRounding = 1 in {
def V_INTERP_P1LL_F16 : VOP3Interp <"v_interp_p1ll_f16", VOP3_INTERP16<[f32, f32, i32, untyped]>,
       [(set f32:$vdst, (int_amdgcn_interp_p1_f16 (VOP3Mods f32:$src0, i32:$src0_modifiers),
                                                  (i32 timm:$attrchan),
                                                  (i32 timm:$attr),
                                                  (i1 timm:$high), M0))]>;
} // End OtherPredicates = [isNotGFX90APlus, has32BankLDS], Uses = [MODE, M0, EXEC], FPDPRounding = 1

let OtherPredicates = [isNotGFX90APlus], Uses = [MODE, M0, EXEC], FPDPRounding = 1 in {
def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32, i32, f16]>>;
} // End OtherPredicates = [isNotGFX90APlus], Uses = [MODE, M0, EXEC], FPDPRounding = 1

} // End SubtargetPredicate = Has16BitInsts, isCommutable = 1

// Divergent sign-extension i16 -> i64, built as a 64-bit register pair:
//   sub0 = signed bit-field extract of 16 (0x10) bits of $src from bit 0
//   sub1 = that same extract arithmetically shifted right by 31 (0x1f),
//          i.e. the sign bit replicated across the high half.
def : GCNPat<
  (i64 (DivergentUnaryFrag<sext> i16:$src)),
    (REG_SEQUENCE VReg_64,
      (i32 (V_BFE_I32_e64 $src, (S_MOV_B32 (i32 0)), (S_MOV_B32 (i32 0x10)))), sub0,
      (i32 (COPY_TO_REGCLASS
         (V_ASHRREV_I32_e32 (S_MOV_B32 (i32 0x1f)), (i32 (V_BFE_I32_e64 $src, (S_MOV_B32 (i32 0)), (S_MOV_B32 (i32 0x10))))
      ), VGPR_32)), sub1)
>;

// VOP3 (e64) forms of the 32-bit interpolation instructions: GFX8+, excluding
// GFX90A+. No selection patterns are attached here.
let SubtargetPredicate = isGFX8Plus, Uses = [MODE, M0, EXEC], OtherPredicates = [isNotGFX90APlus] in {
def V_INTERP_P1_F32_e64  : VOP3Interp <"v_interp_p1_f32", VOP3_INTERP>;
def V_INTERP_P2_F32_e64  : VOP3Interp <"v_interp_p2_f32", VOP3_INTERP>;
def V_INTERP_MOV_F32_e64 : VOP3Interp <"v_interp_mov_f32", VOP3_INTERP_MOV>;
} // End SubtargetPredicate = isGFX8Plus, Uses = [MODE, M0, EXEC], OtherPredicates = [isNotGFX90APlus]

// Note: 16-bit instructions produce a 0 result in the high 16-bits
// on GFX8 and GFX9 and preserve high 16 bits on GFX10+
//
// Folds a zero-extension of a three-operand i16 operation `op` into a single
// `inst`, relying on the instruction itself zeroing the high half.
multiclass Arithmetic_i16_0Hi_TernaryPats <SDPatternOperator op, Instruction inst> {
  def : GCNPat<
    (i32 (zext (op i16:$src0, i16:$src1, i16:$src2))),
    (inst VSrc_b16:$src0, VSrc_b16:$src1, VSrc_b16:$src2)
  >;
}

// zext(imad i16) -> v_mad_u16, only where the high half is known to be zeroed
// (GFX8 / GFX9).
let Predicates = [Has16BitInsts, isGFX8GFX9] in {
defm : Arithmetic_i16_0Hi_TernaryPats<imad, V_MAD_U16_e64>;
} // End Predicates = [Has16BitInsts, isGFX8GFX9]

let Predicates = [Has16BitInsts, isGFX6GFX7GFX8GFX9] in {

// FIXME: Should be able to just pass imad to the instruction
// definition pattern, but the implied clamp input interferes.
//
// Selects a three-operand i16 operation `op` to `inst`, passing (i1 0) for
// the trailing clamp operand.
multiclass Ternary_i16_Pats <SDPatternOperator op, Instruction inst> {
  def : GCNPat <
    (op i16:$src0, i16:$src1, i16:$src2),
    (inst i16:$src0, i16:$src1, i16:$src2, (i1 0))
  >;
}

defm: Ternary_i16_Pats<imad, V_MAD_U16_e64>;

} // End Predicates = [Has16BitInsts, isGFX6GFX7GFX8GFX9]

// Selects the i16 expression op2(op1(src0, src1), src2) to the three-source
// instruction `inst`, with no source modifiers and no destination clamp.
multiclass Ternary_i16_Pats_gfx9<SDPatternOperator op1, SDPatternOperator op2,
                                 Instruction inst> {
  def : GCNPat <
    (op2 (op1 i16:$src0, i16:$src1), i16:$src2),
    (inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE)
  >;
}

// add(mul(a, b), c) on i16 -> v_mad_u16_gfx9, choosing the real-true16,
// fake-true16 or plain e64 instruction according to the True16 predicate.
let True16Predicate = UseRealTrue16Insts in {
  defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_U16_gfx9_t16_e64>;
} // End True16Predicate = UseRealTrue16Insts
let True16Predicate = UseFakeTrue16Insts in {
  defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_U16_gfx9_fake16_e64>;
} // End True16Predicate = UseFakeTrue16Insts
let OtherPredicates = [isGFX10Plus, Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in {
  defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_U16_gfx9_e64>;
} // End OtherPredicates = [isGFX10Plus, Has16BitInsts], True16Predicate = NotHasTrue16BitInsts

// SelectionDAG fragment matching op2(op1(x, y), z) for selection to a single
// three-operand VALU instruction. The match is rejected when the result is
// not divergent, or when the operands would need more constant-bus reads
// than the subtarget allows.
class ThreeOpFragSDAG<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
  (ops node:$x, node:$y, node:$z),
  // When the inner operation is used multiple times, selecting 3-op
  // instructions may still be beneficial -- if the other users can be
  // combined similarly. Let's be conservative for now.
  (op2 (HasOneUseBinOp<op1> node:$x, node:$y), node:$z),
  [{
    // Only use VALU ops when the result is divergent.
    if (!N->isDivergent())
      return false;

    // Check constant bus limitations.
    //
    // Note: Use !isDivergent as a conservative proxy for whether the value
    //       is in an SGPR (uniform values can end up in VGPRs as well).
    unsigned ConstantBusUses = 0;
    for (unsigned i = 0; i < 3; ++i) {
      if (!Operands[i]->isDivergent() &&
          !isInlineImmediate(Operands[i].getNode())) {
        ConstantBusUses++;
        // This uses AMDGPU::V_ADD3_U32_e64, but all three operand instructions
        // have the same constant bus limit.
        if (ConstantBusUses > Subtarget->getConstantBusLimit(AMDGPU::V_ADD3_U32_e64))
          return false;
      }
    }

    return true;
  }]> {
  // The predicate code above reads the matched operands via Operands[].
  let PredicateCodeUsesOperands = 1;
}

// ThreeOpFragSDAG plus a GlobalISel predicate: counts the operands assigned
// to the SGPR register bank and rejects the match once that count exceeds the
// subtarget's constant-bus limit.
class ThreeOpFrag<SDPatternOperator op1, SDPatternOperator op2> : ThreeOpFragSDAG<op1, op2> {
  // The divergence predicate is irrelevant in GlobalISel, as we have
  // proper register bank checks. We just need to verify the constant
  // bus restriction when all the sources are considered.
  //
  // FIXME: With unlucky SGPR operands, we could penalize code by
  // blocking folding SGPR->VGPR copies later.
  // FIXME: There's no register bank verifier
  let GISelPredicateCode = [{
    const int ConstantBusLimit = Subtarget->getConstantBusLimit(AMDGPU::V_ADD3_U32_e64);
    int ConstantBusUses = 0;
    for (unsigned i = 0; i < 3; ++i) {
      const RegisterBank *RegBank = RBI.getRegBank(Operands[i]->getReg(), MRI, TRI);
      if (RegBank->getID() == AMDGPU::SGPRRegBankID) {
        if (++ConstantBusUses > ConstantBusLimit)
          return false;
      }
    }
    return true;
  }];
}

// Matches a left shift whose shift amount is a constant in the range [0, 4].
// The SelectionDAG predicate requires a ConstantSDNode shift amount; the
// GlobalISel predicate accepts an integer constant, optionally behind a copy.
def shl_0_to_4 : PatFrag<
  (ops node:$src0, node:$src1), (shl node:$src0, node:$src1),
  [{
     if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
       return C->getZExtValue() <= 4;
     }
     return false;
   }]> {
  let GISelPredicateCode = [{
    int64_t Imm = 0;
    if (!mi_match(MI.getOperand(2).getReg(), MRI, m_ICst(Imm)) &&
        !mi_match(MI.getOperand(2).getReg(), MRI, m_Copy(m_ICst(Imm))))
      return false;
    // The unsigned compare also rejects negative shift amounts.
    return (uint64_t)Imm <= 4;
  }];
}

// Profile for the packed f32 -> 8-bit float conversions (used by
// v_cvt_pk_fp8_f32 / v_cvt_pk_bf8_f32). Every operand list carries an extra
// $vdst_in VGPR input ahead of op_sel; the DPP lists additionally start with
// $old. Clamp is disabled and the VOP3 DPP extension is enabled.
def VOP3_CVT_PK_F8_F32_Profile : VOP3_Profile<VOP_I32_F32_F32, VOP3_OPSEL> {
  let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
                          FP32InputMods:$src1_modifiers, Src1RC64:$src1,
                          VGPR_32:$vdst_in, op_sel0:$op_sel);
  let InsVOP3DPP = (ins VGPR_32:$old,
                        FP32InputMods:$src0_modifiers, Src0VOP3DPP:$src0,
                        FP32InputMods:$src1_modifiers, Src1VOP3DPP:$src1,
                        VGPR_32:$vdst_in, op_sel0:$op_sel,
                        dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
                        DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);

  let InsVOP3DPP16 = (ins VGPR_32:$old,
                          FP32InputMods:$src0_modifiers, Src0VOP3DPP:$src0,
                          FP32InputMods:$src1_modifiers, Src1VOP3DPP:$src1,
                          VGPR_32:$vdst_in, op_sel0:$op_sel,
                          dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
                          DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl, Dpp16FI:$fi);
  let InsVOP3DPP8 = (ins VGPR_32:$old,
                         FP32InputMods:$src0_modifiers, Src0VOP3DPP:$src0,
                         FP32InputMods:$src1_modifiers, Src1VOP3DPP:$src1,
                         VGPR_32:$vdst_in, op_sel0:$op_sel, dpp8:$dpp8, Dpp8FI:$fi);

  let HasClamp = 0;
  let HasExtVOP3DPP = 1;
}

// Profile for the stochastic-rounding f32 -> 8-bit float conversions (used by
// v_cvt_sr_fp8_f32 / v_cvt_sr_bf8_f32). The operand lists contain a $src2
// VGPR with modifiers, but HasSrc2 is 0 and ", $src2_modifiers" is removed
// from both assembly strings, so src2 is never printed or parsed.
def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, i32, f32]>,
                                              VOP3_OPSEL> {
  let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
                          FP32InputMods:$src1_modifiers, Src1RC64:$src1,
                          FP32InputMods:$src2_modifiers, VGPR_32:$src2,
                          op_sel0:$op_sel);
  let InsVOP3DPP16 = (ins VGPR_32:$old,
                          FP32InputMods:$src0_modifiers, Src0VOP3DPP:$src0,
                          FP32InputMods:$src1_modifiers, Src1VOP3DPP:$src1,
                          FP32InputMods:$src2_modifiers, VGPR_32:$src2,
                          op_sel0:$op_sel, dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
                          DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl, Dpp16FI:$fi);
  let InsVOP3DPP8 = (ins VGPR_32:$old,
                         FP32InputMods:$src0_modifiers, Src0VOP3DPP:$src0,
                         FP32InputMods:$src1_modifiers, Src1VOP3DPP:$src1,
                         FP32InputMods:$src2_modifiers, VGPR_32:$src2,
                         op_sel0:$op_sel, dpp8:$dpp8, Dpp8FI:$fi);
  let HasClamp = 0;
  let HasSrc2 = 0;
  let HasSrc2Mods = 1;
  let HasExtVOP3DPP = 1;
  let HasOpSel = 1;
  let HasFP8DstByteSel = 1;
  // Build the regular three-source strings, then drop the src2 text.
  let AsmVOP3OpSel = !subst(", $src2_modifiers", "",
                            getAsmVOP3OpSel<3, HasClamp, HasOMod,
                                            HasSrc0FloatMods, HasSrc1FloatMods,
                                            HasSrc2FloatMods>.ret);
  let AsmVOP3Base = !subst(", $src2_modifiers", "",
                    getAsmVOP3Base<NumSrcArgs, HasDst, HasClamp,
                    HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasModifiers, 0/*Src1Mods*/,
                    HasModifiers, DstVT>.ret);
}

// Profile for the byte-select flavour of the stochastic-rounding 8-bit float
// conversions (source type SrcVT, second source i32). The standard VOP3
// operand lists are extended with a trailing ($vdst_in, $byte_sel) pair.
class VOP3_CVT_SR_F8_ByteSel_Profile<ValueType SrcVT> :
  VOP3_Profile<VOPProfile<[i32, SrcVT, i32, untyped]>> {
  let IsFP8DstByteSel = 1;
  let HasFP8DstByteSel = 1;
  let HasClamp = 0;
  // Operands appended to both Ins64 and InsVOP3Base.
  defvar bytesel = (ins VGPR_32:$vdst_in, ByteSel:$byte_sel);
  let Ins64 = !con(getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
                            HasClamp, HasModifiers, HasSrc2Mods,
                            HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret,
                   bytesel);
  let InsVOP3Base = !con(
    getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP,
                   Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
                   Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel>.ret,
    bytesel);
}

// Matches an i32 immediate V for which V - 1 is a power of two.
def IsPow2Plus1: PatLeaf<(i32 imm), [{
  uint32_t V = N->getZExtValue();
  return isPowerOf2_32(V - 1);
}]>;

// Transforms an immediate V into the i32 target constant Log2_32(V - 1).
def Log2_32: SDNodeXForm<imm, [{
  uint32_t V = N->getZExtValue();
  return CurDAG->getTargetConstant(Log2_32(V - 1), SDLoc(N), MVT::i32);
}]>;

616let SubtargetPredicate = isGFX9Plus in {
// Commutable, rematerializable integer ops. No selection patterns are
// attached to these definitions.
let isCommutable = 1, isReMaterializable = 1 in {
  defm V_ADD3_U32 : VOP3Inst <"v_add3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
  defm V_AND_OR_B32 : VOP3Inst <"v_and_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
  defm V_OR3_B32 : VOP3Inst <"v_or3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
  defm V_XAD_U32 : VOP3Inst <"v_xad_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
  defm V_ADD_I32 : VOP3Inst <"v_add_i32", VOP3_Profile<VOP_I32_I32_I32_ARITH>>;
  defm V_ADD_LSHL_U32 : VOP3Inst <"v_add_lshl_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
} // End isCommutable = 1, isReMaterializable = 1
// 16-bit med3 / min3 / max3. These are not marked commutable:
// TODO src0 contains the opsel bit for dst, so if we commute, need to mask and swap this
// to the new src0.
defm V_MED3_F16 : VOP3Inst_t16 <"v_med3_f16", VOP_F16_F16_F16_F16, AMDGPUfmed3>;
defm V_MED3_I16 : VOP3Inst_t16 <"v_med3_i16", VOP_I16_I16_I16_I16, AMDGPUsmed3>;
defm V_MED3_U16 : VOP3Inst_t16 <"v_med3_u16", VOP_I16_I16_I16_I16, AMDGPUumed3>;

defm V_MIN3_F16 : VOP3Inst_t16 <"v_min3_f16", VOP_F16_F16_F16_F16, AMDGPUfmin3>;
defm V_MIN3_I16 : VOP3Inst_t16 <"v_min3_i16", VOP_I16_I16_I16_I16, AMDGPUsmin3>;
defm V_MIN3_U16 : VOP3Inst_t16 <"v_min3_u16", VOP_I16_I16_I16_I16, AMDGPUumin3>;

defm V_MAX3_F16 : VOP3Inst_t16 <"v_max3_f16", VOP_F16_F16_F16_F16, AMDGPUfmax3>;
defm V_MAX3_I16 : VOP3Inst_t16 <"v_max3_i16", VOP_I16_I16_I16_I16, AMDGPUsmax3>;
defm V_MAX3_U16 : VOP3Inst_t16 <"v_max3_u16", VOP_I16_I16_I16_I16, AMDGPUumax3>;

// f16 minimum3 / maximum3, gated on HasMinimum3Maximum3F16; they do not read
// the MODE register.
let SubtargetPredicate = HasMinimum3Maximum3F16, ReadsModeReg = 0 in {
  defm V_MINIMUM3_F16 : VOP3Inst_t16 <"v_minimum3_f16", VOP_F16_F16_F16_F16, AMDGPUfminimum3>;
  defm V_MAXIMUM3_F16 : VOP3Inst_t16 <"v_maximum3_f16", VOP_F16_F16_F16_F16, AMDGPUfmaximum3>;
} // End SubtargetPredicate = HasMinimum3Maximum3F16, ReadsModeReg = 0

// 16-bit add / sub in VOP3 encoding; no selection patterns attached here.
defm V_ADD_I16 : VOP3Inst_t16 <"v_add_i16", VOP_I16_I16_I16>;
defm V_SUB_I16 : VOP3Inst_t16 <"v_sub_i16", VOP_I16_I16_I16>;

// Multiply-add with i16 multiplicands and an i32 addend/result, using the
// op_sel-enabled profile.
let isCommutable = 1 in {
  defm V_MAD_U32_U16 : VOP3Inst <"v_mad_u32_u16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
  defm V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
} // End isCommutable = 1

652defm V_CVT_PKNORM_I16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_i16_f16", VOP_B32_F16_F16>;
653defm V_CVT_PKNORM_U16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_u16_f16", VOP_B32_F16_F16>;
654
655defm V_PACK_B32_F16 : VOP3Inst_t16 <"v_pack_b32_f16", VOP_B32_F16_F16>;
656
657let isReMaterializable = 1 in {
658defm V_SUB_I32 : VOP3Inst <"v_sub_i32", VOP3_Profile<VOP_I32_I32_I32_ARITH>>;
659defm V_LSHL_ADD_U32 : VOP3Inst <"v_lshl_add_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
660defm V_LSHL_OR_B32 : VOP3Inst <"v_lshl_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
661} // End isReMaterializable = 1
662
663// V_LSHL_ADD_U64: D0.u64 = (S0.u64 << S1.u[2:0]) + S2.u64
664// src0 is shifted left by 0-4 (use "0" to get ADD_U64).
665let SubtargetPredicate = isGFX940Plus in
666defm V_LSHL_ADD_U64 : VOP3Inst <"v_lshl_add_u64", VOP3_Profile<VOP_I64_I64_I32_I64>>;
667
668let OtherPredicates = [HasFP8ConversionInsts], mayRaiseFPException = 0,
669    SchedRW = [WriteFloatCvt] in {
670  let Constraints = "$vdst = $vdst_in", DisableEncoding = "$vdst_in" in {
671    defm V_CVT_PK_FP8_F32 : VOP3Inst<"v_cvt_pk_fp8_f32", VOP3_CVT_PK_F8_F32_Profile>;
672    defm V_CVT_PK_BF8_F32 : VOP3Inst<"v_cvt_pk_bf8_f32", VOP3_CVT_PK_F8_F32_Profile>;
673
674    let SubtargetPredicate = isGFX12Plus in {
675      defm V_CVT_SR_FP8_F32_gfx12 : VOP3Inst<"v_cvt_sr_fp8_f32_gfx12", VOP3_CVT_SR_F8_ByteSel_Profile<f32>>;
676      defm V_CVT_SR_BF8_F32_gfx12 : VOP3Inst<"v_cvt_sr_bf8_f32_gfx12", VOP3_CVT_SR_F8_ByteSel_Profile<f32>>;
677    }
678  }
679
680  // These instructions have non-standard use of op_sel. In particular they are
681  // using op_sel bits 2 and 3 while only having two sources. Therefore dummy
682  // src2 is used to hold the op_sel value.
683  let Constraints = "$vdst = $src2", DisableEncoding = "$src2", SubtargetPredicate = isGFX940Plus in {
684    defm V_CVT_SR_FP8_F32 : VOP3Inst<"v_cvt_sr_fp8_f32", VOP3_CVT_SR_F8_F32_Profile>;
685    defm V_CVT_SR_BF8_F32 : VOP3Inst<"v_cvt_sr_bf8_f32", VOP3_CVT_SR_F8_F32_Profile>;
686  }
687}
688
// Selects `node` when its last operand is the constant `index`. A non-zero
// `index` places SRCMODS.DST_OP_SEL in the first operand of `inst`; zero
// leaves that operand 0. $old is forwarded to `inst` after $src1.
689class Cvt_PK_F8_F32_Pat<SDPatternOperator node, int index, VOP3_Pseudo inst> : GCNPat<
690    (i32 (node f32:$src0, f32:$src1, i32:$old, index)),
691    (inst !if(index, SRCMODS.DST_OP_SEL, 0), $src0, 0, $src1, $old, 0)
692>;
693
// Selects `node` when its last operand is the 2-bit constant `index`.
// index{1} chooses SRCMODS.DST_OP_SEL for the first operand of `inst`, and
// index{0} chooses SRCMODS.OP_SEL_0 for the operand that precedes $old.
694class Cvt_SR_F8_F32_Pat<SDPatternOperator node, bits<2> index, VOP3_Pseudo inst> : GCNPat<
695    (i32 (node f32:$src0, i32:$src1, i32:$old, index)),
696    (inst !if(index{1}, SRCMODS.DST_OP_SEL, 0), $src0, 0, $src1,
697          !if(index{0}, SRCMODS.OP_SEL_0, 0), $old, 0)
698>;
699
// Selects `node` for any target-immediate $byte_sel, which is forwarded to
// `inst` through as_i32timm. Both sources are matched with VOP3Mods so the
// matched modifiers are passed along; SrcVT is the value type of $src0.
700class Cvt_SR_F8_ByteSel_Pat<SDPatternOperator node, VOP3_Pseudo inst, ValueType SrcVT> : GCNPat<
701    (i32 (node (VOP3Mods SrcVT:$src0, i32:$src0_modifiers), (VOP3Mods i32:$src1, i32:$src1_modifiers),
702          i32:$old, timm:$byte_sel)),
703    (inst $src0_modifiers, $src0, $src1_modifiers, $src1, $old, (as_i32timm $byte_sel))
704>;
705
706let OtherPredicates = [HasFP8ConversionInsts] in {
707foreach Index = [0, -1] in {
708  def : Cvt_PK_F8_F32_Pat<int_amdgcn_cvt_pk_fp8_f32, Index, V_CVT_PK_FP8_F32_e64>;
709  def : Cvt_PK_F8_F32_Pat<int_amdgcn_cvt_pk_bf8_f32, Index, V_CVT_PK_BF8_F32_e64>;
710}
711
712let SubtargetPredicate = isGFX940Plus in {
713  foreach Index = [0, 1, 2, 3] in {
714    def : Cvt_SR_F8_F32_Pat<int_amdgcn_cvt_sr_fp8_f32, Index, V_CVT_SR_FP8_F32_e64>;
715    def : Cvt_SR_F8_F32_Pat<int_amdgcn_cvt_sr_bf8_f32, Index, V_CVT_SR_BF8_F32_e64>;
716  }
717}
718
719let SubtargetPredicate = isGFX12Plus in {
720  def : Cvt_SR_F8_ByteSel_Pat<int_amdgcn_cvt_sr_fp8_f32, V_CVT_SR_FP8_F32_gfx12_e64, f32>;
721  def : Cvt_SR_F8_ByteSel_Pat<int_amdgcn_cvt_sr_bf8_f32, V_CVT_SR_BF8_F32_gfx12_e64, f32>;
722}
723}
724
725class ThreeOp_i32_Pats <SDPatternOperator op1, SDPatternOperator op2, Instruction inst> : GCNPat <
726  // This matches (op2 (op1 i32:$src0, i32:$src1), i32:$src2) with conditions.
727  (ThreeOpFrag<op1, op2> i32:$src0, i32:$src1, i32:$src2),
728  (inst VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2)
729>;
730
731def : ThreeOp_i32_Pats<cshl_32, add, V_LSHL_ADD_U32_e64>;
732def : ThreeOp_i32_Pats<add, cshl_32, V_ADD_LSHL_U32_e64>;
733def : ThreeOp_i32_Pats<add, add, V_ADD3_U32_e64>;
734def : ThreeOp_i32_Pats<ptradd, ptradd, V_ADD3_U32_e64>;
735def : ThreeOp_i32_Pats<cshl_32, or, V_LSHL_OR_B32_e64>;
736def : ThreeOp_i32_Pats<and, or, V_AND_OR_B32_e64>;
737def : ThreeOp_i32_Pats<or, or, V_OR3_B32_e64>;
738def : ThreeOp_i32_Pats<xor, add, V_XAD_U32_e64>;
739
740def : GCNPat<
741 (DivergentBinFrag<mul> i32:$src0, IsPow2Plus1:$src1),
742 (V_LSHL_ADD_U32_e64 i32:$src0, (i32 (Log2_32 imm:$src1)), i32:$src0)>;
743
744let SubtargetPredicate = isGFX940Plus in
745def : GCNPat<
746  (ThreeOpFrag<shl_0_to_4, add> i64:$src0, i32:$src1, i64:$src2),
747  (V_LSHL_ADD_U64_e64 VSrc_b64:$src0, VSrc_b32:$src1, VSrc_b64:$src2)
748>;
749
750def : VOPBinOpClampPat<saddsat, V_ADD_I32_e64, i32>;
751def : VOPBinOpClampPat<ssubsat, V_SUB_I32_e64, i32>;
752
753def : GCNPat<(DivergentBinFrag<or> (or_oneuse i64:$src0, i64:$src1), i64:$src2),
754             (REG_SEQUENCE VReg_64,
755               (V_OR3_B32_e64 (i32 (EXTRACT_SUBREG $src0, sub0)),
756                              (i32 (EXTRACT_SUBREG $src1, sub0)),
757                              (i32 (EXTRACT_SUBREG $src2, sub0))), sub0,
758               (V_OR3_B32_e64 (i32 (EXTRACT_SUBREG $src0, sub1)),
759                              (i32 (EXTRACT_SUBREG $src1, sub1)),
760                              (i32 (EXTRACT_SUBREG $src2, sub1))), sub1)>;
761
762} // End SubtargetPredicate = isGFX9Plus
763
764// FIXME: Probably should hardcode clamp bit in pseudo and avoid this.
765class OpSelBinOpClampPat<SDPatternOperator node,
766                         Instruction inst> : GCNPat<
767 (node (i16 (VOP3OpSel i16:$src0, i32:$src0_modifiers)),
768       (i16 (VOP3OpSel i16:$src1, i32:$src1_modifiers))),
769  (inst $src0_modifiers, $src0, $src1_modifiers, $src1, DSTCLAMP.ENABLE, 0)
770>;
771
772let SubtargetPredicate = isGFX9Plus, True16Predicate = NotHasTrue16BitInsts in {
773  def : OpSelBinOpClampPat<saddsat, V_ADD_I16_e64>;
774  def : OpSelBinOpClampPat<ssubsat, V_SUB_I16_e64>;
775} // End SubtargetPredicate = isGFX9Plus, True16Predicate = NotHasTrue16BitInsts
776let True16Predicate = UseRealTrue16Insts in {
777  def : OpSelBinOpClampPat<saddsat, V_ADD_I16_t16_e64>;
778  def : OpSelBinOpClampPat<ssubsat, V_SUB_I16_t16_e64>;
779} // End True16Predicate = UseRealTrue16Insts
780let True16Predicate = UseFakeTrue16Insts in {
781  def : OpSelBinOpClampPat<saddsat, V_ADD_I16_fake16_e64>;
782  def : OpSelBinOpClampPat<ssubsat, V_SUB_I16_fake16_e64>;
783} // End True16Predicate = UseFakeTrue16Insts
784
// Patterns that select an i32 ThreeOpFrag<mul, add> of $src0, $src1 and $src2
// to `inst`. In every variant src2 is supplied as a 64-bit operand, the clamp
// operand is 0, and only sub0 of the instruction result is kept.
785multiclass IMAD32_Pats <VOP3_Pseudo inst> {
  // General case: src2 is widened with an IMPLICIT_DEF high half.
786  def : GCNPat <
787        (ThreeOpFrag<mul, add> i32:$src0, i32:$src1, i32:$src2),
788        (EXTRACT_SUBREG (inst i32:$src0, i32:$src1,
789                              (REG_SEQUENCE SReg_64, // Use scalar and let it be legalized
790                                            $src2, sub0,
791                                            (i32 (IMPLICIT_DEF)), sub1),
792                                            0 /* clamp */),
793                        sub0)
794        >;
795
796  // GISel-specific pattern that avoids creating a SGPR->VGPR copy if
797  // $src2 is a VGPR.
798  def : GCNPat <
799        (ThreeOpFrag<mul, add> i32:$src0, i32:$src1, VGPR_32:$src2),
800        (EXTRACT_SUBREG (inst i32:$src0, i32:$src1,
801                              (REG_SEQUENCE VReg_64,
802                                            $src2, sub0,
803                                            (i32 (IMPLICIT_DEF)), sub1),
804                                            0 /* clamp */),
805                        sub0)
806        >;
807
808  // Immediate src2 in the pattern above will not fold because it would be partially
809  // undef. Hence define specialized pattern for this case.
810  def : GCNPat <
811        (ThreeOpFrag<mul, add> i32:$src0, i32:$src1, (i32 imm:$src2)),
812        (EXTRACT_SUBREG (inst i32:$src0, i32:$src1, (i64 (as_i64imm $src2)), 0 /* clamp */), sub0)
813        >;
814}
815
816// Handle cases where amdgpu-codegenprepare-mul24 made a mul24 instead of a normal mul.
817// We need to separate this because otherwise OtherPredicates would be overridden.
818class IMAD32_Mul24_Pat<VOP3_Pseudo inst>: GCNPat <
819    (i64 (add (i64 (AMDGPUmul_u24 i32:$src0, i32:$src1)), i64:$src2)),
820    (inst $src0, $src1, $src2, 0 /* clamp */)
821    >;
822
823// exclude pre-GFX9 where it was slow
824let OtherPredicates = [HasNotMADIntraFwdBug], SubtargetPredicate = isGFX9Plus in {
825  defm : IMAD32_Pats<V_MAD_U64_U32_e64>;
826  def : IMAD32_Mul24_Pat<V_MAD_U64_U32_e64>;
827}
828let OtherPredicates = [HasMADIntraFwdBug], SubtargetPredicate = isGFX11Only in {
829  defm : IMAD32_Pats<V_MAD_U64_U32_gfx11_e64>;
830  def : IMAD32_Mul24_Pat<V_MAD_U64_U32_gfx11_e64>;
831}
832
833def VOP3_PERMLANE_Profile : VOP3_Profile<VOPProfile <[i32, i32, i32, i32]>, VOP3_OPSEL> {
834  let InsVOP3OpSel = (ins IntOpSelMods:$src0_modifiers, VRegSrc_32:$src0,
835                          IntOpSelMods:$src1_modifiers, SSrc_b32:$src1,
836                          IntOpSelMods:$src2_modifiers, SSrc_b32:$src2,
837                          VGPR_32:$vdst_in, op_sel0:$op_sel);
838  let HasClamp = 0;
839  let HasExtVOP3DPP = 0;
840  let HasExtDPP = 0;
841}
842
843def VOP3_PERMLANE_VAR_Profile : VOP3_Profile<VOPProfile <[i32, i32, i32, untyped]>, VOP3_OPSEL> {
844  let InsVOP3OpSel = (ins IntOpSelMods:$src0_modifiers, VRegSrc_32:$src0,
845                          IntOpSelMods:$src1_modifiers, VRegSrc_32:$src1,
846                          VGPR_32:$vdst_in, op_sel0:$op_sel);
847  let HasClamp = 0;
848  let HasExtVOP3DPP = 0;
849  let HasExtDPP = 0;
850}
851
// Converts a target immediate into a source-modifiers immediate: a non-zero
// value becomes SISrcMods::OP_SEL_0 and zero becomes SISrcMods::NONE. The
// result is an i32 target constant.
852def opsel_i1timm : SDNodeXForm<timm, [{
853  return CurDAG->getTargetConstant(
854      N->getZExtValue() ? SISrcMods::OP_SEL_0 : SISrcMods::NONE,
855      SDLoc(N), MVT::i32);
856}]>;
// GlobalISel equivalent of opsel_i1timm, rendered by "renderOpSelTImm".
857def gi_opsel_i1timm : GICustomOperandRenderer<"renderOpSelTImm">,
858  GISDNodeXFormEquiv<opsel_i1timm>;
859
// SDNodeXForm producing the SISrcMods value for one modifiers operand from a
// selector immediate. The template arguments are pasted into the C++ body:
//   modifier_idx == 0      : bit 1 of the immediate selects OP_SEL_0, and
//                            DST_OP_SEL is always included when dest_sel == 1.
//   modifier_idx == 1 or 2 : bit 0 of the immediate selects OP_SEL_0;
//                            dest_sel is not consulted.
//   any other modifier_idx : the result keeps its initial value 0.
// The result is returned as an i32 target constant.
860class SrcAndDstSelToOpSelXForm<int modifier_idx, bit dest_sel> : SDNodeXForm<timm, [{
861  unsigned Val = N->getZExtValue();
862  unsigned New = 0;
863  if (}] # modifier_idx # [{ == 0) {
864    New = (}] # dest_sel # [{ == 1) ? ((Val & 0x2) ? (SISrcMods::OP_SEL_0 | SISrcMods::DST_OP_SEL) : SISrcMods::DST_OP_SEL)
865                                    : ((Val & 0x2) ? SISrcMods::OP_SEL_0 : SISrcMods::NONE);
866  } else if (}] # modifier_idx # [{== 1 || }] # modifier_idx # [{ == 2) {
867      New = (Val & 0x1) ? SISrcMods::OP_SEL_0 : SISrcMods::NONE;
868  }
869  return CurDAG->getTargetConstant(New, SDLoc(N), MVT::i32);
870}]>;
871
872def SrcAndDstSelToOpSelXForm_0_0 : SrcAndDstSelToOpSelXForm<0,0>;
873def SrcAndDstSelToOpSelXForm_0_1 : SrcAndDstSelToOpSelXForm<0,1>;
874def SrcAndDstSelToOpSelXForm_1_0 : SrcAndDstSelToOpSelXForm<1,0>;
875def SrcAndDstSelToOpSelXForm_1_1 : SrcAndDstSelToOpSelXForm<1,1>;
876def SrcAndDstSelToOpSelXForm_2_0 : SrcAndDstSelToOpSelXForm<2,0>;
877
878// The GlobalISel renderer C++ APIs have no way to access the template arguments of
879// SrcAndDstSelToOpSelXForm. Therefore, each combination of modifier_idx and dest_sel is embedded in the renderer name itself.
880// FIXME: Avoid enumerating combinations of modifier_idx and dest_sel for GlobalISel.
881def gi_SrcAndDstSelToOpSelXForm_0_0 : GICustomOperandRenderer<"renderSrcAndDstSelToOpSelXForm_0_0">,
882  GISDNodeXFormEquiv<SrcAndDstSelToOpSelXForm_0_0>;
883def gi_SrcAndDstSelToOpSelXForm_0_1 : GICustomOperandRenderer<"renderSrcAndDstSelToOpSelXForm_0_1">,
884  GISDNodeXFormEquiv<SrcAndDstSelToOpSelXForm_0_1>;
885def gi_SrcAndDstSelToOpSelXForm_1_0 : GICustomOperandRenderer<"renderSrcAndDstSelToOpSelXForm_1_0">,
886  GISDNodeXFormEquiv<SrcAndDstSelToOpSelXForm_1_0>;
887def gi_SrcAndDstSelToOpSelXForm_1_1 : GICustomOperandRenderer<"renderSrcAndDstSelToOpSelXForm_1_1">,
888  GISDNodeXFormEquiv<SrcAndDstSelToOpSelXForm_1_1>;
889def gi_SrcAndDstSelToOpSelXForm_2_0 : GICustomOperandRenderer<"renderSrcAndDstSelToOpSelXForm_2_0">,
890  GISDNodeXFormEquiv<SrcAndDstSelToOpSelXForm_2_0>;
891
// Converts a target immediate into a modifiers immediate: non-zero becomes
// SISrcMods::DST_OP_SEL, zero becomes SISrcMods::NONE (i32 target constant).
892def DstSelToOpSelXForm : SDNodeXForm<timm, [{
893  return CurDAG->getTargetConstant(
894      N->getZExtValue() ? SISrcMods::DST_OP_SEL : SISrcMods::NONE,
895      SDLoc(N), MVT::i32);
896}]>;
// GlobalISel equivalent, rendered by "renderDstSelToOpSelXForm".
897def gi_DstSelToOpSelXForm : GICustomOperandRenderer<"renderDstSelToOpSelXForm">,
898  GISDNodeXFormEquiv<DstSelToOpSelXForm>;
899
// Converts a target immediate into a modifiers immediate: non-zero becomes
// SISrcMods::OP_SEL_0, zero becomes SISrcMods::NONE (i32 target constant).
900def SrcSelToOpSelXForm : SDNodeXForm<timm, [{
901  return CurDAG->getTargetConstant(
902      N->getZExtValue() ? SISrcMods::OP_SEL_0 : SISrcMods::NONE,
903      SDLoc(N), MVT::i32);
904}]>;
// GlobalISel equivalent, rendered by "renderSrcSelToOpSelXForm".
905def gi_SrcSelToOpSelXForm : GICustomOperandRenderer<"renderSrcSelToOpSelXForm">,
906  GISDNodeXFormEquiv<SrcSelToOpSelXForm>;
907
// Converts a target immediate into a modifiers immediate by testing bit 1
// only: set becomes SISrcMods::DST_OP_SEL, clear becomes SISrcMods::NONE
// (i32 target constant).
908def DstSelToOpSel3XForm : SDNodeXForm<timm, [{
909  uint32_t V = N->getZExtValue();
910  return CurDAG->getTargetConstant(
911      (V & 0x2) ? SISrcMods::DST_OP_SEL : SISrcMods::NONE,
912      SDLoc(N), MVT::i32);
913}]>;
// GlobalISel equivalent.
// NOTE(review): the renderer name ends in "XFormXForm"; confirm it matches the
// C++ renderer declaration before renaming it.
914def gi_DstSelToOpSel3XForm : GICustomOperandRenderer<"renderDstSelToOpSel3XFormXForm">,
915  GISDNodeXFormEquiv<DstSelToOpSel3XForm>;
916
// Selects a `permlane` node of type `vt`. The $fi and $bc immediates are
// converted by opsel_i1timm and passed as the operands in front of $src0 and
// $src1 respectively; the operand in front of $src2 is 0. $vdst_in is passed
// last as a VGPR_32.
917class PermlanePat<SDPatternOperator permlane,
918  Instruction inst, ValueType vt> : GCNPat<
919  (vt (permlane vt:$vdst_in, vt:$src0, i32:$src1, i32:$src2,
920            timm:$fi, timm:$bc)),
921  (inst (opsel_i1timm $fi), VGPR_32:$src0, (opsel_i1timm $bc),
922        SCSrc_b32:$src1, 0, SCSrc_b32:$src2, VGPR_32:$vdst_in)
923>;
924
// Two-source variant of the permlane pattern: both $src0 and $src1 are
// VGPR_32 operands and there is no $src2. The $fi and $bc immediates are
// converted by opsel_i1timm and passed in front of $src0 and $src1.
925class PermlaneVarPat<SDPatternOperator permlane,
926  Instruction inst> : GCNPat<
927  (permlane i32:$vdst_in, i32:$src0, i32:$src1,
928            timm:$fi, timm:$bc),
929  (inst (opsel_i1timm $fi), VGPR_32:$src0, (opsel_i1timm $bc),
930        VGPR_32:$src1, VGPR_32:$vdst_in)
931>;
932
933class VOP3_BITOP3_Profile<VOPProfile pfl, VOP3Features f> : VOP3_Profile<pfl, f> {
934  let HasClamp = 0;
935  let HasOMod = 0;
936  let HasModifiers = 0;
937
938  let Ins64 = !con(getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
939                            0 /* HasIntClamp */, HasModifiers, HasSrc2Mods,
940                            HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret,
941                   (ins bitop3_0:$bitop3));
942
943  let InsVOP3OpSel = !con(getInsVOP3Base<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs, 0, 1, 1, 0,
944                                         Src0Mod, Src1Mod, Src2Mod, 0>.ret,
945                          (ins bitop3_0:$bitop3, op_sel0:$op_sel));
946
947  let Asm64 = "$vdst, $src0, $src1, $src2$bitop3";
948  let AsmVOP3OpSel = !subst("$op_sel", "$bitop3$op_sel", getAsmVOP3OpSel<3, 0, 0, 0, 0, 0>.ret);
949}
950
951class VOP3_CVT_SCALE_F1632_FP8BF8_Profile<ValueType DstTy> : VOP3_Profile<VOPProfile<[DstTy, i32, f32, untyped]>,
952                                              VOP3_OPSEL> {
953  let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
954                          FP32InputMods:$src1_modifiers, Src1RC64:$src1,
955                          op_sel0:$op_sel);
956  let HasClamp = 0;
957  let HasSrc2 = 0;
958  let HasSrc2Mods = 0;
959  let HasExtVOP3DPP = 0;
960  let HasOpSel = 1;
961  let HasOMod = 0;
962}
963
964class VOP3_CVT_SCALE_F1632_FP8BF8_TiedInput_Profile<VOPProfile P> : VOP3_Profile<P, VOP3_OPSEL> {
965  let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
966                          FP32InputMods:$src1_modifiers, Src1RC64:$src1,
967                          VGPR_32:$vdst_in, op_sel0:$op_sel);
968  let HasClamp = 0;
969  let HasSrc2 = 0;
970  let HasSrc2Mods = 0;
971  let HasExtVOP3DPP = 0;
972  let HasOpSel = 1;
973  let HasOMod = 0;
974}
975
976class VOP3_CVT_SCALE_FP4FP8BF8_F32_TiedInput_Profile<VOPProfile P> : VOP3_Profile<P, VOP3_OPSEL> {
977  let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
978                          FP32InputMods:$src1_modifiers, Src1RC64:$src1,
979                          FP32InputMods:$src2_modifiers, Src2RC64:$src2,
980                          VGPR_32:$vdst_in, op_sel0:$op_sel);
981  let HasClamp = 0;
982  let HasExtVOP3DPP = 0;
983  let HasOpSel = 1;
984  let HasOMod = 0;
985}
986
987class VOP3_CVT_SCALE_FP4_F32_TiedInput_Profile<VOPProfile P> : VOP3_CVT_SCALE_FP4FP8BF8_F32_TiedInput_Profile<P> {
988  let HasFP8DstByteSel = 1;
989}
990
991class VOP3_CVT_SCALE_SR_F8BF8_F16BF16F32_TiedInput_Profile<VOPProfile P> : VOP3_CVT_SCALE_FP4FP8BF8_F32_TiedInput_Profile<P> {
992  let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
993                          Int32InputMods:$src1_modifiers, Src1RC64:$src1,
994                          FP32InputMods:$src2_modifiers, Src2RC64:$src2,
995                          VGPR_32:$vdst_in, op_sel0:$op_sel);
996  let HasFP8DstByteSel = 1;
997}
998
999
1000class VOP3_CVT_SCALE_FP4_F16BF16_TiedInput_Profile<VOPProfile P> : VOP3_Profile<P, VOP3_OPSEL> {
1001  let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
1002                          FP32InputMods:$src1_modifiers, Src1RC64:$src1,
1003                          FP32InputMods:$src2_modifiers, VGPR_32:$src2,
1004                          op_sel0:$op_sel);
1005  let HasClamp = 0;
1006  let HasSrc2 = 0;
1007  let HasSrc2Mods = 1;
1008  let HasOpSel = 1;
1009  let AsmVOP3OpSel = !subst(", $src2_modifiers", "",
1010                            getAsmVOP3OpSel<3, HasClamp, HasOMod,
1011                                            HasSrc0FloatMods, HasSrc1FloatMods,
1012                                            HasSrc2FloatMods>.ret);
1013  let HasExtVOP3DPP = 0;
1014  let HasFP8DstByteSel = 1;
1015}
1016
1017class VOP3_CVT_SCALE_SR_PK_F4_F16BF16_TiedInput_Profile<ValueType Src0Ty> :
1018    VOP3_Profile<VOPProfile<[i32, Src0Ty, i32, f32]>, VOP3_OPSEL> {
1019  let InsVOP3OpSel = (ins PackedF16InputMods: $src0_modifiers, Src0RC64:$src0,
1020                          Int32InputMods:     $src1_modifiers, Src1RC64:$src1,
1021                          FP32InputMods:      $src2_modifiers, Src2RC64:$src2,
1022                          VGPR_32:$vdst_in,   op_sel0:$op_sel);
1023  let HasClamp = 0;
1024  let HasExtVOP3DPP = 0;
1025  let HasOpSel = 1;
1026  let HasOMod = 0;
1027  let HasFP4DstByteSel = 1;
1028}
1029
1030def VOP3_CVT_SCALE_SR_PK_F4_F32_TiedInput_Profile : VOP3_Profile<VOPProfile<[i32, v2f32, i32, f32]>, VOP3_OPSEL> {
1031  let InsVOP3OpSel = (ins PackedF32InputMods: $src0_modifiers, Src0RC64:$src0,
1032                          Int32InputMods:     $src1_modifiers, Src1RC64:$src1,
1033                          FP32InputMods:      $src2_modifiers, Src2RC64:$src2,
1034                          VGPR_32:$vdst_in,   op_sel0:$op_sel);
1035  let HasClamp = 0;
1036  let HasExtVOP3DPP = 0;
1037  let HasOpSel = 1;
1038  let HasOMod = 0;
1039  let HasFP4DstByteSel = 1;
1040}
1041
1042class VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<ValueType DstTy> : VOP3_Profile<VOPProfile<[DstTy, i32, f32, untyped]>,
1043                                              VOP3_OPSEL> {
1044  let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
1045                          FP32InputMods:$src1_modifiers, Src1RC64:$src1,
1046                          op_sel0:$op_sel);
1047  let HasClamp = 0;
1048  let HasSrc2 = 0;
1049  let HasSrc2Mods = 0;
1050  let HasExtVOP3DPP = 0;
1051  let HasOpSel = 1;
1052  let HasOMod = 0;
1053}
1054
1055class VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_TiedInput_Profile<VOPProfile P> : VOP3_Profile<P,VOP3_OPSEL> {
1056  let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
1057                          FP32InputMods:$src1_modifiers, Src1RC64:$src1,
1058                          VGPR_32:$vdst_in, op_sel0:$op_sel);
1059  let HasClamp = 0;
1060  let HasSrc2 = 0;
1061  let HasSrc2Mods = 0;
1062  let HasExtVOP3DPP = 0;
1063  let HasOpSel = 1;
1064  let HasOMod = 0;
1065}
1066
1067class VOP3_CVT_SCALEF32_PK_F864_Profile<VOPProfile P> : VOP3_Profile<P> {
1068  let HasModifiers = 0;
1069  let HasSrc0IntMods = 0;
1070  let HasSrc1IntMods = 0;
1071  let HasOMod = 0;
1072  let HasOpSel = 0;
1073  let HasClamp = 0;
1074  let HasExtDPP = 0;
1075  let HasExt32BitDPP = 0;
1076  let HasExtVOP3DPP = 0;
1077  let HasExt64BitDPP = 0;
1078}
1079
1080let SubtargetPredicate = HasFP8ConversionScaleInsts, mayRaiseFPException = 0 in {
1081  let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
1082    defm V_CVT_SCALEF32_SR_FP8_BF16 : VOP3Inst<"v_cvt_scalef32_sr_fp8_bf16", VOP3_CVT_SCALE_SR_F8BF8_F16BF16F32_TiedInput_Profile<VOP_I32_BF16_I32_F32>>;
1083    defm V_CVT_SCALEF32_SR_FP8_F16 : VOP3Inst<"v_cvt_scalef32_sr_fp8_f16", VOP3_CVT_SCALE_SR_F8BF8_F16BF16F32_TiedInput_Profile<VOP_I32_F16_I32_F32>>;
1084    defm V_CVT_SCALEF32_SR_FP8_F32 : VOP3Inst<"v_cvt_scalef32_sr_fp8_f32", VOP3_CVT_SCALE_SR_F8BF8_F16BF16F32_TiedInput_Profile<VOP_I32_F32_I32_F32>>;
1085    defm V_CVT_SCALEF32_F16_FP8 : VOP3Inst<"v_cvt_scalef32_f16_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_TiedInput_Profile<VOP_V2F16_I32_F32>>;
1086    defm V_CVT_SCALEF32_PK_FP8_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f32", VOP3_CVT_SCALE_FP4FP8BF8_F32_TiedInput_Profile<VOP_V2I16_F32_F32_F32>>;
1087    defm V_CVT_SCALEF32_PK_FP8_F16 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_TiedInput_Profile<VOP_V2I16_V2F16_F32>>;
1088    defm V_CVT_SCALEF32_PK_FP8_BF16 : VOP3Inst<"v_cvt_scalef32_pk_fp8_bf16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_TiedInput_Profile<VOP_V2I16_V2BF16_F32>>;
1089  }
1090  defm V_CVT_SCALEF32_F32_FP8 : VOP3Inst<"v_cvt_scalef32_f32_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f32>>;
1091  defm V_CVT_SCALEF32_PK_F32_FP8 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp8", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f32>>;
1092  defm V_CVT_SCALEF32_PK_F16_FP8    : VOP3Inst<"v_cvt_scalef32_pk_f16_fp8",  VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f16>>;
1093  defm V_CVT_SCALEF32_PK_BF16_FP8   : VOP3Inst<"v_cvt_scalef32_pk_bf16_fp8", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2bf16>>; // packed bf16 result, as in the v2bf16 selection pattern for this instruction
1094}
1095
1096let SubtargetPredicate = HasBF8ConversionScaleInsts, mayRaiseFPException = 0 in {
1097  let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
1098    defm V_CVT_SCALEF32_SR_BF8_BF16 : VOP3Inst<"v_cvt_scalef32_sr_bf8_bf16", VOP3_CVT_SCALE_SR_F8BF8_F16BF16F32_TiedInput_Profile<VOP_I32_BF16_I32_F32>>;
1099    defm V_CVT_SCALEF32_SR_BF8_F16 : VOP3Inst<"v_cvt_scalef32_sr_bf8_f16", VOP3_CVT_SCALE_SR_F8BF8_F16BF16F32_TiedInput_Profile<VOP_I32_F16_I32_F32>>;
1100    defm V_CVT_SCALEF32_SR_BF8_F32 : VOP3Inst<"v_cvt_scalef32_sr_bf8_f32", VOP3_CVT_SCALE_SR_F8BF8_F16BF16F32_TiedInput_Profile<VOP_I32_F32_I32_F32>>;
1101    defm V_CVT_SCALEF32_F16_BF8 : VOP3Inst<"v_cvt_scalef32_f16_bf8", VOP3_CVT_SCALE_F1632_FP8BF8_TiedInput_Profile<VOP_V2F16_I32_F32>>;
1102    defm V_CVT_SCALEF32_PK_BF8_F32 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f32", VOP3_CVT_SCALE_FP4FP8BF8_F32_TiedInput_Profile<VOP_V2I16_F32_F32_F32>>;
1103    defm V_CVT_SCALEF32_PK_BF8_F16 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_TiedInput_Profile<VOP_V2I16_V2F16_F32>>;
1104    defm V_CVT_SCALEF32_PK_BF8_BF16 : VOP3Inst<"v_cvt_scalef32_pk_bf8_bf16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_TiedInput_Profile<VOP_V2I16_V2BF16_F32>>;
1105  }
1106  defm V_CVT_SCALEF32_F32_BF8 : VOP3Inst<"v_cvt_scalef32_f32_bf8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f32>>;
1107  defm V_CVT_SCALEF32_PK_F32_BF8 : VOP3Inst<"v_cvt_scalef32_pk_f32_bf8", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f32>>;
1108  defm V_CVT_SCALEF32_PK_F16_BF8    : VOP3Inst<"v_cvt_scalef32_pk_f16_bf8",  VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f16>>; // packed f16 result, as in the v2f16 selection pattern for this instruction
1109  defm V_CVT_SCALEF32_PK_BF16_BF8   : VOP3Inst<"v_cvt_scalef32_pk_bf16_bf8", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2bf16>>;
1110}
1111
1112let SubtargetPredicate = HasFP4ConversionScaleInsts, mayRaiseFPException = 0 in {
1113  defm V_CVT_SCALEF32_PK_F32_FP4 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f32>>;
1114  let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
1115    defm V_CVT_SCALEF32_PK_FP4_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp4_f32", VOP3_CVT_SCALE_FP4_F32_TiedInput_Profile<VOP_I32_F32_F32_F32>>;
1116    let Constraints = "$vdst = $vdst_in,@earlyclobber $vdst" in {
      // A nested `let` replaces the enclosing Constraints string rather than
      // appending to it, so the $vdst = $vdst_in tie is restated here together
      // with @earlyclobber. The profiles below all declare VGPR_32:$vdst_in.
1117      defm V_CVT_SCALEF32_SR_PK_FP4_F16:  VOP3Inst<"v_cvt_scalef32_sr_pk_fp4_f16", VOP3_CVT_SCALE_SR_PK_F4_F16BF16_TiedInput_Profile<v2f16>>;
1118      defm V_CVT_SCALEF32_SR_PK_FP4_BF16: VOP3Inst<"v_cvt_scalef32_sr_pk_fp4_bf16", VOP3_CVT_SCALE_SR_PK_F4_F16BF16_TiedInput_Profile<v2bf16>>;
1119      defm V_CVT_SCALEF32_SR_PK_FP4_F32:  VOP3Inst<"v_cvt_scalef32_sr_pk_fp4_f32", VOP3_CVT_SCALE_SR_PK_F4_F32_TiedInput_Profile>;
1120    }
1121  }
1122  defm V_CVT_SCALEF32_PK_F16_FP4 : VOP3Inst<"v_cvt_scalef32_pk_f16_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f16>>;
1123  defm V_CVT_SCALEF32_PK_BF16_FP4 : VOP3Inst<"v_cvt_scalef32_pk_bf16_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2bf16>>;
1124
1125  // These instructions have non-standard use of op_sel. In particular they are
1126  // using op_sel bits 2 and 3 while only having two sources.
1127  let Constraints = "$vdst = $src2", DisableEncoding = "$src2" in {
1128    defm V_CVT_SCALEF32_PK_FP4_F16 : VOP3Inst<"v_cvt_scalef32_pk_fp4_f16", VOP3_CVT_SCALE_FP4_F16BF16_TiedInput_Profile<VOP_I32_V2F16_F32_F32>>;
1129    defm V_CVT_SCALEF32_PK_FP4_BF16 : VOP3Inst<"v_cvt_scalef32_pk_fp4_bf16", VOP3_CVT_SCALE_FP4_F16BF16_TiedInput_Profile<VOP_I32_V2BF16_F32_F32>>;
1130  }
1131}
1132
1133let SubtargetPredicate = HasFP6BF6ConversionScaleInsts, mayRaiseFPException = 0, Constraints = "@earlyclobber $vdst" in {
1134  defm V_CVT_SCALEF32_PK32_F32_FP6  : VOP3Inst<"v_cvt_scalef32_pk32_f32_fp6", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V32F32_V6I32_F32>, int_amdgcn_cvt_scalef32_pk32_f32_fp6>;
1135  defm V_CVT_SCALEF32_PK32_F32_BF6  : VOP3Inst<"v_cvt_scalef32_pk32_f32_bf6", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V32F32_V6I32_F32>, int_amdgcn_cvt_scalef32_pk32_f32_bf6>;
1136  defm V_CVT_SCALEF32_PK32_F16_FP6  : VOP3Inst<"v_cvt_scalef32_pk32_f16_fp6",  VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V32F16_V6I32_F32>, int_amdgcn_cvt_scalef32_pk32_f16_fp6>;
1137  defm V_CVT_SCALEF32_PK32_BF16_FP6 : VOP3Inst<"v_cvt_scalef32_pk32_bf16_fp6", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V32BF16_V6I32_F32>, int_amdgcn_cvt_scalef32_pk32_bf16_fp6>;
1138  defm V_CVT_SCALEF32_PK32_F16_BF6  : VOP3Inst<"v_cvt_scalef32_pk32_f16_bf6",  VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V32F16_V6I32_F32>, int_amdgcn_cvt_scalef32_pk32_f16_bf6>;
1139  defm V_CVT_SCALEF32_PK32_BF16_BF6 : VOP3Inst<"v_cvt_scalef32_pk32_bf16_bf6", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V32BF16_V6I32_F32>, int_amdgcn_cvt_scalef32_pk32_bf16_bf6>;
1140}
1141
1142let SubtargetPredicate = HasF16BF16ToFP6BF6ConversionScaleInsts, mayRaiseFPException = 0, Constraints = "@earlyclobber $vdst" in {
1143  defm V_CVT_SCALEF32_PK32_FP6_F16   : VOP3Inst<"v_cvt_scalef32_pk32_fp6_f16",  VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V6I32_V32F16_F32>,  int_amdgcn_cvt_scalef32_pk32_fp6_f16>;
1144  defm V_CVT_SCALEF32_PK32_BF6_F16   : VOP3Inst<"v_cvt_scalef32_pk32_bf6_f16",  VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V6I32_V32F16_F32>,  int_amdgcn_cvt_scalef32_pk32_bf6_f16>;
1145  defm V_CVT_SCALEF32_PK32_FP6_BF16  : VOP3Inst<"v_cvt_scalef32_pk32_fp6_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V6I32_V32BF16_F32>, int_amdgcn_cvt_scalef32_pk32_fp6_bf16>;
1146  defm V_CVT_SCALEF32_PK32_BF6_BF16  : VOP3Inst<"v_cvt_scalef32_pk32_bf6_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V6I32_V32BF16_F32>, int_amdgcn_cvt_scalef32_pk32_bf6_bf16>;
1147  defm V_CVT_SCALEF32_SR_PK32_BF6_BF16 : VOP3Inst<"v_cvt_scalef32_sr_pk32_bf6_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V6I32_V32BF16_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk32_bf6_bf16>;
1148  defm V_CVT_SCALEF32_SR_PK32_BF6_F16  : VOP3Inst<"v_cvt_scalef32_sr_pk32_bf6_f16",  VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V6I32_V32F16_I32_F32>,  int_amdgcn_cvt_scalef32_sr_pk32_bf6_f16>;
1149  defm V_CVT_SCALEF32_SR_PK32_BF6_F32  : VOP3Inst<"v_cvt_scalef32_sr_pk32_bf6_f32",  VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V6I32_V32F32_I32_F32>,  int_amdgcn_cvt_scalef32_sr_pk32_bf6_f32>;
1150  defm V_CVT_SCALEF32_SR_PK32_FP6_BF16 : VOP3Inst<"v_cvt_scalef32_sr_pk32_fp6_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V6I32_V32BF16_I32_F32>, int_amdgcn_cvt_scalef32_sr_pk32_fp6_bf16>;
1151  defm V_CVT_SCALEF32_SR_PK32_FP6_F16  : VOP3Inst<"v_cvt_scalef32_sr_pk32_fp6_f16",  VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V6I32_V32F16_I32_F32>,  int_amdgcn_cvt_scalef32_sr_pk32_fp6_f16>;
1152  defm V_CVT_SCALEF32_SR_PK32_FP6_F32  : VOP3Inst<"v_cvt_scalef32_sr_pk32_fp6_f32",  VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V6I32_V32F32_I32_F32>,  int_amdgcn_cvt_scalef32_sr_pk32_fp6_f32>;
1153}
1154
1155let SubtargetPredicate = HasGFX950Insts, mayRaiseFPException = 0 in {
1156  defm V_CVT_SCALEF32_2XPK16_FP6_F32 : VOP3Inst<"v_cvt_scalef32_2xpk16_fp6_f32",  VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V6I32_V16F32_V16F32_F32>,  int_amdgcn_cvt_scalef32_2xpk16_fp6_f32>;
1157  defm V_CVT_SCALEF32_2XPK16_BF6_F32 : VOP3Inst<"v_cvt_scalef32_2xpk16_bf6_f32",  VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V6I32_V16F32_V16F32_F32>,  int_amdgcn_cvt_scalef32_2xpk16_bf6_f32>;
1158}
1159
1160let SubtargetPredicate = HasCvtPkF16F32Inst in {
1161  let ReadsModeReg = 0 in {
1162    defm V_CVT_PK_F16_F32 : VOP3Inst<"v_cvt_pk_f16_f32", VOP3_Profile<VOP_V2F16_F32_F32>>;
1163  }
1164
  // v2f32 -> v2f16: pass sub0 and sub1 of the 64-bit source as the two inputs.
1165  def : GCNPat<(v2f16 (fpround v2f32:$src)),
1166               (V_CVT_PK_F16_F32_e64 0, (EXTRACT_SUBREG VReg_64:$src, sub0), 0, (EXTRACT_SUBREG VReg_64:$src, sub1))>;
  // v2f64 -> v2f16: convert each 64-bit element with V_CVT_F32_F64_e64 first,
  // then pack the two f32 results.
1167  def : GCNPat<(v2f16 (fpround v2f64:$src)),
1168               (V_CVT_PK_F16_F32_e64 0, (V_CVT_F32_F64_e64 0, (EXTRACT_SUBREG VReg_128:$src, sub0_sub1)),
1169                                     0, (V_CVT_F32_F64_e64 0, (EXTRACT_SUBREG VReg_128:$src, sub2_sub3)))>;
  // build_vector of two f32 -> f16 rounds: the modifiers matched by VOP3Mods
  // are forwarded to the packed conversion.
1170  def : GCNPat<(v2f16 (build_vector (f16 (fpround (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
1171                                    (f16 (fpround (f32 (VOP3Mods f32:$src1, i32:$src1_modifiers)))))),
1172               (V_CVT_PK_F16_F32_e64 $src0_modifiers, $src0, $src1_modifiers, $src1)>;
1173}
1174
1175class Cvt_Scale_FP4FP8BF8ToF16F32_Pat<SDPatternOperator node, VOP3_Pseudo inst, ValueType DstTy> : GCNPat<
1176    (DstTy (node i32:$src0, f32:$src1, timm:$index)),
1177    (inst (SrcAndDstSelToOpSelXForm_0_0 $index), $src0, (SrcAndDstSelToOpSelXForm_1_0 $index), $src1)
1178>;
1179def : Cvt_Scale_FP4FP8BF8ToF16F32_Pat<int_amdgcn_cvt_scalef32_f32_fp8, V_CVT_SCALEF32_F32_FP8_e64, f32>;
1180def : Cvt_Scale_FP4FP8BF8ToF16F32_Pat<int_amdgcn_cvt_scalef32_f32_bf8, V_CVT_SCALEF32_F32_BF8_e64, f32>;
1181def : Cvt_Scale_FP4FP8BF8ToF16F32_Pat<int_amdgcn_cvt_scalef32_pk_f16_fp4, V_CVT_SCALEF32_PK_F16_FP4_e64, v2f16>;
1182def : Cvt_Scale_FP4FP8BF8ToF16F32_Pat<int_amdgcn_cvt_scalef32_pk_bf16_fp4, V_CVT_SCALEF32_PK_BF16_FP4_e64, v2bf16>;
1183
// Selects `node` when its last operand is the constant `dst_sel`. When
// dst_sel is 0 the $src_sel immediate is converted with the _0_0 / _1_0
// transforms; otherwise the _0_1 / _1_1 transforms are used. $vdst_in is
// passed last as a VGPR_32.
1184class Cvt_Scale_FP8BF8ToF16_Pat<SDPatternOperator node, VOP3_Pseudo inst, int dst_sel> : GCNPat<
1185    (v2f16 (node v2f16:$vdst_in, i32:$src0, f32:$src1, timm:$src_sel, dst_sel)),
1186    (inst !if(!eq(dst_sel, 0), (SrcAndDstSelToOpSelXForm_0_0 $src_sel), (SrcAndDstSelToOpSelXForm_0_1 $src_sel)), $src0,
1187          !if(!eq(dst_sel, 0), (SrcAndDstSelToOpSelXForm_1_0 $src_sel), (SrcAndDstSelToOpSelXForm_1_1 $src_sel)), $src1, VGPR_32:$vdst_in)
1188>;
1189foreach DstSel = [0, -1] in {
1190  def : Cvt_Scale_FP8BF8ToF16_Pat<int_amdgcn_cvt_scalef32_f16_fp8, V_CVT_SCALEF32_F16_FP8_e64, DstSel>;
1191  def : Cvt_Scale_FP8BF8ToF16_Pat<int_amdgcn_cvt_scalef32_f16_bf8, V_CVT_SCALEF32_F16_BF8_e64, DstSel>;
1192}
1193
// Selection pattern for
//   node(v2i16 vdst_in, f32 src0, f32 src1, f32 src2, imm word_sel) -> v2i16.
// $word_sel is converted by DstSelToOpSelXForm and emitted in front of $src0;
// the slots in front of $src1 and $src2 are the literal 0. $vdst_in is
// forwarded as a VGPR_32 operand at the end of the result.
1194class Cvt_Scale_PK_F32ToFP8BF8_Pat<SDPatternOperator node, VOP3_Pseudo inst> : GCNPat<
1195    (v2i16 (node v2i16:$vdst_in, f32:$src0, f32:$src1, f32:$src2, timm:$word_sel)),
1196    (inst (DstSelToOpSelXForm $word_sel), $src0, 0, $src1, 0, $src2, VGPR_32:$vdst_in)
1197>;
1198def : Cvt_Scale_PK_F32ToFP8BF8_Pat<int_amdgcn_cvt_scalef32_pk_fp8_f32, V_CVT_SCALEF32_PK_FP8_F32_e64>;
1199def : Cvt_Scale_PK_F32ToFP8BF8_Pat<int_amdgcn_cvt_scalef32_pk_bf8_f32, V_CVT_SCALEF32_PK_BF8_F32_e64>;
1200
// Selection pattern for
//   node(i32 src0, f32 src1, imm word_sel) -> DstTy.
// $word_sel is converted by SrcSelToOpSelXForm and emitted in front of $src0;
// the slot in front of $src1 is the literal 0. There is no tied input.
1201class Cvt_Scale_PK_FP8BF8ToF16F32_Pat<SDPatternOperator node, VOP3_Pseudo inst, ValueType DstTy> : GCNPat<
1202    (DstTy (node i32:$src0, f32:$src1, timm:$word_sel)),
1203    (inst (SrcSelToOpSelXForm $word_sel), $src0, 0, $src1)
1204>;
// Instantiated for v2f32, v2f16 and v2bf16 results from fp8 and bf8 sources.
1205def : Cvt_Scale_PK_FP8BF8ToF16F32_Pat<int_amdgcn_cvt_scalef32_pk_f32_fp8, V_CVT_SCALEF32_PK_F32_FP8_e64, v2f32>;
1206def : Cvt_Scale_PK_FP8BF8ToF16F32_Pat<int_amdgcn_cvt_scalef32_pk_f32_bf8, V_CVT_SCALEF32_PK_F32_BF8_e64, v2f32>;
1207def : Cvt_Scale_PK_FP8BF8ToF16F32_Pat<int_amdgcn_cvt_scalef32_pk_f16_bf8, V_CVT_SCALEF32_PK_F16_BF8_e64, v2f16>;
1208def : Cvt_Scale_PK_FP8BF8ToF16F32_Pat<int_amdgcn_cvt_scalef32_pk_bf16_bf8, V_CVT_SCALEF32_PK_BF16_BF8_e64, v2bf16>;
1209def : Cvt_Scale_PK_FP8BF8ToF16F32_Pat<int_amdgcn_cvt_scalef32_pk_f16_fp8, V_CVT_SCALEF32_PK_F16_FP8_e64, v2f16>;
1210def : Cvt_Scale_PK_FP8BF8ToF16F32_Pat<int_amdgcn_cvt_scalef32_pk_bf16_fp8, V_CVT_SCALEF32_PK_BF16_FP8_e64, v2bf16>;
1211
// Selection pattern for
//   node(v2i16 vdst_in, SrcTy src0, f32 src1, imm word_sel) -> v2i16.
// $word_sel is converted by DstSelToOpSelXForm and emitted in front of $src0;
// the slot in front of $src1 is the literal 0. $vdst_in is forwarded as a
// VGPR_32 operand at the end of the result.
1212class Cvt_Scale_PK_F16BF16ToFP8BF8_Pat<SDPatternOperator node, VOP3_Pseudo inst, ValueType SrcTy> : GCNPat<
1213    (v2i16 (node v2i16:$vdst_in, SrcTy:$src0, f32:$src1, timm:$word_sel)),
1214    (inst (DstSelToOpSelXForm $word_sel), $src0, 0, $src1, VGPR_32:$vdst_in)
1215>;
// Instantiated for v2f16 and v2bf16 sources, fp8 and bf8 destinations.
1216def : Cvt_Scale_PK_F16BF16ToFP8BF8_Pat<int_amdgcn_cvt_scalef32_pk_fp8_f16, V_CVT_SCALEF32_PK_FP8_F16_e64, v2f16>;
1217def : Cvt_Scale_PK_F16BF16ToFP8BF8_Pat<int_amdgcn_cvt_scalef32_pk_fp8_bf16, V_CVT_SCALEF32_PK_FP8_BF16_e64, v2bf16>;
1218def : Cvt_Scale_PK_F16BF16ToFP8BF8_Pat<int_amdgcn_cvt_scalef32_pk_bf8_f16, V_CVT_SCALEF32_PK_BF8_F16_e64, v2f16>;
1219def : Cvt_Scale_PK_F16BF16ToFP8BF8_Pat<int_amdgcn_cvt_scalef32_pk_bf8_bf16, V_CVT_SCALEF32_PK_BF8_BF16_e64, v2bf16>;
1220
// Selection pattern for
//   node(i32 vdst_in, f32 src0, f32 src1, f32 src2, imm index) -> i32.
// $index feeds two XForms: DstSelToOpSel3XForm (emitted in front of $src0) and
// SrcAndDstSelToOpSelXForm_2_0 (emitted in front of $src2). The slot in front
// of $src1 is the literal 0. $vdst_in is forwarded as a VGPR_32 operand.
1221class Cvt_Scale_PK_F32ToFP4_Pat<SDPatternOperator node, VOP3_Pseudo inst> : GCNPat<
1222    (i32 (node i32:$vdst_in, f32:$src0, f32:$src1, f32:$src2, timm:$index)),
1223    (inst (DstSelToOpSel3XForm $index), $src0, 0, $src1, (SrcAndDstSelToOpSelXForm_2_0 $index), $src2, VGPR_32:$vdst_in)
1224>;
// The fp4 -> v2f32 direction reuses the two-source Cvt_Scale_FP4FP8BF8ToF16F32_Pat
// class; only the f32 -> fp4 direction uses the class defined just above.
1225def : Cvt_Scale_FP4FP8BF8ToF16F32_Pat<int_amdgcn_cvt_scalef32_pk_f32_fp4, V_CVT_SCALEF32_PK_F32_FP4_e64, v2f32>;
1226def : Cvt_Scale_PK_F32ToFP4_Pat<int_amdgcn_cvt_scalef32_pk_fp4_f32, V_CVT_SCALEF32_PK_FP4_F32_e64>;
1227
// Selection pattern for
//   node(i32 old, SrcTy src0, f32 src1, imm index) -> i32.
// Unlike the sibling patterns, the first intrinsic operand is bound as $src2
// and emitted in the third source slot of `inst`; no separate $vdst_in operand
// is appended. $index feeds DstSelToOpSel3XForm (in front of $src0) and
// SrcAndDstSelToOpSelXForm_2_0 (in front of $src2).
1228class Cvt_Scale_PK_F16ToFP4_Pat<SDPatternOperator node, VOP3_Pseudo inst, ValueType SrcTy> : GCNPat<
1229    (i32 (node i32:$src2, SrcTy:$src0, f32:$src1, timm:$index)),
1230    (inst (DstSelToOpSel3XForm $index), $src0, 0, $src1, (SrcAndDstSelToOpSelXForm_2_0 $index), $src2)
1231>;
1232def : Cvt_Scale_PK_F16ToFP4_Pat<int_amdgcn_cvt_scalef32_pk_fp4_f16, V_CVT_SCALEF32_PK_FP4_F16_e64, v2f16>;
1233def : Cvt_Scale_PK_F16ToFP4_Pat<int_amdgcn_cvt_scalef32_pk_fp4_bf16, V_CVT_SCALEF32_PK_FP4_BF16_e64, v2bf16>;
1234
// Selection pattern for the "sr" conversion intrinsics:
//   node(i32 vdst_in, SrcTy src0, i32 src1, f32 src2, imm index) -> i32.
// $index feeds DstSelToOpSel3XForm (in front of $src0) and
// SrcAndDstSelToOpSelXForm_2_0 (in front of $src2); the slot in front of $src1
// is the literal 0. $vdst_in is forwarded as a VGPR_32 operand.
// Despite the "PK" in the class name it is also instantiated below with
// non-vector source types (bf16 / f16 / f32) for the sr_bf8 / sr_fp8 intrinsics.
1235class Cvt_Scale_SR_PK_BF16F16F32ToFP4BF8FP8_Pat<SDPatternOperator node, VOP3_Pseudo inst, ValueType SrcTy> : GCNPat<
1236    (i32 (node i32:$vdst_in, SrcTy:$src0, i32:$src1, f32:$src2, timm:$index)),
1237    (inst (DstSelToOpSel3XForm $index), $src0, 0, $src1, (SrcAndDstSelToOpSelXForm_2_0 $index), $src2, VGPR_32:$vdst_in)
1238>;
// Packed fp4 destinations (vector sources).
1239def : Cvt_Scale_SR_PK_BF16F16F32ToFP4BF8FP8_Pat<int_amdgcn_cvt_scalef32_sr_pk_fp4_f16, V_CVT_SCALEF32_SR_PK_FP4_F16_e64, v2f16>;
1240def : Cvt_Scale_SR_PK_BF16F16F32ToFP4BF8FP8_Pat<int_amdgcn_cvt_scalef32_sr_pk_fp4_bf16, V_CVT_SCALEF32_SR_PK_FP4_BF16_e64, v2bf16>;
1241def : Cvt_Scale_SR_PK_BF16F16F32ToFP4BF8FP8_Pat<int_amdgcn_cvt_scalef32_sr_pk_fp4_f32, V_CVT_SCALEF32_SR_PK_FP4_F32_e64, v2f32>;
// bf8 / fp8 destinations (scalar sources).
1242def : Cvt_Scale_SR_PK_BF16F16F32ToFP4BF8FP8_Pat<int_amdgcn_cvt_scalef32_sr_bf8_bf16, V_CVT_SCALEF32_SR_BF8_BF16_e64, bf16>;
1243def : Cvt_Scale_SR_PK_BF16F16F32ToFP4BF8FP8_Pat<int_amdgcn_cvt_scalef32_sr_bf8_f16, V_CVT_SCALEF32_SR_BF8_F16_e64, f16>;
1244def : Cvt_Scale_SR_PK_BF16F16F32ToFP4BF8FP8_Pat<int_amdgcn_cvt_scalef32_sr_bf8_f32, V_CVT_SCALEF32_SR_BF8_F32_e64, f32>;
1245def : Cvt_Scale_SR_PK_BF16F16F32ToFP4BF8FP8_Pat<int_amdgcn_cvt_scalef32_sr_fp8_bf16, V_CVT_SCALEF32_SR_FP8_BF16_e64, bf16>;
1246def : Cvt_Scale_SR_PK_BF16F16F32ToFP4BF8FP8_Pat<int_amdgcn_cvt_scalef32_sr_fp8_f16, V_CVT_SCALEF32_SR_FP8_F16_e64, f16>;
1247def : Cvt_Scale_SR_PK_BF16F16F32ToFP4BF8FP8_Pat<int_amdgcn_cvt_scalef32_sr_fp8_f32, V_CVT_SCALEF32_SR_FP8_F32_e64, f32>;
1248
// Instructions and patterns guarded by the isGFX10Plus subtarget predicate:
// v_xor3_b32, the two fixed-select permlane16 instructions, and the 16-bit
// no-carry add/sub.
1249let SubtargetPredicate = isGFX10Plus in {
1250  let isCommutable = 1, isReMaterializable = 1 in {
1251    defm V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
1252  } // End isCommutable = 1, isReMaterializable = 1
  // Select a chain of two xor nodes to the single three-operand instruction.
1253  def : ThreeOp_i32_Pats<xor, xor, V_XOR3_B32_e64>;
1254
  // The destination is tied to the $vdst_in input, and $vdst_in is excluded
  // from the encoding.
1255  let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
1256    defm V_PERMLANE16_B32 : VOP3Inst<"v_permlane16_b32", VOP3_PERMLANE_Profile>;
1257    defm V_PERMLANEX16_B32 : VOP3Inst<"v_permlanex16_b32", VOP3_PERMLANE_Profile>;
1258  } // End $vdst = $vdst_in, DisableEncoding $vdst_in
1259
  // One selection pattern per value type listed in Reg32Types.
1260  foreach vt = Reg32Types.types in {
1261    def : PermlanePat<int_amdgcn_permlane16, V_PERMLANE16_B32_e64, vt>;
1262    def : PermlanePat<int_amdgcn_permlanex16, V_PERMLANEX16_B32_e64, vt>;
1263  }
  // Only the add is marked commutable; the sub is not.
1264  let isCommutable = 1 in {
1265    defm V_ADD_NC_U16 : VOP3Inst_t16 <"v_add_nc_u16", VOP_I16_I16_I16, add>;
1266  } // End isCommutable = 1
1267  defm V_SUB_NC_U16 : VOP3Inst_t16 <"v_sub_nc_u16", VOP_I16_I16_I16, sub>;
1268
1269} // End SubtargetPredicate = isGFX10Plus
1270
// 16-bit add/sub patterns for GFX10+ targets where the True16Predicate is
// NotHasTrue16BitInsts: saturating add/sub select to the plain _e64 pseudos
// via OpSelBinOpClampPat, plus the add-of-negated-constant rewrite below.
1271let True16Predicate = NotHasTrue16BitInsts, SubtargetPredicate = isGFX10Plus in {
1272   def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_e64>;
1273   def : OpSelBinOpClampPat<usubsat, V_SUB_NC_U16_e64>;
1274   // Undo sub x, c -> add x, -c canonicalization since c is more likely
1275   // an inline immediate than -c.
1276   def : GCNPat<
1277     (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
1278     (V_SUB_NC_U16_e64 0, VSrc_b16:$src0, 0, NegSubInlineIntConst16:$src1, 0, 0)
1279   >;
1280} // End True16Predicate = NotHasTrue16BitInsts, SubtargetPredicate = isGFX10Plus
1281
// Same three patterns as the NotHasTrue16BitInsts group, but targeting the
// _t16_e64 pseudos when True16Predicate is UseRealTrue16Insts.
1282let True16Predicate = UseRealTrue16Insts in {
1283  def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_t16_e64>;
1284  def : OpSelBinOpClampPat<usubsat, V_SUB_NC_U16_t16_e64>;
  // add x, C (C matched by NegSubInlineIntConst16) is selected as a subtract.
1285  def : GCNPat<
1286    (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
1287    (V_SUB_NC_U16_t16_e64 0, VSrc_b16:$src0, 0, NegSubInlineIntConst16:$src1, 0, 0)
1288  >;
1289} // End True16Predicate = UseRealTrue16Insts
1290
// Same three patterns as the NotHasTrue16BitInsts group, but targeting the
// _fake16_e64 pseudos when True16Predicate is UseFakeTrue16Insts.
1291let True16Predicate = UseFakeTrue16Insts in {
1292   def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_fake16_e64>;
1293   def : OpSelBinOpClampPat<usubsat, V_SUB_NC_U16_fake16_e64>;
   // add x, C (C matched by NegSubInlineIntConst16) is selected as a subtract.
1294   def : GCNPat<
1295     (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
1296     (V_SUB_NC_U16_fake16_e64 0, VSrc_b16:$src0, 0, NegSubInlineIntConst16:$src1, 0, 0)
1297   >;
1298} // End True16Predicate = UseFakeTrue16Insts
1299
// GFX12+ "var" forms of permlane16 / permlanex16. As with the fixed-select
// forms, the destination is tied to $vdst_in and $vdst_in is not encoded.
1300let SubtargetPredicate = isGFX12Plus in {
1301  let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
1302    defm V_PERMLANE16_VAR_B32  : VOP3Inst<"v_permlane16_var_b32",  VOP3_PERMLANE_VAR_Profile>;
1303    defm V_PERMLANEX16_VAR_B32 : VOP3Inst<"v_permlanex16_var_b32", VOP3_PERMLANE_VAR_Profile>;
1304  } // End $vdst = $vdst_in, DisableEncoding $vdst_in
1305
  // Intrinsic -> instruction selection patterns.
1306  def : PermlaneVarPat<int_amdgcn_permlane16_var,  V_PERMLANE16_VAR_B32_e64>;
1307  def : PermlaneVarPat<int_amdgcn_permlanex16_var, V_PERMLANEX16_VAR_B32_e64>;
1308
1309} // End SubtargetPredicate = isGFX12Plus
1310
// v_bitop3_b16 / v_bitop3_b32 and their selection patterns.
// Two pattern sources feed each instruction:
//   * the int_amdgcn_bitop3 intrinsic, and
//   * the BITOP3_32 / BITOP3_16 ComplexPatterns declared at the top of this
//     file (selector "SelectBITOP3", rooted at and / or / xor nodes).
// The b16 result carries a literal 0 in front of each source and a trailing
// literal 0; the b32 result has neither.
1311let SubtargetPredicate = HasBitOp3Insts  in {
1312  let isReMaterializable = 1 in {
1313    defm V_BITOP3_B16 : VOP3Inst <"v_bitop3_b16",
1314                                  VOP3_BITOP3_Profile<VOPProfile <[i16, i16, i16, i16, i32]>, VOP3_OPSEL>>;
1315    defm V_BITOP3_B32 : VOP3Inst <"v_bitop3_b32",
1316                                  VOP3_BITOP3_Profile<VOPProfile <[i32, i32, i32, i32, i32]>, VOP3_REGULAR>>;
1317  }
  // Intrinsic, 32-bit.
1318  def : GCNPat<
1319    (i32 (int_amdgcn_bitop3 i32:$src0, i32:$src1, i32:$src2, i32:$bitop3)),
1320    (i32 (V_BITOP3_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2, timm:$bitop3))
1321  >;
1322
  // Intrinsic, 16-bit.
1323  def : GCNPat<
1324    (i16 (int_amdgcn_bitop3 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)),
1325    (i16 (V_BITOP3_B16_e64 0, VSrc_b16:$src0, 0, VSrc_b16:$src1, 0, VSrc_b16:$src2, timm:$bitop3, 0))
1326  >;
1327
  // ComplexPattern match, 32-bit.
1328  def : GCNPat<
1329    (i32 (BITOP3_32 i32:$src0, i32:$src1, i32:$src2, i32:$bitop3)),
1330    (i32 (V_BITOP3_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2, timm:$bitop3))
1331  >;
1332
  // ComplexPattern match, 16-bit.
1333  def : GCNPat<
1334    (i16 (BITOP3_16 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)),
1335    (i16 (V_BITOP3_B16_e64 0, VSrc_b16:$src0, 0, VSrc_b16:$src1, 0, VSrc_b16:$src2, timm:$bitop3, 0))
1336  >;
1337} // End SubtargetPredicate = HasBitOp3Insts
1338
// Selection pattern for AMDGPUdiv_fmas.
// The three value operands are matched through VOP3Mods, so each yields a
// source and a modifiers operand. The fourth, i1, operand must be the physical
// register CondReg; it does not appear in the result because the instruction
// reads it implicitly (see the v_div_fmas note at the top of this file).
1339class DivFmasPat<ValueType vt, Instruction inst, Register CondReg> : GCNPat<
1340  (AMDGPUdiv_fmas (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
1341                  (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)),
1342                  (vt (VOP3Mods vt:$src2, i32:$src2_modifiers)),
1343                  (i1 CondReg)),
1344  (inst $src0_modifiers, $src0, $src1_modifiers, $src1, $src2_modifiers, $src2)
1345>;
1346
// Wave64: the condition register is the full VCC.
1347let WaveSizePredicate = isWave64 in {
1348def : DivFmasPat<f32, V_DIV_FMAS_F32_e64, VCC>;
1349def : DivFmasPat<f64, V_DIV_FMAS_F64_e64, VCC>;
1350}
1351
// Wave32: the condition register is VCC_LO.
1352let WaveSizePredicate = isWave32 in {
1353def : DivFmasPat<f32, V_DIV_FMAS_F32_e64, VCC_LO>;
1354def : DivFmasPat<f64, V_DIV_FMAS_F64_e64, VCC_LO>;
1355}
1356
// VOP3 profile for the dot-product instructions: a VOP3_OPSEL profile over P
// with the clamp and omod operands disabled.
1357class VOP3_DOT_Profile<VOPProfile P> : VOP3_Profile<P, VOP3_OPSEL> {
1358  let HasClamp = 0;
1359  let HasOMod = 0;
1360}
1361
// True16 counterpart of VOP3_DOT_Profile, built on VOP3_Profile_True16.
// Clamp and omod are disabled, source modifiers are forced on for all three
// sources, and the VOP3 DPP modifier operand class of each source is
// overridden explicitly.
1362class VOP3_DOT_Profile_t16<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOP3_Profile_True16<P, Features> {
1363  let HasClamp = 0;
1364  let HasOMod = 0;
1365  // Override modifiers for bf16(i16) (same as float modifiers).
1366  let HasSrc0Mods = 1;
1367  let HasSrc1Mods = 1;
1368  let HasSrc2Mods = 1;
1369  let Src0ModVOP3DPP = FPVRegInputMods;
1370  let Src1ModVOP3DPP = FP32VCSrcInputMods;
1371  let Src2ModVOP3DPP = FPT16VCSrcInputMods</*IsFake16*/0>;
1372}
1373
// Fake16 counterpart of VOP3_DOT_Profile, built on VOP3_Profile_Fake16.
// Clamp and omod are disabled and source modifiers are forced on for all three
// sources. AsmVOP3Base is recomputed through getAsmVOP3Base with the three
// HasSrcNMods arguments hard-wired to 1.
1374class VOP3_DOT_Profile_fake16<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOP3_Profile_Fake16<P, Features> {
1375  let HasClamp = 0;
1376  let HasOMod = 0;
1377  // Override modifiers for bf16(i16) (same as float modifiers).
1378  let HasSrc0Mods = 1;
1379  let HasSrc1Mods = 1;
1380  let HasSrc2Mods = 1;
1381  let AsmVOP3Base = getAsmVOP3Base<NumSrcArgs, HasDst, HasClamp,
1382   HasOpSel, HasOMod, IsVOP3P, HasModifiers, 1/*HasSrc0Mods*/, 1/*HasSrc1Mods*/,
1383   1/*HasSrc2Mods*/, DstVT>.ret;
1384}
1385
// Instructions guarded by isGFX11Plus: fused maxmin / minmax for f32, f16, u32
// and i32, plus the packed f32 -> i16 / u16 conversions. No selection node is
// attached to any of these defms here.
1386let SubtargetPredicate = isGFX11Plus in {
1387  defm V_MAXMIN_F32     : VOP3Inst<"v_maxmin_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
1388  defm V_MINMAX_F32     : VOP3Inst<"v_minmax_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
1389  defm V_MAXMIN_F16     : VOP3Inst_t16<"v_maxmin_f16", VOP_F16_F16_F16_F16>;
1390  defm V_MINMAX_F16     : VOP3Inst_t16<"v_minmax_f16", VOP_F16_F16_F16_F16>;
1391  defm V_MAXMIN_U32     : VOP3Inst<"v_maxmin_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
1392  defm V_MINMAX_U32     : VOP3Inst<"v_minmax_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
1393  defm V_MAXMIN_I32     : VOP3Inst<"v_maxmin_i32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
1394  defm V_MINMAX_I32     : VOP3Inst<"v_minmax_i32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
1395  defm V_CVT_PK_I16_F32 : VOP3Inst<"v_cvt_pk_i16_f32", VOP3_Profile<VOP_V2I16_F32_F32>>;
1396  defm V_CVT_PK_U16_F32 : VOP3Inst<"v_cvt_pk_u16_f32", VOP3_Profile<VOP_V2I16_F32_F32>>;
1397} // End SubtargetPredicate = isGFX11Plus
1398
// Profile for the v_cvt_sr_{f16,bf16}_f32 instructions. It inherits from
// VOP3_CVT_SCALE_F1632_FP8BF8_TiedInput_Profile and replaces only the op_sel
// input list: FP32 modifiers for src0, Int32 modifiers for src1, then the
// VGPR_32 $vdst_in input and the op_sel operand.
1399class VOP3_CVT_SR_FP16_TiedInput_Profile<VOPProfile P> : VOP3_CVT_SCALE_F1632_FP8BF8_TiedInput_Profile<P> {
1400  let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
1401                          Int32InputMods:$src1_modifiers, Src1RC64:$src1,
1402                          VGPR_32:$vdst_in, op_sel0:$op_sel);
1403}
1404
1405// FIXME: GlobalISel cannot distinguish f16 and bf16 and may start using bf16 patterns
1406//        instead of less complex f16. Disable GlobalISel for these for now.
// bf16_fpround is a thin wrapper around fpround: the SelectionDAG predicate
// always accepts, while the GlobalISel predicate always rejects, so patterns
// written with it are never used by GlobalISel.
1407def bf16_fpround : PatFrag <(ops node:$src0),  (fpround $src0), [{ return true; }]> {
1408  let GISelPredicateCode = [{return false;}];
1409}
1410
// v_cvt_pk_bf16_f32 and the fpround-to-bf16 patterns that select to it.
// All patterns use bf16_fpround, so they apply to SelectionDAG only.
1411let SubtargetPredicate = HasBF16ConversionInsts in {
1412  let ReadsModeReg = 0 in {
1413    defm V_CVT_PK_BF16_F32    : VOP3Inst<"v_cvt_pk_bf16_f32", VOP3_Profile<VOP_V2BF16_F32_F32>>;
1414  }
  // v2f32 -> v2bf16: feed the two 32-bit halves (sub0, sub1) as the sources.
1415  def : GCNPat<(v2bf16 (bf16_fpround v2f32:$src)),
1416               (V_CVT_PK_BF16_F32_e64 0, (EXTRACT_SUBREG VReg_64:$src, sub0), 0, (EXTRACT_SUBREG VReg_64:$src, sub1))>;
  // v2f64 -> v2bf16: convert each 64-bit half to f32 with V_CVT_F32_F64 first.
1417  def : GCNPat<(v2bf16 (bf16_fpround v2f64:$src)),
1418               (V_CVT_PK_BF16_F32_e64 0, (V_CVT_F32_F64_e64 0, (EXTRACT_SUBREG VReg_128:$src, sub0_sub1)),
1419                                      0, (V_CVT_F32_F64_e64 0, (EXTRACT_SUBREG VReg_128:$src, sub2_sub3)))>;
  // build_vector of two independently rounded f32 values: one packed convert,
  // keeping each source's VOP3 modifiers.
1420  def : GCNPat<(v2bf16 (build_vector (bf16 (bf16_fpround (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
1421                                     (bf16 (bf16_fpround (f32 (VOP3Mods f32:$src1, i32:$src1_modifiers)))))),
1422               (V_CVT_PK_BF16_F32_e64 $src0_modifiers, $src0, $src1_modifiers, $src1)>;
  // Scalar f32 -> bf16: the second source is an IMPLICIT_DEF.
1423  def : GCNPat<(bf16 (bf16_fpround (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))),
1424               (V_CVT_PK_BF16_F32_e64 $src0_modifiers, $src0, 0, (f32 (IMPLICIT_DEF)))>;
  // Scalar f64 -> bf16: the modifiers are applied on the f64 -> f32 step, and
  // the second source of the packed convert is an IMPLICIT_DEF.
1425  def : GCNPat<(bf16 (bf16_fpround (f64 (VOP3Mods f64:$src0, i32:$src0_modifiers)))),
1426               (V_CVT_PK_BF16_F32_e64 0, (f32 (V_CVT_F32_F64_e64 $src0_modifiers, $src0)), 0, (f32 (IMPLICIT_DEF)))>;
1427}
1428
// Selection pattern for
//   node(DstTy vdst_in, f32 src0, i32 src1, imm word_sel) -> DstTy.
// $word_sel is converted by DstSelToOpSelXForm and emitted in front of $src0;
// the slot in front of $src1 is the literal 0. $vdst_in is forwarded as a
// VGPR_32 operand at the end of the result.
1429class Cvt_Scale_Sr_F32ToBF16F16_Pat<SDPatternOperator node, VOP3_Pseudo inst, ValueType DstTy> : GCNPat<
1430    (DstTy (node DstTy:$vdst_in, f32:$src0, i32:$src1, timm:$word_sel)),
1431    (inst (DstSelToOpSelXForm $word_sel), $src0, 0, $src1, VGPR_32:$vdst_in)
1432>;
1433
// v_cvt_sr_{f16,bf16}_f32: the destination is tied to $vdst_in. The intrinsic
// patterns are instantiated with the two-element vector types (v2bf16 / v2f16)
// as DstTy.
1434let SubtargetPredicate = HasF32ToF16BF16ConversionSRInsts in {
1435  let Constraints = "$vdst = $vdst_in", DisableEncoding = "$vdst_in" in {
1436    defm V_CVT_SR_F16_F32   : VOP3Inst<"v_cvt_sr_f16_f32", VOP3_CVT_SR_FP16_TiedInput_Profile<VOP_F16_F32_I32>>;
1437    defm V_CVT_SR_BF16_F32  : VOP3Inst<"v_cvt_sr_bf16_f32", VOP3_CVT_SR_FP16_TiedInput_Profile<VOP_BF16_F32_I32>>;
1438  }
1439  def : Cvt_Scale_Sr_F32ToBF16F16_Pat<int_amdgcn_cvt_sr_bf16_f32, V_CVT_SR_BF16_F32_e64, v2bf16>;
1440  def : Cvt_Scale_Sr_F32ToBF16F16_Pat<int_amdgcn_cvt_sr_f16_f32, V_CVT_SR_F16_F32_e64, v2f16>;
1441}
1442
// GFX12+ fused maximumminimum / minimummaximum for f32 and f16. All four are
// defined with ReadsModeReg = 0 and without a selection node.
1443let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
1444  defm V_MAXIMUMMINIMUM_F32 : VOP3Inst<"v_maximumminimum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
1445  defm V_MINIMUMMAXIMUM_F32 : VOP3Inst<"v_minimummaximum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
1446  defm V_MAXIMUMMINIMUM_F16 : VOP3Inst_t16<"v_maximumminimum_f16", VOP_F16_F16_F16_F16>;
1447  defm V_MINIMUMMAXIMUM_F16 : VOP3Inst_t16<"v_minimummaximum_f16", VOP_F16_F16_F16_F16>;
1448} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
1449
// 16-bit-result dot2 instructions (f16 and bf16). Each defm receives three
// profiles - the base VOP3_DOT_Profile, the true16 one and the fake16 one -
// followed by the intrinsic it selects from.
1450let SubtargetPredicate = HasDot9Insts, IsDOT=1 in {
1451  defm V_DOT2_F16_F16 :   VOP3Inst_t16_with_profiles<"v_dot2_f16_f16", VOP3_DOT_Profile<VOP_F16_V2F16_V2F16_F16>,
1452                                                      VOP3_DOT_Profile_t16<VOP_F16_V2F16_V2F16_F16>,
1453                                                      VOP3_DOT_Profile_fake16<VOP_F16_V2F16_V2F16_F16>,
1454                                                      int_amdgcn_fdot2_f16_f16>;
1455  defm V_DOT2_BF16_BF16 : VOP3Inst_t16_with_profiles<"v_dot2_bf16_bf16", VOP3_DOT_Profile<VOP_BF16_V2BF16_V2BF16_BF16>,
1456                                                      VOP3_DOT_Profile_t16<VOP_BF16_V2BF16_V2BF16_BF16>,
1457                                                      VOP3_DOT_Profile_fake16<VOP_BF16_V2BF16_V2BF16_BF16>,
1458                                                      int_amdgcn_fdot2_bf16_bf16>;
1459}
1460
// Single-source profile for the "pseudo scalar" v_s_* instructions.
//   Dst   - register class wrapped in VOPDstOperand for the destination.
//   SrcOp - operand used for the 64-bit-encoding src0.
//   dstVt - result value type.
//   srcVt - source value type; defaults to dstVt.
// Both omod and source modifiers are enabled.
1461class VOP_Pseudo_Scalar<RegisterClass Dst, RegisterOperand SrcOp,
1462                        ValueType dstVt, ValueType srcVt = dstVt>
1463    : VOPProfile<[dstVt, srcVt, untyped, untyped]> {
1464  let DstRC = VOPDstOperand<Dst>;
1465  let Src0RC64 = SrcOp;
1466
1467  let HasOMod = 1;
1468  let HasModifiers = 1;
1469}
1470
// Note that the F16 profile declares an f32 result type with an f16 source.
1471def VOP_Pseudo_Scalar_F32 : VOP_Pseudo_Scalar<SReg_32_XEXEC, SSrc_f32, f32>;
1472def VOP_Pseudo_Scalar_F16 : VOP_Pseudo_Scalar<SReg_32_XEXEC, SSrc_f16, f32, f16>;
1473
// Pseudo-scalar transcendental instructions (exp, log, rcp, rsq, sqrt).
// The F32 variants pass their selection node directly to the defm. The F16
// variants pass none; they are selected through the PseudoScalarPatF16
// patterns that follow this block.
1474let SubtargetPredicate = HasPseudoScalarTrans, TRANS = 1,
1475    isReMaterializable = 1, SchedRW = [WritePseudoScalarTrans] in {
1476  defm V_S_EXP_F32  : VOP3PseudoScalarInst<"v_s_exp_f32", VOP_Pseudo_Scalar_F32, AMDGPUexp>;
1477  defm V_S_EXP_F16  : VOP3PseudoScalarInst<"v_s_exp_f16", VOP_Pseudo_Scalar_F16>;
1478  defm V_S_LOG_F32  : VOP3PseudoScalarInst<"v_s_log_f32", VOP_Pseudo_Scalar_F32, AMDGPUlog>;
1479  defm V_S_LOG_F16  : VOP3PseudoScalarInst<"v_s_log_f16", VOP_Pseudo_Scalar_F16>;
1480  defm V_S_RCP_F32  : VOP3PseudoScalarInst<"v_s_rcp_f32", VOP_Pseudo_Scalar_F32, AMDGPUrcp>;
1481  defm V_S_RCP_F16  : VOP3PseudoScalarInst<"v_s_rcp_f16", VOP_Pseudo_Scalar_F16>;
1482  defm V_S_RSQ_F32  : VOP3PseudoScalarInst<"v_s_rsq_f32", VOP_Pseudo_Scalar_F32, AMDGPUrsq>;
1483  defm V_S_RSQ_F16  : VOP3PseudoScalarInst<"v_s_rsq_f16", VOP_Pseudo_Scalar_F16>;
1484  defm V_S_SQRT_F32 : VOP3PseudoScalarInst<"v_s_sqrt_f32", VOP_Pseudo_Scalar_F32, any_amdgcn_sqrt>;
1485  defm V_S_SQRT_F16 : VOP3PseudoScalarInst<"v_s_sqrt_f16", VOP_Pseudo_Scalar_F16>;
1486}
1487
// Selection pattern for the f16 pseudo-scalar instructions.
// Matches UniformUnaryFrag<node> on an f16 operand taken through VOP3Mods0
// (source, modifiers, clamp, omod). The instruction result is typed f32,
// matching VOP_Pseudo_Scalar_F16, and is then re-typed as f16 with a
// COPY_TO_REGCLASS into SReg_32_XEXEC.
1488class PseudoScalarPatF16<SDPatternOperator node, VOP3_Pseudo inst> : GCNPat <
1489  (f16 (UniformUnaryFrag<node> (f16 (VOP3Mods0 f16:$src0, i32:$src0_modifiers,
1490                                               i1:$clamp, i32:$omod)))),
1491  (f16 (COPY_TO_REGCLASS (f32 (inst i32:$src0_modifiers, f16:$src0, i1:$clamp,
1492                                    i32:$omod)),
1493                         SReg_32_XEXEC))
1494>;
1495
// exp and log use the f16-specific nodes (AMDGPUexpf16 / AMDGPUlogf16); rcp,
// rsq and sqrt use the same nodes as their F32 instruction definitions.
1496let SubtargetPredicate = HasPseudoScalarTrans in {
1497  def : PseudoScalarPatF16<AMDGPUexpf16, V_S_EXP_F16_e64>;
1498  def : PseudoScalarPatF16<AMDGPUlogf16, V_S_LOG_F16_e64>;
1499  def : PseudoScalarPatF16<AMDGPUrcp, V_S_RCP_F16_e64>;
1500  def : PseudoScalarPatF16<AMDGPUrsq, V_S_RSQ_F16_e64>;
1501  def : PseudoScalarPatF16<any_amdgcn_sqrt, V_S_SQRT_F16_e64>;
1502}
1503
// v_ashr_pk_{i8,u8}_i32: three i32 sources, i16 result, op_sel-only VOP3
// profile. Each is selected directly from its intrinsic.
1504let SubtargetPredicate = HasAshrPkInsts, isReMaterializable = 1 in {
1505  defm V_ASHR_PK_I8_I32 : VOP3Inst<"v_ashr_pk_i8_i32", VOP3_Profile<VOP_I16_I32_I32_I32, VOP3_OPSEL_ONLY>, int_amdgcn_ashr_pk_i8_i32>;
1506  defm V_ASHR_PK_U8_I32 : VOP3Inst<"v_ashr_pk_u8_i32", VOP3_Profile<VOP_I16_I32_I32_I32, VOP3_OPSEL_ONLY>, int_amdgcn_ashr_pk_u8_i32>;
1507} // End SubtargetPredicate = HasAshrPkInsts, isReMaterializable = 1
1508
// Matches the open-coded form of a packed signed-byte shift:
//   hi byte = trunc(smed3(src1 >>s src2, lo, hi)) << 8
//   lo byte = trunc(smed3(src0 >>s src2, lo, hi)) & 255
//   result  = hi byte | lo byte            (i16)
// and selects it to `inst` with all modifier / op_sel slots set to 0.
// `lo` and `hi` are the clamp bounds passed to AMDGPUsmed3.
1509class AshrPkI8Pat<VOP3_Pseudo inst, int lo, int hi>: GCNPat<
1510    (i16 (or (i16 (shl (i16 (trunc (i32 (AMDGPUsmed3 (i32 (sra i32:$src1, i32:$src2)), (i32 lo), (i32 hi))))), (i16 8))),
1511             (i16 (and (i16 (trunc (i32 (AMDGPUsmed3 (i32 (sra i32:$src0, i32:$src2)), (i32 lo), (i32 hi))))), (i16 255))))),
1512    (inst 0, VSrc_b32:$src0, 0,  VSrc_b32:$src1, 0, VSrc_b32:$src2, 0 )
1513>;
1514
// Unsigned-byte counterpart of AshrPkI8Pat. The matched DAG is the same except
// that the low byte is used without the `and ..., 255` mask:
//   hi byte = trunc(smed3(src1 >>s src2, lo, hi)) << 8
//   lo byte = trunc(smed3(src0 >>s src2, lo, hi))
//   result  = hi byte | lo byte            (i16)
1515class AshrPkU8Pat<VOP3_Pseudo inst, int lo, int hi>: GCNPat<
1516    (i16 (or (i16 (shl (i16 (trunc (i32 (AMDGPUsmed3 (i32 (sra i32:$src1, i32:$src2)), (i32 lo), (i32 hi))))), (i16 8))),
1517             (i16 (trunc (i32 (AMDGPUsmed3 (i32 (sra i32:$src0, i32:$src2)), (i32 lo), (i32 hi))))))),
1518    (inst 0, VSrc_b32:$src0, 0,  VSrc_b32:$src1, 0, VSrc_b32:$src2, 0 )
1519>;
1520
// Instantiate the open-coded patterns with the clamp bounds of each byte type:
// [-128, 127] for the signed form and [0, 255] for the unsigned form.
1521let SubtargetPredicate = HasAshrPkInsts in {
1522  def : AshrPkI8Pat<V_ASHR_PK_I8_I32_e64, -128, 127>;
1523  def : AshrPkU8Pat<V_ASHR_PK_U8_I32_e64, 0, 255>;
1524}
1525
1526//===----------------------------------------------------------------------===//
1527// Integer Clamp Patterns
1528//===----------------------------------------------------------------------===//
1529
// Builds the source DAG for an integer clamp pattern: `node` applied to the
// profile's typed sources, with the profile's destination type. `ret` selects
// the 3-, 2- or 1-source form according to P.NumSrcArgs (anything other than
// 3 or 2 falls through to the 1-source form).
1530class getClampPat<VOPProfile P, SDPatternOperator node> {
1531  dag ret3 = (P.DstVT (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2));
1532  dag ret2 = (P.DstVT (node P.Src0VT:$src0, P.Src1VT:$src1));
1533  dag ret1 = (P.DstVT (node P.Src0VT:$src0));
1534  dag ret = !if(!eq(P.NumSrcArgs, 3), ret3,
1535            !if(!eq(P.NumSrcArgs, 2), ret2,
1536            ret1));
1537}
1538
// Builds the result DAG matching getClampPat: `inst` applied to the same typed
// sources followed by a constant (i1 0) operand. `ret` selects the 3-, 2- or
// 1-source form according to P.NumSrcArgs.
// NOTE(review): the trailing (i1 0) is presumably the clamp bit, set to
// "off" - confirm against the instruction's operand list.
1539class getClampRes<VOPProfile P, Instruction inst> {
1540  dag ret3 = (inst P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, (i1 0));
1541  dag ret2 = (inst P.Src0VT:$src0, P.Src1VT:$src1, (i1 0));
1542  dag ret1 = (inst P.Src0VT:$src0, (i1 0));
1543  dag ret = !if(!eq(P.NumSrcArgs, 3), ret3,
1544            !if(!eq(P.NumSrcArgs, 2), ret2,
1545            ret1));
1546}
1547
// Selection pattern pairing getClampPat (source DAG) with getClampRes (result
// DAG). Both are built from the instruction's own profile, inst.Pfl.
1548class IntClampPat<VOP3InstBase inst, SDPatternOperator node> : GCNPat<
1549  getClampPat<inst.Pfl, node>.ret,
1550  getClampRes<inst.Pfl, inst>.ret
1551>;
1552
// Integer instructions selected through IntClampPat: the 24-bit multiply-adds
// (from target nodes) and the SAD / MSAD / QSAD / MQSAD family (from
// intrinsics).
1553def : IntClampPat<V_MAD_I32_I24_e64, AMDGPUmad_i24>;
1554def : IntClampPat<V_MAD_U32_U24_e64, AMDGPUmad_u24>;
1555
1556def : IntClampPat<V_SAD_U8_e64, int_amdgcn_sad_u8>;
1557def : IntClampPat<V_SAD_HI_U8_e64, int_amdgcn_sad_hi_u8>;
1558def : IntClampPat<V_SAD_U16_e64, int_amdgcn_sad_u16>;
1559
1560def : IntClampPat<V_MSAD_U8_e64, int_amdgcn_msad_u8>;
1561def : IntClampPat<V_MQSAD_PK_U16_U8_e64, int_amdgcn_mqsad_pk_u16_u8>;
1562
1563def : IntClampPat<V_QSAD_PK_U16_U8_e64, int_amdgcn_qsad_pk_u16_u8>;
1564def : IntClampPat<V_MQSAD_U32_U8_e64, int_amdgcn_mqsad_u32_u8>;
1565
1566//===----------------------------------------------------------------------===//
1567// Floating-point operation Patterns
1568//===----------------------------------------------------------------------===//
1569
1570// Implement fminimum(x, y) by using minimum3(x, y, y)
// The same class also covers fmaximum via maximum3. The second operand and its
// modifiers are passed twice, filling both the second and third source slots
// of the three-operand instruction.
1571class MinimumMaximumByMinimum3Maximum3<SDPatternOperator node, ValueType vt,
1572                                       Instruction inst> : GCNPat<
1573  (vt (node (VOP3Mods vt:$src0, i32:$src0_mods), (VOP3Mods vt:$src1, i32:$src1_mods))),
1574  (inst $src0_mods, $src0, $src1_mods, $src1, $src1_mods, $src1)
1575>;
1576
1577// Prefer the real 2 operand form if legal
// Only f32 is instantiated here, under HasMinimum3Maximum3F32.
1578let SubtargetPredicate = HasMinimum3Maximum3F32 in {
1579def : MinimumMaximumByMinimum3Maximum3<fminimum, f32, V_MINIMUM3_F32_e64>;
1580def : MinimumMaximumByMinimum3Maximum3<fmaximum, f32, V_MAXIMUM3_F32_e64>;
1581}
1582
1583//===----------------------------------------------------------------------===//
1584// Target-specific instruction encodings.
1585//===----------------------------------------------------------------------===//
1586
1587//===----------------------------------------------------------------------===//
1588// GFX12.
1589//===----------------------------------------------------------------------===//
1590
// GFX12-only real (encoded) instruction definitions. The first template
// argument is the opcode. The *_with_name and *_t16_and_fake16 forms
// additionally take the name of the pseudo they encode (e.g. "V_MIN3_F32") and
// the assembly mnemonic (e.g. "v_min3_num_f32"), so several existing pseudos
// appear here under their new "*_num_*" / "*_co_*" mnemonics.
1591defm V_MIN3_NUM_F32       : VOP3_Realtriple_with_name_gfx12<0x229, "V_MIN3_F32", "v_min3_num_f32">;
1592defm V_MAX3_NUM_F32       : VOP3_Realtriple_with_name_gfx12<0x22a, "V_MAX3_F32", "v_max3_num_f32">;
1593defm V_MIN3_NUM_F16       : VOP3_Realtriple_t16_and_fake16_gfx12<0x22b, "v_min3_num_f16", "V_MIN3_F16", "v_min3_f16">;
1594defm V_MAX3_NUM_F16       : VOP3_Realtriple_t16_and_fake16_gfx12<0x22c, "v_max3_num_f16", "V_MAX3_F16", "v_max3_f16">;
1595defm V_MINIMUM3_F32       : VOP3Only_Realtriple_gfx12<0x22d>;
1596defm V_MAXIMUM3_F32       : VOP3Only_Realtriple_gfx12<0x22e>;
1597defm V_MINIMUM3_F16       : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x22f, "v_minimum3_f16">;
1598defm V_MAXIMUM3_F16       : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x230, "v_maximum3_f16">;
1599defm V_MED3_NUM_F32       : VOP3_Realtriple_with_name_gfx12<0x231, "V_MED3_F32", "v_med3_num_f32">;
1600defm V_MED3_NUM_F16       : VOP3_Realtriple_t16_and_fake16_gfx12<0x232, "v_med3_num_f16", "V_MED3_F16", "v_med3_f16">;
1601defm V_MINMAX_NUM_F32     : VOP3_Realtriple_with_name_gfx12<0x268, "V_MINMAX_F32", "v_minmax_num_f32">;
1602defm V_MAXMIN_NUM_F32     : VOP3_Realtriple_with_name_gfx12<0x269, "V_MAXMIN_F32", "v_maxmin_num_f32">;
1603defm V_MINMAX_NUM_F16     : VOP3_Realtriple_t16_and_fake16_gfx12<0x26a, "v_minmax_num_f16", "V_MINMAX_F16", "v_minmax_f16">;
1604defm V_MAXMIN_NUM_F16     : VOP3_Realtriple_t16_and_fake16_gfx12<0x26b, "v_maxmin_num_f16", "V_MAXMIN_F16", "v_maxmin_f16">;
1605defm V_MINIMUMMAXIMUM_F32 : VOP3Only_Realtriple_gfx12<0x26c>;
1606defm V_MAXIMUMMINIMUM_F32 : VOP3Only_Realtriple_gfx12<0x26d>;
1607defm V_MINIMUMMAXIMUM_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x26e, "v_minimummaximum_f16">;
1608defm V_MAXIMUMMINIMUM_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x26f, "v_maximumminimum_f16">;
// Pseudo-scalar transcendentals: 0x280-0x289, F32 / F16 interleaved.
1609defm V_S_EXP_F32          : VOP3Only_Real_Base_gfx12<0x280>;
1610defm V_S_EXP_F16          : VOP3Only_Real_Base_gfx12<0x281>;
1611defm V_S_LOG_F32          : VOP3Only_Real_Base_gfx12<0x282>;
1612defm V_S_LOG_F16          : VOP3Only_Real_Base_gfx12<0x283>;
1613defm V_S_RCP_F32          : VOP3Only_Real_Base_gfx12<0x284>;
1614defm V_S_RCP_F16          : VOP3Only_Real_Base_gfx12<0x285>;
1615defm V_S_RSQ_F32          : VOP3Only_Real_Base_gfx12<0x286>;
1616defm V_S_RSQ_F16          : VOP3Only_Real_Base_gfx12<0x287>;
1617defm V_S_SQRT_F32         : VOP3Only_Real_Base_gfx12<0x288>;
1618defm V_S_SQRT_F16         : VOP3Only_Real_Base_gfx12<0x289>;
1619defm V_MAD_CO_U64_U32     : VOP3be_Real_with_name_gfx12<0x2fe, "V_MAD_U64_U32", "v_mad_co_u64_u32">;
1620defm V_MAD_CO_I64_I32     : VOP3be_Real_with_name_gfx12<0x2ff, "V_MAD_I64_I32", "v_mad_co_i64_i32">;
1621defm V_MINIMUM_F64        : VOP3Only_Real_Base_gfx12<0x341>;
1622defm V_MAXIMUM_F64        : VOP3Only_Real_Base_gfx12<0x342>;
1623defm V_MINIMUM_F32        : VOP3Only_Realtriple_gfx12<0x365>;
1624defm V_MAXIMUM_F32        : VOP3Only_Realtriple_gfx12<0x366>;
1625defm V_MINIMUM_F16        : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x367, "v_minimum_f16">;
1626defm V_MAXIMUM_F16        : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x368, "v_maximum_f16">;
1627
1628defm V_PERMLANE16_VAR_B32  : VOP3Only_Real_Base_gfx12<0x30f>;
1629defm V_PERMLANEX16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x310>;
1630
// fp8 / bf8 conversions. The two "sr" entries encode pseudos whose names carry
// a _gfx12 suffix while keeping the unsuffixed assembly mnemonic.
1631defm V_CVT_PK_FP8_F32  : VOP3Only_Realtriple_gfx12<0x369>;
1632defm V_CVT_PK_BF8_F32  : VOP3Only_Realtriple_gfx12<0x36a>;
1633defm V_CVT_SR_FP8_F32_gfx12 : VOP3_Realtriple_with_name_gfx12<0x36b, "V_CVT_SR_FP8_F32_gfx12", "v_cvt_sr_fp8_f32" >;
1634defm V_CVT_SR_BF8_F32_gfx12 : VOP3_Realtriple_with_name_gfx12<0x36c, "V_CVT_SR_BF8_F32_gfx12", "v_cvt_sr_bf8_f32">;
1635
1636//===----------------------------------------------------------------------===//
1637// GFX11, GFX12
1638//===----------------------------------------------------------------------===//
1639
// Convenience multiclasses for encodings shared between GFX11 and GFX12.
// Except for the two *_t16_and_fake16 wrappers, each one inherits the
// corresponding single-generation multiclass twice, once with GFX11Gen and
// once with GFX12Gen, using the same opcode.

// Real with explicit pseudo name and assembly name.
1640multiclass VOP3_Real_with_name_gfx11_gfx12<bits<10> op, string opName,
1641                                           string asmName> :
1642  VOP3_Real_with_name<GFX11Gen, op, opName, asmName>,
1643  VOP3_Real_with_name<GFX12Gen, op, opName, asmName>;
1644
// Realtriple, names derived from the defm name.
1645multiclass VOP3_Realtriple_gfx11_gfx12<bits<10> op> :
1646  VOP3_Realtriple<GFX11Gen, op>, VOP3_Realtriple<GFX12Gen, op>;
1647
// Base real only.
1648multiclass VOP3_Real_Base_gfx11_gfx12<bits<10> op> :
1649  VOP3_Real_Base<GFX11Gen, op>, VOP3_Real_Base<GFX12Gen, op>;
1650
// Realtriple with explicit pseudo name and assembly name.
1651multiclass VOP3_Realtriple_with_name_gfx11_gfx12<bits<10> op, string opName,
1652                                                 string asmName> :
1653  VOP3_Realtriple_with_name<GFX11Gen, op, opName, asmName>,
1654  VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName>;
1655
// Dot-product realtriple, instantiated once for the "_t16" and once for the
// "_fake16" pseudo (opName with the respective suffix appended).
// NOTE(review): VOP3Dot_Realtriple_gfx11_gfx12 is not defined in this part of
// the file; it is presumably provided elsewhere - confirm.
1656multiclass VOP3Dot_Realtriple_t16_and_fake16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME> {
1657  defm _t16: VOP3Dot_Realtriple_gfx11_gfx12<op, asmName, 0, opName#"_t16">;
1658  defm _fake16: VOP3Dot_Realtriple_gfx11_gfx12<op, asmName, 0, opName#"_fake16">;
1659}
1660
// Realtriple for a single 16-bit flavour; forwards the pseudo mnemonic and the
// isSingle flag to both generations.
1661multiclass VOP3_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME,
1662                                           string pseudo_mnemonic = "", bit isSingle = 0> :
1663  VOP3_Realtriple_with_name<GFX11Gen, op, opName, asmName, pseudo_mnemonic, isSingle>,
1664  VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
1665
// Instantiates VOP3_Realtriple_t16_gfx11_gfx12 for both the "_t16" and the
// "_fake16" pseudo. The defm names are built from opName, not from NAME.
1666multiclass VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<bits<10> op, string asmName, string opName = NAME,
1667                                                      string pseudo_mnemonic = "", bit isSingle = 0> {
1668  defm opName#"_t16": VOP3_Realtriple_t16_gfx11_gfx12<op, asmName, opName#"_t16", pseudo_mnemonic, isSingle>;
1669  defm opName#"_fake16": VOP3_Realtriple_t16_gfx11_gfx12<op, asmName, opName#"_fake16", pseudo_mnemonic, isSingle>;
1670}
1671
// VOP3be real with explicit pseudo name and assembly name.
1672multiclass VOP3be_Real_gfx11_gfx12<bits<10> op, string opName, string asmName> :
1673  VOP3be_Real<GFX11Gen, op, opName, asmName>,
1674  VOP3be_Real<GFX12Gen, op, opName, asmName>;
1675
// Real without a name suffix.
1676multiclass VOP3_Real_No_Suffix_gfx11_gfx12<bits<10> op> :
1677  VOP3_Real_No_Suffix<GFX11Gen, op>, VOP3_Real_No_Suffix<GFX12Gen, op>;
1678
// Real encodings for GFX11 and GFX12, ordered by opcode (first template
// argument). Entries that use a *_gfx11 helper instead of *_gfx11_gfx12 are
// GFX11-only in this table; for several of them (e.g. V_MIN3_F32, V_MAX3_F32,
// V_MED3_F32, V_MIN3_F16, ...) the GFX12 section above encodes the same pseudo
// under a "*_NUM_*" name with a different opcode.
// NOTE(review): V_MIN3_I16, V_MIN3_U16 and V_MED3_U16 use the
// VOP3_Realtriple_t16_and_fake16_gfx11_gfx12 helper, whereas V_MAX3_I16,
// V_MAX3_U16 and V_MED3_I16 use the VOP3Only_ variant - confirm the asymmetry
// is intentional.
1679defm V_FMA_DX9_ZERO_F32    : VOP3_Real_with_name_gfx11_gfx12<0x209, "V_FMA_LEGACY_F32", "v_fma_dx9_zero_f32">;
1680defm V_MAD_I32_I24         : VOP3_Realtriple_gfx11_gfx12<0x20a>;
1681defm V_MAD_U32_U24         : VOP3_Realtriple_gfx11_gfx12<0x20b>;
1682defm V_CUBEID_F32          : VOP3_Realtriple_gfx11_gfx12<0x20c>;
1683defm V_CUBESC_F32          : VOP3_Realtriple_gfx11_gfx12<0x20d>;
1684defm V_CUBETC_F32          : VOP3_Realtriple_gfx11_gfx12<0x20e>;
1685defm V_CUBEMA_F32          : VOP3_Realtriple_gfx11_gfx12<0x20f>;
1686defm V_BFE_U32             : VOP3_Realtriple_gfx11_gfx12<0x210>;
1687defm V_BFE_I32             : VOP3_Realtriple_gfx11_gfx12<0x211>;
1688defm V_BFI_B32             : VOP3_Realtriple_gfx11_gfx12<0x212>;
1689defm V_FMA_F32             : VOP3_Realtriple_gfx11_gfx12<0x213>;
1690defm V_FMA_F64             : VOP3_Real_Base_gfx11_gfx12<0x214>;
1691defm V_LERP_U8             : VOP3_Realtriple_gfx11_gfx12<0x215>;
1692defm V_ALIGNBIT_B32        : VOP3_Realtriple_gfx11_gfx12<0x216>;
1693defm V_ALIGNBYTE_B32       : VOP3_Realtriple_gfx11_gfx12<0x217>;
1694defm V_MULLIT_F32          : VOP3_Realtriple_gfx11_gfx12<0x218>;
1695defm V_MIN3_F32            : VOP3_Realtriple_gfx11<0x219>;
1696defm V_MIN3_I32            : VOP3_Realtriple_gfx11_gfx12<0x21a>;
1697defm V_MIN3_U32            : VOP3_Realtriple_gfx11_gfx12<0x21b>;
1698defm V_MAX3_F32            : VOP3_Realtriple_gfx11<0x21c>;
1699defm V_MAX3_I32            : VOP3_Realtriple_gfx11_gfx12<0x21d>;
1700defm V_MAX3_U32            : VOP3_Realtriple_gfx11_gfx12<0x21e>;
1701defm V_MED3_F32            : VOP3_Realtriple_gfx11<0x21f>;
1702defm V_MED3_I32            : VOP3_Realtriple_gfx11_gfx12<0x220>;
1703defm V_MED3_U32            : VOP3_Realtriple_gfx11_gfx12<0x221>;
1704defm V_SAD_U8              : VOP3_Realtriple_gfx11_gfx12<0x222>;
1705defm V_SAD_HI_U8           : VOP3_Realtriple_gfx11_gfx12<0x223>;
1706defm V_SAD_U16             : VOP3_Realtriple_gfx11_gfx12<0x224>;
1707defm V_SAD_U32             : VOP3_Realtriple_gfx11_gfx12<0x225>;
1708defm V_CVT_PK_U8_F32       : VOP3_Realtriple_gfx11_gfx12<0x226>;
1709defm V_DIV_FIXUP_F32       : VOP3_Real_Base_gfx11_gfx12<0x227>;
1710defm V_DIV_FIXUP_F64       : VOP3_Real_Base_gfx11_gfx12<0x228>;
1711defm V_DIV_FMAS_F32        : VOP3_Real_Base_gfx11_gfx12<0x237>;
1712defm V_DIV_FMAS_F64        : VOP3_Real_Base_gfx11_gfx12<0x238>;
1713defm V_MSAD_U8             : VOP3_Realtriple_gfx11_gfx12<0x239>;
1714defm V_QSAD_PK_U16_U8      : VOP3_Real_Base_gfx11_gfx12<0x23a>;
1715defm V_MQSAD_PK_U16_U8     : VOP3_Real_Base_gfx11_gfx12<0x23b>;
1716defm V_MQSAD_U32_U8        : VOP3_Real_Base_gfx11_gfx12<0x23d>;
1717defm V_XOR3_B32            : VOP3_Realtriple_gfx11_gfx12<0x240>;
1718defm V_MAD_U16             : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x241, "v_mad_u16", "V_MAD_U16_gfx9">;
1719defm V_PERM_B32            : VOP3_Realtriple_gfx11_gfx12<0x244>;
1720defm V_XAD_U32             : VOP3_Realtriple_gfx11_gfx12<0x245>;
1721defm V_LSHL_ADD_U32        : VOP3_Realtriple_gfx11_gfx12<0x246>;
1722defm V_ADD_LSHL_U32        : VOP3_Realtriple_gfx11_gfx12<0x247>;
1723defm V_FMA_F16             : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x248, "v_fma_f16", "V_FMA_F16_gfx9">;
1724defm V_MIN3_F16            : VOP3Only_Realtriple_t16_and_fake16_gfx11<0x249, "v_min3_f16">;
1725defm V_MIN3_I16            : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x24a, "v_min3_i16">;
1726defm V_MIN3_U16            : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x24b, "v_min3_u16">;
1727defm V_MAX3_F16            : VOP3Only_Realtriple_t16_and_fake16_gfx11<0x24c, "v_max3_f16">;
1728defm V_MAX3_I16            : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x24d, "v_max3_i16">;
1729defm V_MAX3_U16            : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x24e, "v_max3_u16">;
1730defm V_MED3_F16            : VOP3Only_Realtriple_t16_and_fake16_gfx11<0x24f, "v_med3_f16">;
1731defm V_MED3_I16            : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x250, "v_med3_i16">;
1732defm V_MED3_U16            : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x251, "v_med3_u16">;
1733defm V_MAD_I16             : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x253, "v_mad_i16", "V_MAD_I16_gfx9">;
1734defm V_DIV_FIXUP_F16       : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x254, "v_div_fixup_f16", "V_DIV_FIXUP_F16_gfx9">;
1735defm V_ADD3_U32            : VOP3_Realtriple_gfx11_gfx12<0x255>;
1736defm V_LSHL_OR_B32         : VOP3_Realtriple_gfx11_gfx12<0x256>;
1737defm V_AND_OR_B32          : VOP3_Realtriple_gfx11_gfx12<0x257>;
1738defm V_OR3_B32             : VOP3_Realtriple_gfx11_gfx12<0x258>;
1739defm V_MAD_U32_U16         : VOP3_Realtriple_gfx11_gfx12<0x259>;
1740defm V_MAD_I32_I16         : VOP3_Realtriple_gfx11_gfx12<0x25a>;
1741defm V_PERMLANE16_B32      : VOP3_Real_Base_gfx11_gfx12<0x25b>;
1742defm V_PERMLANEX16_B32     : VOP3_Real_Base_gfx11_gfx12<0x25c>;
1743defm V_MAXMIN_F32          : VOP3_Realtriple_gfx11<0x25e>;
1744defm V_MINMAX_F32          : VOP3_Realtriple_gfx11<0x25f>;
1745defm V_MAXMIN_F16          : VOP3_Realtriple_t16_and_fake16_gfx11<0x260, "v_maxmin_f16">;
1746defm V_MINMAX_F16          : VOP3_Realtriple_t16_and_fake16_gfx11<0x261, "v_minmax_f16">;
1747defm V_MAXMIN_U32          : VOP3_Realtriple_gfx11_gfx12<0x262>;
1748defm V_MINMAX_U32          : VOP3_Realtriple_gfx11_gfx12<0x263>;
1749defm V_MAXMIN_I32          : VOP3_Realtriple_gfx11_gfx12<0x264>;
1750defm V_MINMAX_I32          : VOP3_Realtriple_gfx11_gfx12<0x265>;
1751defm V_DOT2_F16_F16        : VOP3Dot_Realtriple_t16_and_fake16_gfx11_gfx12<0x266, "v_dot2_f16_f16">;
1752defm V_DOT2_BF16_BF16      : VOP3Dot_Realtriple_t16_and_fake16_gfx11_gfx12<0x267, "v_dot2_bf16_bf16">;
1753defm V_DIV_SCALE_F32       : VOP3be_Real_gfx11_gfx12<0x2fc, "V_DIV_SCALE_F32", "v_div_scale_f32">;
1754defm V_DIV_SCALE_F64       : VOP3be_Real_gfx11_gfx12<0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">;
1755defm V_MAD_U64_U32_gfx11   : VOP3be_Real_gfx11<0x2fe, "V_MAD_U64_U32_gfx11", "v_mad_u64_u32">;
1756defm V_MAD_I64_I32_gfx11   : VOP3be_Real_gfx11<0x2ff, "V_MAD_I64_I32_gfx11", "v_mad_i64_i32">;
1757defm V_ADD_NC_U16          : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x303, "v_add_nc_u16">;
1758defm V_SUB_NC_U16          : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x304, "v_sub_nc_u16">;
1759defm V_MUL_LO_U16          : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x305, "v_mul_lo_u16">;
1760defm V_CVT_PK_I16_F32      : VOP3_Realtriple_gfx11_gfx12<0x306>;
1761defm V_CVT_PK_U16_F32      : VOP3_Realtriple_gfx11_gfx12<0x307>;
1762defm V_MAX_U16             : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x309, "v_max_u16">;
1763defm V_MAX_I16             : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30a, "v_max_i16">;
1764defm V_MIN_U16             : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30b, "v_min_u16">;
1765defm V_MIN_I16             : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30c, "v_min_i16">;
1766defm V_ADD_NC_I16          : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30d, "v_add_nc_i16", "V_ADD_I16">;
1767defm V_SUB_NC_I16          : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30e, "v_sub_nc_i16", "V_SUB_I16">;
1768defm V_PACK_B32_F16        : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x311, "v_pack_b32_f16">;
1769defm V_CVT_PK_NORM_I16_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x312, "v_cvt_pk_norm_i16_f16", "V_CVT_PKNORM_I16_F16", "v_cvt_pknorm_i16_f16">;
1770defm V_CVT_PK_NORM_U16_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x313, "v_cvt_pk_norm_u16_f16", "V_CVT_PKNORM_U16_F16", "v_cvt_pknorm_u16_f16">;
1771defm V_SUB_NC_I32          : VOP3_Realtriple_with_name_gfx11_gfx12<0x325, "V_SUB_I32", "v_sub_nc_i32">;
1772defm V_ADD_NC_I32          : VOP3_Realtriple_with_name_gfx11_gfx12<0x326, "V_ADD_I32", "v_add_nc_i32">;
1773defm V_ADD_F64             : VOP3_Real_Base_gfx11<0x327>;
1774defm V_MUL_F64             : VOP3_Real_Base_gfx11<0x328>;
1775defm V_MIN_F64             : VOP3_Real_Base_gfx11<0x329>;
1776defm V_MAX_F64             : VOP3_Real_Base_gfx11<0x32a>;
1777defm V_LDEXP_F64           : VOP3_Real_Base_gfx11_gfx12<0x32b>;
1778defm V_MUL_LO_U32          : VOP3_Real_Base_gfx11_gfx12<0x32c>;
1779defm V_MUL_HI_U32          : VOP3_Real_Base_gfx11_gfx12<0x32d>;
1780defm V_MUL_HI_I32          : VOP3_Real_Base_gfx11_gfx12<0x32e>;
1781defm V_TRIG_PREOP_F64      : VOP3_Real_Base_gfx11_gfx12<0x32f>;
1782defm V_LSHLREV_B16         : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x338, "v_lshlrev_b16">;
1783defm V_LSHRREV_B16         : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x339, "v_lshrrev_b16">;
1784defm V_ASHRREV_I16         : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x33a, "v_ashrrev_i16">;
1785defm V_LSHLREV_B64         : VOP3_Real_Base_gfx11<0x33c>;
1786defm V_LSHRREV_B64         : VOP3_Real_Base_gfx11_gfx12<0x33d>;
1787defm V_ASHRREV_I64         : VOP3_Real_Base_gfx11_gfx12<0x33e>;
1788defm V_READLANE_B32        : VOP3_Real_No_Suffix_gfx11_gfx12<0x360>; // Pseudo in VOP2
1789let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in {
1790  defm V_WRITELANE_B32     : VOP3_Real_No_Suffix_gfx11_gfx12<0x361>; // Pseudo in VOP2
1791} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in)
1792defm V_AND_B16             : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x362, "v_and_b16">;
1793defm V_OR_B16              : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x363, "v_or_b16">;
1794defm V_XOR_B16             : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x364, "v_xor_b16">;
1795
// Extra assembler mnemonics, enabled only when the isGFX11Plus assembler
// predicate holds.
// NOTE(review): presumably the first string is the accepted alias and the
// second the canonical mnemonic (as with LLVM's MnemonicAlias) - confirm
// against the AMDGPUMnemonicAlias definition.
1796let AssemblerPredicate = isGFX11Plus in {
1797  def : AMDGPUMnemonicAlias<"v_add3_nc_u32", "v_add3_u32">;
1798  def : AMDGPUMnemonicAlias<"v_xor_add_u32", "v_xad_u32">;
1799}
1800
1801//===----------------------------------------------------------------------===//
1802// GFX10.
1803//===----------------------------------------------------------------------===//
1804
// Real-instruction multiclasses guarded by the isGFX10Only assembler
// predicate and placed in the "GFX10" decoder namespace. Every multiclass
// defines exactly one record with the `_gfx10` suffix. The record combines
// VOP3_Real<pseudo, SIEncodingFamily.GFX10> with one of the *_gfx10 encoding
// classes, which receives the 10-bit `op` value and the pseudo's profile
// (`.Pfl`). The variants differ in which pseudo they look up and which
// encoding class they use.
1805let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
  // VOP3e_gfx10 encoding; pseudo is looked up as NAME#"_e64".
1806  multiclass VOP3_Real_gfx10<bits<10> op> {
1807    def _gfx10 :
1808      VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
1809      VOP3e_gfx10<op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl>;
1810  }
  // Same as VOP3_Real_gfx10, but the pseudo is looked up as NAME with no
  // "_e64" suffix appended.
1811  multiclass VOP3_Real_No_Suffix_gfx10<bits<10> op> {
1812    def _gfx10 :
1813      VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.GFX10>,
1814      VOP3e_gfx10<op, !cast<VOP_Pseudo>(NAME).Pfl>;
1815  }
  // VOP3e_gfx10 encoding for a pseudo whose name differs from the defm name:
  // the pseudo is opName#"_e64" and the assembly string is rebuilt as
  // asmName followed by the pseudo's AsmOperands. Also sets IsSingle = 1.
1816  multiclass VOP3_Real_gfx10_with_name<bits<10> op, string opName,
1817                                       string asmName> {
1818    def _gfx10 :
1819      VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>,
1820      VOP3e_gfx10<op, !cast<VOP3_Pseudo>(opName#"_e64").Pfl> {
1821        VOP3_Pseudo ps = !cast<VOP3_Pseudo>(opName#"_e64");
1822        let AsmString = asmName # ps.AsmOperands;
1823        let IsSingle = 1;
1824      }
1825  }
  // VOP3be_gfx10 encoding; pseudo is NAME#"_e64".
1826  multiclass VOP3be_Real_gfx10<bits<10> op> {
1827    def _gfx10 :
1828      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
1829      VOP3be_gfx10<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
1830  }
  // VOP3Interp_gfx10 encoding; pseudo is NAME (no suffix appended).
1831  multiclass VOP3Interp_Real_gfx10<bits<10> op> {
1832    def _gfx10 :
1833      VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.GFX10>,
1834      VOP3Interp_gfx10<op, !cast<VOP3_Pseudo>(NAME).Pfl>;
1835  }
  // VOP3OpSel_gfx10 encoding; pseudo is NAME#"_e64".
1836  multiclass VOP3OpSel_Real_gfx10<bits<10> op> {
1837    def _gfx10 :
1838      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
1839      VOP3OpSel_gfx10<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
1840  }
  // VOP3OpSel_gfx10 encoding with an explicit pseudo name (opName#"_e64")
  // and assembly mnemonic (asmName). Unlike VOP3_Real_gfx10_with_name, this
  // variant does not set IsSingle.
1841  multiclass VOP3OpSel_Real_gfx10_with_name<bits<10> op, string opName,
1842                                            string asmName> {
1843    def _gfx10 :
1844      VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>,
1845      VOP3OpSel_gfx10<op, !cast<VOP3_Pseudo>(opName#"_e64").Pfl> {
1846        VOP3_Pseudo ps = !cast<VOP3_Pseudo>(opName#"_e64");
1847        let AsmString = asmName # ps.AsmOperands;
1848      }
1849  }
1850} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
1851
1852defm V_READLANE_B32  : VOP3_Real_No_Suffix_gfx10<0x360>;
1853
// GFX10 real for V_WRITELANE_B32. The input operand list is overridden for
// this definition to (src0, src1, vdst_in), where $vdst_in is a VGPR_32.
// The pseudo is looked up without an "_e64" suffix (VOP3_Real_No_Suffix).
1854let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in {
1855  defm V_WRITELANE_B32 : VOP3_Real_No_Suffix_gfx10<0x361>;
1856} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in)
1857
// The GFX10 real for V_MUL_LO_I32 carries the additional subtarget predicate
// isGFX10Before1030, on top of the isGFX10Only assembler predicate that
// VOP3_Real_gfx10 is defined under.
1858let SubtargetPredicate = isGFX10Before1030 in {
1859  defm V_MUL_LO_I32      : VOP3_Real_gfx10<0x16b>;
1860}
1861
1862defm V_XOR3_B32           : VOP3_Real_gfx10<0x178>;
1863defm V_LSHLREV_B64        : VOP3_Real_gfx10<0x2ff>;
1864defm V_LSHRREV_B64        : VOP3_Real_gfx10<0x300>;
1865defm V_ASHRREV_I64        : VOP3_Real_gfx10<0x301>;
1866defm V_PERM_B32           : VOP3_Real_gfx10<0x344>;
1867defm V_XAD_U32            : VOP3_Real_gfx10<0x345>;
1868defm V_LSHL_ADD_U32       : VOP3_Real_gfx10<0x346>;
1869defm V_ADD_LSHL_U32       : VOP3_Real_gfx10<0x347>;
1870defm V_ADD3_U32           : VOP3_Real_gfx10<0x36d>;
1871defm V_LSHL_OR_B32        : VOP3_Real_gfx10<0x36f>;
1872defm V_AND_OR_B32         : VOP3_Real_gfx10<0x371>;
1873defm V_OR3_B32            : VOP3_Real_gfx10<0x372>;
1874
// GFX10 reals whose mnemonic differs from the pseudo's name: each entry
// passes the pseudo base name (e.g. "V_ADD_I16") and the "v_*_nc_*" assembly
// mnemonic to a *_with_name multiclass. The I16 forms use the OpSel
// encoding variant; the I32 forms use the plain VOP3 variant.
1875// TODO-GFX10: add MC tests for v_add/sub_nc_i16
1876defm V_ADD_NC_I16 :
1877  VOP3OpSel_Real_gfx10_with_name<0x30d, "V_ADD_I16", "v_add_nc_i16">;
1878defm V_SUB_NC_I16 :
1879  VOP3OpSel_Real_gfx10_with_name<0x30e, "V_SUB_I16", "v_sub_nc_i16">;
1880defm V_SUB_NC_I32 :
1881  VOP3_Real_gfx10_with_name<0x376, "V_SUB_I32", "v_sub_nc_i32">;
1882defm V_ADD_NC_I32 :
1883  VOP3_Real_gfx10_with_name<0x37f, "V_ADD_I32", "v_add_nc_i32">;
1884
1885defm V_INTERP_P1_F32_e64  : VOP3Interp_Real_gfx10<0x200>;
1886defm V_INTERP_P2_F32_e64  : VOP3Interp_Real_gfx10<0x201>;
1887defm V_INTERP_MOV_F32_e64 : VOP3Interp_Real_gfx10<0x202>;
1888
1889defm V_INTERP_P1LL_F16    : VOP3Interp_Real_gfx10<0x342>;
1890defm V_INTERP_P1LV_F16    : VOP3Interp_Real_gfx10<0x343>;
1891defm V_INTERP_P2_F16      : VOP3Interp_Real_gfx10<0x35a>;
1892
1893defm V_PACK_B32_F16       : VOP3OpSel_Real_gfx10<0x311>;
1894defm V_CVT_PKNORM_I16_F16 : VOP3OpSel_Real_gfx10<0x312>;
1895defm V_CVT_PKNORM_U16_F16 : VOP3OpSel_Real_gfx10<0x313>;
1896
1897defm V_MIN3_F16           : VOP3OpSel_Real_gfx10<0x351>;
1898defm V_MIN3_I16           : VOP3OpSel_Real_gfx10<0x352>;
1899defm V_MIN3_U16           : VOP3OpSel_Real_gfx10<0x353>;
1900defm V_MAX3_F16           : VOP3OpSel_Real_gfx10<0x354>;
1901defm V_MAX3_I16           : VOP3OpSel_Real_gfx10<0x355>;
1902defm V_MAX3_U16           : VOP3OpSel_Real_gfx10<0x356>;
1903defm V_MED3_F16           : VOP3OpSel_Real_gfx10<0x357>;
1904defm V_MED3_I16           : VOP3OpSel_Real_gfx10<0x358>;
1905defm V_MED3_U16           : VOP3OpSel_Real_gfx10<0x359>;
1906defm V_MAD_U32_U16        : VOP3OpSel_Real_gfx10<0x373>;
1907defm V_MAD_I32_I16        : VOP3OpSel_Real_gfx10<0x375>;
1908
1909defm V_MAD_U16 :
1910  VOP3OpSel_Real_gfx10_with_name<0x340, "V_MAD_U16_gfx9", "v_mad_u16">;
1911defm V_FMA_F16 :
1912  VOP3OpSel_Real_gfx10_with_name<0x34b, "V_FMA_F16_gfx9", "v_fma_f16">;
1913defm V_MAD_I16 :
1914  VOP3OpSel_Real_gfx10_with_name<0x35e, "V_MAD_I16_gfx9", "v_mad_i16">;
1915defm V_DIV_FIXUP_F16 :
1916  VOP3OpSel_Real_gfx10_with_name<0x35f, "V_DIV_FIXUP_F16_gfx9", "v_div_fixup_f16">;
1917
1918defm V_ADD_NC_U16      : VOP3OpSel_Real_gfx10<0x303>;
1919defm V_SUB_NC_U16      : VOP3OpSel_Real_gfx10<0x304>;
1920
1921// FIXME-GFX10-OPSEL: Need to add "selective" opsel support to some of these
1922// (they do not support SDWA or DPP).
1923defm V_MUL_LO_U16      : VOP3_Real_gfx10_with_name<0x305, "V_MUL_LO_U16", "v_mul_lo_u16">;
1924defm V_LSHRREV_B16     : VOP3_Real_gfx10_with_name<0x307, "V_LSHRREV_B16", "v_lshrrev_b16">;
1925defm V_ASHRREV_I16     : VOP3_Real_gfx10_with_name<0x308, "V_ASHRREV_I16", "v_ashrrev_i16">;
1926defm V_MAX_U16         : VOP3_Real_gfx10_with_name<0x309, "V_MAX_U16", "v_max_u16">;
1927defm V_MAX_I16         : VOP3_Real_gfx10_with_name<0x30a, "V_MAX_I16", "v_max_i16">;
1928defm V_MIN_U16         : VOP3_Real_gfx10_with_name<0x30b, "V_MIN_U16", "v_min_u16">;
1929defm V_MIN_I16         : VOP3_Real_gfx10_with_name<0x30c, "V_MIN_I16", "v_min_i16">;
1930defm V_LSHLREV_B16     : VOP3_Real_gfx10_with_name<0x314, "V_LSHLREV_B16", "v_lshlrev_b16">;
1931defm V_PERMLANE16_B32  : VOP3OpSel_Real_gfx10<0x377>;
1932defm V_PERMLANEX16_B32 : VOP3OpSel_Real_gfx10<0x378>;
1933
1934//===----------------------------------------------------------------------===//
1935// GFX7, GFX10.
1936//===----------------------------------------------------------------------===//
1937
// Real-instruction multiclasses guarded by the isGFX7Only assembler
// predicate, in the "GFX7" decoder namespace. Each defines one `_gfx7`
// record for the pseudo NAME#"_e64" using SIEncodingFamily.SI and the shared
// gfx6_gfx7 encoding classes. Although `op` is declared as 10 bits, only its
// low 9 bits (op{8-0}) are passed to the encoding class.
1938let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
  // VOP3e_gfx6_gfx7 encoding.
1939  multiclass VOP3_Real_gfx7<bits<10> op> {
1940    def _gfx7 :
1941      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
1942      VOP3e_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
1943  }
  // VOP3be_gfx6_gfx7 encoding.
1944  multiclass VOP3be_Real_gfx7<bits<10> op> {
1945    def _gfx7 :
1946      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
1947      VOP3be_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
1948  }
1949} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"
1950
// Convenience multiclass: instantiates both VOP3_Real_gfx7 and
// VOP3_Real_gfx10 with the same `op`, yielding `_gfx7` and `_gfx10` records.
1951multiclass VOP3_Real_gfx7_gfx10<bits<10> op> :
1952  VOP3_Real_gfx7<op>, VOP3_Real_gfx10<op>;
1953
// Convenience multiclass: instantiates both VOP3be_Real_gfx7 and
// VOP3be_Real_gfx10 with the same `op`, yielding `_gfx7` and `_gfx10`
// records.
1954multiclass VOP3be_Real_gfx7_gfx10<bits<10> op> :
1955  VOP3be_Real_gfx7<op>, VOP3be_Real_gfx10<op>;
1956
1957defm V_QSAD_PK_U16_U8   : VOP3_Real_gfx7_gfx10<0x172>;
1958defm V_MQSAD_U32_U8     : VOP3_Real_gfx7_gfx10<0x175>;
1959defm V_MAD_U64_U32      : VOP3be_Real_gfx7_gfx10<0x176>;
1960defm V_MAD_I64_I32      : VOP3be_Real_gfx7_gfx10<0x177>;
1961
1962//===----------------------------------------------------------------------===//
1963// GFX6, GFX7, GFX10.
1964//===----------------------------------------------------------------------===//
1965
// Real-instruction multiclasses guarded by the isGFX6GFX7 assembler
// predicate, in the "GFX6GFX7" decoder namespace. Each defines one
// `_gfx6_gfx7` record for the pseudo NAME#"_e64" using SIEncodingFamily.SI.
// Only the low 9 bits of the 10-bit `op` (op{8-0}) are passed to the
// encoding class.
1966let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
  // VOP3e_gfx6_gfx7 encoding.
1967  multiclass VOP3_Real_gfx6_gfx7<bits<10> op> {
1968    def _gfx6_gfx7 :
1969      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
1970      VOP3e_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
1971  }
  // VOP3be_gfx6_gfx7 encoding.
1972  multiclass VOP3be_Real_gfx6_gfx7<bits<10> op> {
1973    def _gfx6_gfx7 :
1974      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
1975      VOP3be_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
1976  }
1977} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
1978
// Convenience multiclass: instantiates both VOP3_Real_gfx6_gfx7 and
// VOP3_Real_gfx10 with the same `op`, yielding `_gfx6_gfx7` and `_gfx10`
// records.
1979multiclass VOP3_Real_gfx6_gfx7_gfx10<bits<10> op> :
1980  VOP3_Real_gfx6_gfx7<op>, VOP3_Real_gfx10<op>;
1981
// Convenience multiclass: instantiates both VOP3be_Real_gfx6_gfx7 and
// VOP3be_Real_gfx10 with the same `op`, yielding `_gfx6_gfx7` and `_gfx10`
// records.
1982multiclass VOP3be_Real_gfx6_gfx7_gfx10<bits<10> op> :
1983  VOP3be_Real_gfx6_gfx7<op>, VOP3be_Real_gfx10<op>;
1984
1985defm V_LSHL_B64        : VOP3_Real_gfx6_gfx7<0x161>;
1986defm V_LSHR_B64        : VOP3_Real_gfx6_gfx7<0x162>;
1987defm V_ASHR_I64        : VOP3_Real_gfx6_gfx7<0x163>;
1988defm V_MUL_LO_I32      : VOP3_Real_gfx6_gfx7<0x16b>;
1989
1990defm V_MAD_LEGACY_F32  : VOP3_Real_gfx6_gfx7_gfx10<0x140>;
1991defm V_MAD_F32         : VOP3_Real_gfx6_gfx7_gfx10<0x141>;
1992defm V_MAD_I32_I24     : VOP3_Real_gfx6_gfx7_gfx10<0x142>;
1993defm V_MAD_U32_U24     : VOP3_Real_gfx6_gfx7_gfx10<0x143>;
1994defm V_CUBEID_F32      : VOP3_Real_gfx6_gfx7_gfx10<0x144>;
1995defm V_CUBESC_F32      : VOP3_Real_gfx6_gfx7_gfx10<0x145>;
1996defm V_CUBETC_F32      : VOP3_Real_gfx6_gfx7_gfx10<0x146>;
1997defm V_CUBEMA_F32      : VOP3_Real_gfx6_gfx7_gfx10<0x147>;
1998defm V_BFE_U32         : VOP3_Real_gfx6_gfx7_gfx10<0x148>;
1999defm V_BFE_I32         : VOP3_Real_gfx6_gfx7_gfx10<0x149>;
2000defm V_BFI_B32         : VOP3_Real_gfx6_gfx7_gfx10<0x14a>;
2001defm V_FMA_F32         : VOP3_Real_gfx6_gfx7_gfx10<0x14b>;
2002defm V_FMA_F64         : VOP3_Real_gfx6_gfx7_gfx10<0x14c>;
2003defm V_LERP_U8         : VOP3_Real_gfx6_gfx7_gfx10<0x14d>;
2004defm V_ALIGNBIT_B32    : VOP3_Real_gfx6_gfx7_gfx10<0x14e>;
2005defm V_ALIGNBYTE_B32   : VOP3_Real_gfx6_gfx7_gfx10<0x14f>;
2006defm V_MULLIT_F32      : VOP3_Real_gfx6_gfx7_gfx10<0x150>;
2007defm V_MIN3_F32        : VOP3_Real_gfx6_gfx7_gfx10<0x151>;
2008defm V_MIN3_I32        : VOP3_Real_gfx6_gfx7_gfx10<0x152>;
2009defm V_MIN3_U32        : VOP3_Real_gfx6_gfx7_gfx10<0x153>;
2010defm V_MAX3_F32        : VOP3_Real_gfx6_gfx7_gfx10<0x154>;
2011defm V_MAX3_I32        : VOP3_Real_gfx6_gfx7_gfx10<0x155>;
2012defm V_MAX3_U32        : VOP3_Real_gfx6_gfx7_gfx10<0x156>;
2013defm V_MED3_F32        : VOP3_Real_gfx6_gfx7_gfx10<0x157>;
2014defm V_MED3_I32        : VOP3_Real_gfx6_gfx7_gfx10<0x158>;
2015defm V_MED3_U32        : VOP3_Real_gfx6_gfx7_gfx10<0x159>;
2016defm V_SAD_U8          : VOP3_Real_gfx6_gfx7_gfx10<0x15a>;
2017defm V_SAD_HI_U8       : VOP3_Real_gfx6_gfx7_gfx10<0x15b>;
2018defm V_SAD_U16         : VOP3_Real_gfx6_gfx7_gfx10<0x15c>;
2019defm V_SAD_U32         : VOP3_Real_gfx6_gfx7_gfx10<0x15d>;
2020defm V_CVT_PK_U8_F32   : VOP3_Real_gfx6_gfx7_gfx10<0x15e>;
2021defm V_DIV_FIXUP_F32   : VOP3_Real_gfx6_gfx7_gfx10<0x15f>;
2022defm V_DIV_FIXUP_F64   : VOP3_Real_gfx6_gfx7_gfx10<0x160>;
2023defm V_ADD_F64         : VOP3_Real_gfx6_gfx7_gfx10<0x164>;
2024defm V_MUL_F64         : VOP3_Real_gfx6_gfx7_gfx10<0x165>;
2025defm V_MIN_F64         : VOP3_Real_gfx6_gfx7_gfx10<0x166>;
2026defm V_MAX_F64         : VOP3_Real_gfx6_gfx7_gfx10<0x167>;
2027defm V_LDEXP_F64       : VOP3_Real_gfx6_gfx7_gfx10<0x168>;
2028defm V_MUL_LO_U32      : VOP3_Real_gfx6_gfx7_gfx10<0x169>;
2029defm V_MUL_HI_U32      : VOP3_Real_gfx6_gfx7_gfx10<0x16a>;
2030defm V_MUL_HI_I32      : VOP3_Real_gfx6_gfx7_gfx10<0x16c>;
2031defm V_DIV_FMAS_F32    : VOP3_Real_gfx6_gfx7_gfx10<0x16f>;
2032defm V_DIV_FMAS_F64    : VOP3_Real_gfx6_gfx7_gfx10<0x170>;
2033defm V_MSAD_U8         : VOP3_Real_gfx6_gfx7_gfx10<0x171>;
2034defm V_MQSAD_PK_U16_U8 : VOP3_Real_gfx6_gfx7_gfx10<0x173>;
2035defm V_TRIG_PREOP_F64  : VOP3_Real_gfx6_gfx7_gfx10<0x174>;
2036defm V_DIV_SCALE_F32   : VOP3be_Real_gfx6_gfx7_gfx10<0x16d>;
2037defm V_DIV_SCALE_F64   : VOP3be_Real_gfx6_gfx7_gfx10<0x16e>;
2038
// V_FMA_LEGACY_F32 gets a GFX10 real with op 0x140, the same value passed
// for V_MAD_LEGACY_F32 in the GFX6/GFX7/GFX10 list. Its decoder namespace is
// overridden to "GFX10_B" rather than the "GFX10" default of
// VOP3_Real_gfx10.
// NOTE(review): presumably the separate namespace lets the disassembler
// tell the two identically-encoded instructions apart - confirm.
2039// NB: Same opcode as v_mad_legacy_f32
2040let DecoderNamespace = "GFX10_B" in
2041defm V_FMA_LEGACY_F32  : VOP3_Real_gfx10<0x140>;
2042
2043//===----------------------------------------------------------------------===//
2044// GFX8, GFX9 (VI).
2045//===----------------------------------------------------------------------===//
2046
// Real-instruction multiclasses guarded by the isGFX8GFX9 assembler
// predicate, in the "GFX8" decoder namespace. Every multiclass here defines
// one `_vi` record built from VOP3_Real<pseudo, SIEncodingFamily.VI> plus an
// encoding class that receives `op` and the pseudo's profile (`.Pfl`). This
// includes the two multiclasses whose names end in gfx9.
2047let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {
2048
// VOP3e_vi encoding; pseudo is NAME#"_e64".
2049multiclass VOP3_Real_vi<bits<10> op> {
2050  def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
2051            VOP3e_vi <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl>;
2052}
// VOP3e_vi encoding; pseudo is NAME with no "_e64" suffix appended.
2053multiclass VOP3_Real_No_Suffix_vi<bits<10> op> {
2054  def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>,
2055            VOP3e_vi <op, !cast<VOP_Pseudo>(NAME).Pfl>;
2056}
2057
// VOP3be_vi encoding; pseudo is NAME#"_e64".
2058multiclass VOP3be_Real_vi<bits<10> op> {
2059  def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
2060            VOP3be_vi <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl>;
2061}
2062
// VOP3OpSel_gfx9 encoding; pseudo is NAME#"_e64". The record suffix and
// encoding family are still `_vi` / VI.
2063multiclass VOP3OpSel_Real_gfx9<bits<10> op> {
2064  def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
2065            VOP3OpSel_gfx9 <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl>;
2066}
2067
// Same as VOP3OpSel_Real_gfx9, but instruction bit 13 is explicitly
// overridden to take bit 2 of src2_modifiers (the op_sel(2) bit).
2068multiclass VOP3OpSel_Real_gfx9_forced_opsel2<bits<10> op> {
2069  def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
2070            VOP3OpSel_gfx9 <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl> {
2071    let Inst{13} = src2_modifiers{2}; // op_sel(2)
2072  }
2073}
2074
// VOP3Interp_vi encoding; pseudo is NAME with no "_e64" suffix appended.
2075multiclass VOP3Interp_Real_vi<bits<10> op> {
2076  def _vi : VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>,
2077            VOP3Interp_vi <op, !cast<VOP_Pseudo>(NAME).Pfl>;
2078}
2079
2080} // End AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8"
2081
// Real-instruction multiclasses guarded by the isGFX8Only assembler
// predicate (not GFX9), in the "GFX8" decoder namespace. Both define a `_vi`
// record with SIEncodingFamily.VI.
2082let AssemblerPredicate = isGFX8Only, DecoderNamespace = "GFX8" in {
2083
// VOP3e_vi encoding; pseudo is NAME#"_e64".
2084multiclass VOP3_F16_Real_vi<bits<10> op> {
2085  def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
2086            VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
2087}
2088
// VOP3Interp_vi encoding; pseudo is NAME with no "_e64" suffix appended.
2089multiclass VOP3Interp_F16_Real_vi<bits<10> op> {
2090  def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
2091            VOP3Interp_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
2092}
2093
2094} // End AssemblerPredicate = isGFX8Only, DecoderNamespace = "GFX8"
2095
// Real-instruction multiclasses guarded by the isGFX9Only assembler
// predicate, in the "GFX9" decoder namespace. Each defines one `_gfx9`
// record with SIEncodingFamily.GFX9 and rebuilds AsmString as an explicit
// mnemonic followed by the pseudo's AsmOperands.
2096let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in {
2097
// VOP3e_vi encoding for the pseudo OpName#"_e64" (which may differ from the
// defm name), printed with mnemonic AsmName.
2098multiclass VOP3_F16_Real_gfx9<bits<10> op, string OpName, string AsmName> {
2099  def _gfx9 : VOP3_Real<!cast<VOP3_Pseudo>(OpName#"_e64"), SIEncodingFamily.GFX9>,
2100            VOP3e_vi <op, !cast<VOP3_Pseudo>(OpName#"_e64").Pfl> {
2101              VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName#"_e64");
2102              let AsmString = AsmName # ps.AsmOperands;
2103            }
2104}
2105
// VOP3OpSel_gfx9 encoding for the pseudo NAME#"_e64", printed with mnemonic
// AsmName.
2106multiclass VOP3OpSel_F16_Real_gfx9<bits<10> op, string AsmName> {
2107  def _gfx9 : VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX9>,
2108            VOP3OpSel_gfx9 <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
2109              VOP3_Pseudo ps = !cast<VOP3_Pseudo>(NAME#"_e64");
2110              let AsmString = AsmName # ps.AsmOperands;
2111            }
2112}
2113
// VOP3Interp_vi encoding for the pseudo OpName (no "_e64" suffix appended),
// printed with mnemonic AsmName.
2114multiclass VOP3Interp_F16_Real_gfx9<bits<10> op, string OpName, string AsmName> {
2115  def _gfx9 : VOP3_Real<!cast<VOP3_Pseudo>(OpName), SIEncodingFamily.GFX9>,
2116            VOP3Interp_vi <op, !cast<VOP3_Pseudo>(OpName).Pfl> {
2117              VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName);
2118              let AsmString = AsmName # ps.AsmOperands;
2119            }
2120}
2121
// VOP3e_vi encoding for the pseudo NAME#"_e64", printed with mnemonic
// AsmName.
2122multiclass VOP3_Real_gfx9<bits<10> op, string AsmName> {
2123  def _gfx9 : VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX9>,
2124              VOP3e_vi <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl> {
2125              VOP_Pseudo ps = !cast<VOP_Pseudo>(NAME#"_e64");
2126              let AsmString = AsmName # ps.AsmOperands;
2127            }
2128}
2129
// VOP3e_vi encoding for the pseudo NAME#"_e64" with an extra 8-bit `bitop3`
// field and conditional op_sel bits.
//   - IsSingle is set when either the `isSingle` argument or the pseudo
//     profile's IsSingle is set.
//   - `bitop3` is split across the instruction word: bits 7-6 go to
//     Inst{60-59}, bits 5-3 to Inst{10-8}, and bits 2-0 to Inst{63-61}.
//   - Inst{11}, Inst{12}, Inst{13} take bit 2 of src0/src1/src2 modifiers and
//     Inst{14} takes bit 3 of src0_modifiers, but only when the profile has
//     HasOpSel; otherwise those bits are 0.
2130multiclass VOP3_Real_BITOP3_gfx9<bits<10> op, string AsmName, bit isSingle = 0> {
2131  defvar ps = !cast<VOP_Pseudo>(NAME#"_e64");
2132  let IsSingle = !or(isSingle, ps.Pfl.IsSingle) in {
2133    def _gfx9 : VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX9>,
2134                VOP3e_vi <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl> {
2135      let AsmString = AsmName # ps.AsmOperands;
2136      bits<8> bitop3;
2137      let Inst{60-59} = bitop3{7-6};
2138      let Inst{10-8}  = bitop3{5-3};
2139      let Inst{63-61} = bitop3{2-0};
2140      let Inst{11} = !if(ps.Pfl.HasOpSel, src0_modifiers{2}, 0);
2141      let Inst{12} = !if(ps.Pfl.HasOpSel, src1_modifiers{2}, 0);
2142      let Inst{13} = !if(ps.Pfl.HasOpSel, src2_modifiers{2}, 0);
2143      let Inst{14} = !if(ps.Pfl.HasOpSel, src0_modifiers{3}, 0);
2144    }
2145  }
2146}
2147
2148} // End AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9"
2149
2150defm V_MAD_U64_U32      : VOP3be_Real_vi <0x1E8>;
2151defm V_MAD_I64_I32      : VOP3be_Real_vi <0x1E9>;
2152
2153defm V_MAD_LEGACY_F32   : VOP3_Real_vi <0x1c0>;
2154defm V_MAD_F32          : VOP3_Real_vi <0x1c1>;
2155defm V_MAD_I32_I24      : VOP3_Real_vi <0x1c2>;
2156defm V_MAD_U32_U24      : VOP3_Real_vi <0x1c3>;
2157defm V_CUBEID_F32       : VOP3_Real_vi <0x1c4>;
2158defm V_CUBESC_F32       : VOP3_Real_vi <0x1c5>;
2159defm V_CUBETC_F32       : VOP3_Real_vi <0x1c6>;
2160defm V_CUBEMA_F32       : VOP3_Real_vi <0x1c7>;
2161defm V_BFE_U32          : VOP3_Real_vi <0x1c8>;
2162defm V_BFE_I32          : VOP3_Real_vi <0x1c9>;
2163defm V_BFI_B32          : VOP3_Real_vi <0x1ca>;
2164defm V_FMA_F32          : VOP3_Real_vi <0x1cb>;
2165defm V_FMA_F64          : VOP3_Real_vi <0x1cc>;
2166defm V_LERP_U8          : VOP3_Real_vi <0x1cd>;
2167defm V_ALIGNBIT_B32     : VOP3_Real_vi <0x1ce>;
2168defm V_ALIGNBYTE_B32    : VOP3_Real_vi <0x1cf>;
2169defm V_MIN3_F32         : VOP3_Real_vi <0x1d0>;
2170defm V_MIN3_I32         : VOP3_Real_vi <0x1d1>;
2171defm V_MIN3_U32         : VOP3_Real_vi <0x1d2>;
2172defm V_MAX3_F32         : VOP3_Real_vi <0x1d3>;
2173defm V_MAX3_I32         : VOP3_Real_vi <0x1d4>;
2174defm V_MAX3_U32         : VOP3_Real_vi <0x1d5>;
2175defm V_MED3_F32         : VOP3_Real_vi <0x1d6>;
2176defm V_MED3_I32         : VOP3_Real_vi <0x1d7>;
2177defm V_MED3_U32         : VOP3_Real_vi <0x1d8>;
2178defm V_SAD_U8           : VOP3_Real_vi <0x1d9>;
2179defm V_SAD_HI_U8        : VOP3_Real_vi <0x1da>;
2180defm V_SAD_U16          : VOP3_Real_vi <0x1db>;
2181defm V_SAD_U32          : VOP3_Real_vi <0x1dc>;
2182defm V_CVT_PK_U8_F32    : VOP3_Real_vi <0x1dd>;
2183defm V_DIV_FIXUP_F32    : VOP3_Real_vi <0x1de>;
2184defm V_DIV_FIXUP_F64    : VOP3_Real_vi <0x1df>;
2185defm V_DIV_SCALE_F32    : VOP3be_Real_vi <0x1e0>;
2186defm V_DIV_SCALE_F64    : VOP3be_Real_vi <0x1e1>;
2187defm V_DIV_FMAS_F32     : VOP3_Real_vi <0x1e2>;
2188defm V_DIV_FMAS_F64     : VOP3_Real_vi <0x1e3>;
2189defm V_MSAD_U8          : VOP3_Real_vi <0x1e4>;
2190defm V_QSAD_PK_U16_U8   : VOP3_Real_vi <0x1e5>;
2191defm V_MQSAD_PK_U16_U8  : VOP3_Real_vi <0x1e6>;
2192defm V_MQSAD_U32_U8     : VOP3_Real_vi <0x1e7>;
2193
2194defm V_PERM_B32         : VOP3_Real_vi <0x1ed>;
2195
2196defm V_MAD_F16          : VOP3_F16_Real_vi <0x1ea>;
2197defm V_MAD_U16          : VOP3_F16_Real_vi <0x1eb>;
2198defm V_MAD_I16          : VOP3_F16_Real_vi <0x1ec>;
2199defm V_FMA_F16          : VOP3_F16_Real_vi <0x1ee>;
2200defm V_DIV_FIXUP_F16    : VOP3_F16_Real_vi <0x1ef>;
2201defm V_INTERP_P2_F16    : VOP3Interp_F16_Real_vi <0x276>;
2202
// GFX9-only reals for the F16 pseudos named in the second argument
// (V_MAD_F16, V_FMA_F16, V_DIV_FIXUP_F16, V_INTERP_P2_F16). They reuse the
// op values given to the GFX8-only `_vi` reals of those pseudos (0x1ea,
// 0x1ee, 0x1ef, 0x276) but are printed with "*_legacy_*" mnemonics. All four
// are defined with FPDPRounding = 1.
2203let FPDPRounding = 1 in {
2204defm V_MAD_LEGACY_F16       : VOP3_F16_Real_gfx9 <0x1ea, "V_MAD_F16",       "v_mad_legacy_f16">;
2205defm V_FMA_LEGACY_F16       : VOP3_F16_Real_gfx9 <0x1ee, "V_FMA_F16",       "v_fma_legacy_f16">;
2206defm V_DIV_FIXUP_LEGACY_F16 : VOP3_F16_Real_gfx9 <0x1ef, "V_DIV_FIXUP_F16", "v_div_fixup_legacy_f16">;
2207defm V_INTERP_P2_LEGACY_F16 : VOP3Interp_F16_Real_gfx9 <0x276, "V_INTERP_P2_F16", "v_interp_p2_legacy_f16">;
2208} // End FPDPRounding = 1
2209
2210defm V_MAD_LEGACY_U16       : VOP3_F16_Real_gfx9 <0x1eb, "V_MAD_U16",       "v_mad_legacy_u16">;
2211defm V_MAD_LEGACY_I16       : VOP3_F16_Real_gfx9 <0x1ec, "V_MAD_I16",       "v_mad_legacy_i16">;
2212
2213defm V_MAD_F16_gfx9         : VOP3OpSel_F16_Real_gfx9 <0x203, "v_mad_f16">;
2214defm V_MAD_U16_gfx9         : VOP3OpSel_F16_Real_gfx9 <0x204, "v_mad_u16">;
2215defm V_MAD_I16_gfx9         : VOP3OpSel_F16_Real_gfx9 <0x205, "v_mad_i16">;
2216defm V_FMA_F16_gfx9         : VOP3OpSel_F16_Real_gfx9 <0x206, "v_fma_f16">;
2217defm V_DIV_FIXUP_F16_gfx9   : VOP3OpSel_F16_Real_gfx9 <0x207, "v_div_fixup_f16">;
2218defm V_INTERP_P2_F16_gfx9   : VOP3Interp_F16_Real_gfx9 <0x277, "V_INTERP_P2_F16_gfx9", "v_interp_p2_f16">;
2219
2220defm V_ADD_I32         : VOP3_Real_vi <0x29c>;
2221defm V_SUB_I32         : VOP3_Real_vi <0x29d>;
2222
2223defm V_INTERP_P1_F32_e64  : VOP3Interp_Real_vi <0x270>;
2224defm V_INTERP_P2_F32_e64  : VOP3Interp_Real_vi <0x271>;
2225defm V_INTERP_MOV_F32_e64 : VOP3Interp_Real_vi <0x272>;
2226
2227defm V_INTERP_P1LL_F16  : VOP3Interp_Real_vi <0x274>;
2228defm V_INTERP_P1LV_F16  : VOP3Interp_Real_vi <0x275>;
2229defm V_ADD_F64          : VOP3_Real_vi <0x280>;
2230defm V_MUL_F64          : VOP3_Real_vi <0x281>;
2231defm V_MIN_F64          : VOP3_Real_vi <0x282>;
2232defm V_MAX_F64          : VOP3_Real_vi <0x283>;
2233defm V_LDEXP_F64        : VOP3_Real_vi <0x284>;
2234defm V_MUL_LO_U32       : VOP3_Real_vi <0x285>;
2235
// V_MUL_LO_I32 is given the same op value (0x285) as V_MUL_LO_U32 and is
// marked isAsmParserOnly.
// NOTE(review): presumably this keeps the mnemonic accepted by the assembler
// while the disassembler only ever produces v_mul_lo_u32 - confirm.
2236// removed from VI as identical to V_MUL_LO_U32
2237let isAsmParserOnly = 1 in {
2238defm V_MUL_LO_I32       : VOP3_Real_vi <0x285>;
2239}
2240
2241defm V_MUL_HI_U32       : VOP3_Real_vi <0x286>;
2242defm V_MUL_HI_I32       : VOP3_Real_vi <0x287>;
2243
2244defm V_READLANE_B32     : VOP3_Real_No_Suffix_vi <0x289>;
2245defm V_WRITELANE_B32    : VOP3_Real_No_Suffix_vi <0x28a>;
2246
2247defm V_LSHLREV_B64      : VOP3_Real_vi <0x28f>;
2248defm V_LSHRREV_B64      : VOP3_Real_vi <0x290>;
2249defm V_ASHRREV_I64      : VOP3_Real_vi <0x291>;
2250defm V_TRIG_PREOP_F64   : VOP3_Real_vi <0x292>;
2251
2252defm V_LSHL_ADD_U32 : VOP3_Real_vi <0x1fd>;
2253defm V_ADD_LSHL_U32 : VOP3_Real_vi <0x1fe>;
2254defm V_ADD3_U32 : VOP3_Real_vi <0x1ff>;
2255defm V_LSHL_OR_B32 : VOP3_Real_vi <0x200>;
2256defm V_AND_OR_B32 : VOP3_Real_vi <0x201>;
2257defm V_OR3_B32 : VOP3_Real_vi <0x202>;
2258defm V_PACK_B32_F16 : VOP3OpSel_Real_gfx9 <0x2a0>;
2259
2260defm V_XAD_U32 : VOP3_Real_vi <0x1f3>;
2261
2262defm V_MIN3_F16 : VOP3OpSel_Real_gfx9 <0x1f4>;
2263defm V_MIN3_I16 : VOP3OpSel_Real_gfx9 <0x1f5>;
2264defm V_MIN3_U16 : VOP3OpSel_Real_gfx9 <0x1f6>;
2265
2266defm V_MAX3_F16 : VOP3OpSel_Real_gfx9 <0x1f7>;
2267defm V_MAX3_I16 : VOP3OpSel_Real_gfx9 <0x1f8>;
2268defm V_MAX3_U16 : VOP3OpSel_Real_gfx9 <0x1f9>;
2269
2270defm V_MED3_F16 : VOP3OpSel_Real_gfx9 <0x1fa>;
2271defm V_MED3_I16 : VOP3OpSel_Real_gfx9 <0x1fb>;
2272defm V_MED3_U16 : VOP3OpSel_Real_gfx9 <0x1fc>;
2273
2274defm V_ADD_I16  : VOP3OpSel_Real_gfx9 <0x29e>;
2275defm V_SUB_I16  : VOP3OpSel_Real_gfx9 <0x29f>;
2276
2277defm V_MAD_U32_U16 : VOP3OpSel_Real_gfx9 <0x1f1>;
2278defm V_MAD_I32_I16 : VOP3OpSel_Real_gfx9 <0x1f2>;
2279
2280defm V_CVT_PKNORM_I16_F16 : VOP3OpSel_Real_gfx9 <0x299>;
2281defm V_CVT_PKNORM_U16_F16 : VOP3OpSel_Real_gfx9 <0x29a>;
2282
2283defm V_LSHL_ADD_U64 : VOP3_Real_vi <0x208>;
2284
2285defm V_CVT_PK_FP8_F32 : VOP3OpSel_Real_gfx9 <0x2a2>;
2286defm V_CVT_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x2a3>;
2287defm V_CVT_PK_BF16_F32: VOP3OpSel_Real_gfx9 <0x268>;
2288defm V_CVT_SR_FP8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a4>;
2289defm V_CVT_SR_BF8_F32 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a5>;
2290
2291defm V_MINIMUM3_F32 : VOP3_Real_vi <0x2a8>;
2292defm V_MAXIMUM3_F32 : VOP3_Real_vi <0x2a9>;
2293
2294defm V_BITOP3_B16         : VOP3_Real_BITOP3_gfx9<0x233, "v_bitop3_b16">;
2295defm V_BITOP3_B32         : VOP3_Real_BITOP3_gfx9<0x234, "v_bitop3_b32">;
2296let OtherPredicates = [HasFP8ConversionScaleInsts] in {
2297defm V_CVT_SCALEF32_SR_FP8_BF16 : VOP3OpSel_Real_gfx9<0x246>;
2298defm V_CVT_SCALEF32_SR_FP8_F16 : VOP3OpSel_Real_gfx9<0x242>;
2299defm V_CVT_SCALEF32_SR_FP8_F32 : VOP3OpSel_Real_gfx9<0x237>;
2300defm V_CVT_SCALEF32_F16_FP8 : VOP3OpSel_Real_gfx9 <0x24a>;
2301defm V_CVT_SCALEF32_F32_FP8 : VOP3OpSel_Real_gfx9 <0x23b>;
2302defm V_CVT_SCALEF32_PK_FP8_F32 : VOP3OpSel_Real_gfx9 <0x235>;
2303defm V_CVT_SCALEF32_PK_F32_FP8 : VOP3OpSel_Real_gfx9 <0x239>;
2304defm V_CVT_SCALEF32_PK_FP8_F16 : VOP3OpSel_Real_gfx9 <0x240>;
2305defm V_CVT_SCALEF32_PK_FP8_BF16: VOP3OpSel_Real_gfx9 <0x244>;
2306defm V_CVT_SCALEF32_PK_F16_FP8  : VOP3OpSel_Real_gfx9<0x248>;
2307defm V_CVT_SCALEF32_PK_BF16_FP8 : VOP3OpSel_Real_gfx9<0x269>;
2308}
// V_CVT_SCALEF32_* records that convert to or from BF8 (per their names).
// The `let ... in` sets OtherPredicates = [HasBF8ConversionScaleInsts] on every
// record defined inside the braces. All entries use VOP3OpSel_Real_gfx9; each
// numeric argument here is one above that of the same-named FP8 record
// (e.g. 0x247 vs. 0x246), including the out-of-range 0x26a entry.
2309let OtherPredicates = [HasBF8ConversionScaleInsts] in {
2310defm V_CVT_SCALEF32_SR_BF8_BF16 : VOP3OpSel_Real_gfx9<0x247>;
2311defm V_CVT_SCALEF32_SR_BF8_F16 : VOP3OpSel_Real_gfx9<0x243>;
2312defm V_CVT_SCALEF32_SR_BF8_F32 : VOP3OpSel_Real_gfx9<0x238>;
2313defm V_CVT_SCALEF32_F16_BF8 : VOP3OpSel_Real_gfx9 <0x24b>;
2314defm V_CVT_SCALEF32_F32_BF8 : VOP3OpSel_Real_gfx9 <0x23c>;
2315defm V_CVT_SCALEF32_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x236>;
2316defm V_CVT_SCALEF32_PK_F32_BF8 : VOP3OpSel_Real_gfx9 <0x23a>;
2317defm V_CVT_SCALEF32_PK_BF8_F16 : VOP3OpSel_Real_gfx9 <0x241>;
2318defm V_CVT_SCALEF32_PK_BF8_BF16: VOP3OpSel_Real_gfx9 <0x245>;
2319defm V_CVT_SCALEF32_PK_F16_BF8  : VOP3OpSel_Real_gfx9<0x249>;
2320defm V_CVT_SCALEF32_PK_BF16_BF8 : VOP3OpSel_Real_gfx9<0x26a>;
2321} // End OtherPredicates = [HasBF8ConversionScaleInsts]
// V_CVT_SCALEF32_* records that convert to or from FP4 (per their names).
// The `let ... in` sets OtherPredicates = [HasFP4ConversionScaleInsts] on every
// record defined inside the braces. Only V_CVT_SCALEF32_PK_FP4_F16 and
// V_CVT_SCALEF32_PK_FP4_BF16 use the _forced_opsel2 multiclass variant; the
// remaining entries, including the SR_PK_FP4 ones, use the plain
// VOP3OpSel_Real_gfx9.
2322let OtherPredicates = [HasFP4ConversionScaleInsts] in {
2323defm V_CVT_SCALEF32_PK_F32_FP4 : VOP3OpSel_Real_gfx9 <0x23f>;
2324defm V_CVT_SCALEF32_PK_FP4_F32 : VOP3OpSel_Real_gfx9 <0x23d>;
2325defm V_CVT_SCALEF32_PK_F16_FP4 : VOP3OpSel_Real_gfx9 <0x250>;
2326defm V_CVT_SCALEF32_PK_BF16_FP4 : VOP3OpSel_Real_gfx9 <0x251>;
2327defm V_CVT_SCALEF32_PK_FP4_F16 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x24c>;
2328defm V_CVT_SCALEF32_PK_FP4_BF16: VOP3OpSel_Real_gfx9_forced_opsel2 <0x24d>;
2329defm V_CVT_SCALEF32_SR_PK_FP4_F16:  VOP3OpSel_Real_gfx9 <0x24e>;
2330defm V_CVT_SCALEF32_SR_PK_FP4_BF16: VOP3OpSel_Real_gfx9 <0x24f>;
2331defm V_CVT_SCALEF32_SR_PK_FP4_F32:  VOP3OpSel_Real_gfx9 <0x23e>;
2332} // End OtherPredicates = [HasFP4ConversionScaleInsts]
// V_CVT_SCALEF32_PK32_* records whose source format is FP6 or BF6 (per their
// names). The `let ... in` sets
// OtherPredicates = [HasFP6BF6ConversionScaleInsts] on every record defined
// inside the braces. These use VOP3_Real_gfx9, which is passed a string (the
// lower-case instruction name) alongside the numeric argument.
2333let OtherPredicates = [HasFP6BF6ConversionScaleInsts] in {
2334defm V_CVT_SCALEF32_PK32_F32_FP6 : VOP3_Real_gfx9<0x256, "v_cvt_scalef32_pk32_f32_fp6">;
2335defm V_CVT_SCALEF32_PK32_F32_BF6 : VOP3_Real_gfx9<0x257, "v_cvt_scalef32_pk32_f32_bf6">;
2336defm V_CVT_SCALEF32_PK32_F16_FP6  : VOP3_Real_gfx9<0x260, "v_cvt_scalef32_pk32_f16_fp6">;
2337defm V_CVT_SCALEF32_PK32_BF16_FP6 : VOP3_Real_gfx9<0x261, "v_cvt_scalef32_pk32_bf16_fp6">;
2338defm V_CVT_SCALEF32_PK32_F16_BF6  : VOP3_Real_gfx9<0x262, "v_cvt_scalef32_pk32_f16_bf6">;
2339defm V_CVT_SCALEF32_PK32_BF16_BF6 : VOP3_Real_gfx9<0x263, "v_cvt_scalef32_pk32_bf16_bf6">;
2340} // End OtherPredicates = [HasFP6BF6ConversionScaleInsts]
2341
// V_CVT_SCALEF32_PK32_* and V_CVT_SCALEF32_SR_PK32_* records whose destination
// format is FP6 or BF6 (per their names). The `let ... in` sets
// OtherPredicates = [HasF16BF16ToFP6BF6ConversionScaleInsts] on every record
// defined inside the braces.
// NOTE(review): the two SR_PK32_*_F32 entries (0x254, 0x255) take an F32
// source by name yet sit under this F16/BF16-named predicate -- confirm that
// this gating is intended.
2342let OtherPredicates = [HasF16BF16ToFP6BF6ConversionScaleInsts] in {
2343defm V_CVT_SCALEF32_PK32_FP6_F16  : VOP3_Real_gfx9<0x258, "v_cvt_scalef32_pk32_fp6_f16">;
2344defm V_CVT_SCALEF32_PK32_FP6_BF16 : VOP3_Real_gfx9<0x259, "v_cvt_scalef32_pk32_fp6_bf16">;
2345defm V_CVT_SCALEF32_PK32_BF6_F16  : VOP3_Real_gfx9<0x25a, "v_cvt_scalef32_pk32_bf6_f16">;
2346defm V_CVT_SCALEF32_PK32_BF6_BF16 : VOP3_Real_gfx9<0x25b, "v_cvt_scalef32_pk32_bf6_bf16">;
2347defm V_CVT_SCALEF32_SR_PK32_BF6_BF16  : VOP3_Real_gfx9<0x25f, "v_cvt_scalef32_sr_pk32_bf6_bf16">;
2348defm V_CVT_SCALEF32_SR_PK32_BF6_F16   : VOP3_Real_gfx9<0x25e, "v_cvt_scalef32_sr_pk32_bf6_f16">;
2349defm V_CVT_SCALEF32_SR_PK32_BF6_F32   : VOP3_Real_gfx9<0x255, "v_cvt_scalef32_sr_pk32_bf6_f32">;
2350defm V_CVT_SCALEF32_SR_PK32_FP6_BF16  : VOP3_Real_gfx9<0x25d, "v_cvt_scalef32_sr_pk32_fp6_bf16">;
2351defm V_CVT_SCALEF32_SR_PK32_FP6_F16   : VOP3_Real_gfx9<0x25c, "v_cvt_scalef32_sr_pk32_fp6_f16">;
2352defm V_CVT_SCALEF32_SR_PK32_FP6_F32   : VOP3_Real_gfx9<0x254, "v_cvt_scalef32_sr_pk32_fp6_f32">;
2353} // End OtherPredicates = [HasF16BF16ToFP6BF6ConversionScaleInsts]
2354
// V_CVT_SR_F16_F32 / V_CVT_SR_BF16_F32 records. The `let ... in` sets
// OtherPredicates = [HasF32ToF16BF16ConversionSRInsts] on both.
// Unlike V_CVT_SR_FP8_F32 / V_CVT_SR_BF8_F32 earlier in this file, these use
// the plain VOP3OpSel_Real_gfx9 multiclass, not the _forced_opsel2 variant.
2355let OtherPredicates = [HasF32ToF16BF16ConversionSRInsts] in {
2356defm V_CVT_SR_F16_F32 : VOP3OpSel_Real_gfx9 <0x2a6>;
2357defm V_CVT_SR_BF16_F32: VOP3OpSel_Real_gfx9 <0x2a7>;
2358} // End OtherPredicates = [HasF32ToF16BF16ConversionSRInsts]
2359
// The I8 and U8 variants of V_ASHR_PK_*_I32, instantiated through
// VOP3OpSel_Real_gfx9 with the adjacent arguments 0x265 and 0x266. No
// OtherPredicates wrapper is applied at this point in the file.
2360defm V_ASHR_PK_I8_I32 : VOP3OpSel_Real_gfx9 <0x265>;
2361defm V_ASHR_PK_U8_I32 : VOP3OpSel_Real_gfx9 <0x266>;
// V_CVT_PK_F16_F32 record. The `let ... in` sets
// OtherPredicates = [HasCvtPkF16F32Inst] on it. It uses VOP3_Real_gfx9 with an
// explicit name string rather than the OpSel multiclass.
2362let OtherPredicates = [HasCvtPkF16F32Inst] in {
2363defm V_CVT_PK_F16_F32 : VOP3_Real_gfx9<0x267, "v_cvt_pk_f16_f32">;
2364} // End OtherPredicates = [HasCvtPkF16F32Inst]
2365
// V_CVT_SCALEF32_2XPK16_{FP6,BF6}_F32 records, instantiated through
// VOP3_Real_gfx9 with the adjacent arguments 0x252 and 0x253.
// NOTE(review): unlike the other V_CVT_SCALEF32_* FP6/BF6 definitions in this
// file, these two are not wrapped in a `let OtherPredicates = [...]` block
// here -- confirm whether the gating is supplied elsewhere.
2366defm V_CVT_SCALEF32_2XPK16_FP6_F32 : VOP3_Real_gfx9<0x252, "v_cvt_scalef32_2xpk16_fp6_f32">;
2367defm V_CVT_SCALEF32_2XPK16_BF6_F32 : VOP3_Real_gfx9<0x253, "v_cvt_scalef32_2xpk16_bf6_f32">;
2368