xref: /llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td (revision 5e007afa9d4f175decc328ee89533a5fe89be99b)
1//===-- SIInstrInfo.td -----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
// Wavefront-size predicates. Each one combines a C++ subtarget query with an
// assembler predicate on the corresponding wavefront-size feature.
def isWave32
    : Predicate<"Subtarget->isWave32()">,
      AssemblerPredicate<(all_of FeatureWavefrontSize32)>;

def isWave64
    : Predicate<"Subtarget->isWave64()">,
      AssemblerPredicate<(all_of FeatureWavefrontSize64)>;
13
// A MnemonicAlias that additionally inherits PredicateControl.
class AMDGPUMnemonicAlias<string From, string To, string VariantName = "">
  : MnemonicAlias<From, To, VariantName>,
    PredicateControl;
16
// Numeric identifiers for the encoding families.
//
// Except for the NONE field, this must be kept in sync with the
// SIEncodingFamily enum in SIInstrInfo.cpp and the columns of the
// getMCOpcodeGen table.
def SIEncodingFamily {
  int NONE   = -1;
  int SI     = 0;
  int VI     = 1;
  int SDWA   = 2;
  int SDWA9  = 3;
  int GFX80  = 4;
  int GFX9   = 5;
  int GFX10  = 6;
  int SDWA10 = 7;
  int GFX90A = 8;
  int GFX940 = 9;
  int GFX11  = 10;
  int GFX12  = 11;
}
35
36//===----------------------------------------------------------------------===//
37// Subtarget info
38//===----------------------------------------------------------------------===//
39
// Per-generation settings bundle: the assembler predicate, the decoder
// namespace, the name suffix, and an integer subtarget id.
class GFXGen<Predicate pred, string dn, string suffix, int sub> {
  Predicate AssemblerPredicate = pred;
  string    DecoderNamespace   = dn;
  string    Suffix             = suffix;
  int       Subtarget          = sub;
}
46
// One GFXGen record per generation; the subtarget id comes from
// SIEncodingFamily.
def GFX12Gen
    : GFXGen<isGFX12Only, "GFX12", "_gfx12", SIEncodingFamily.GFX12>;
def GFX11Gen
    : GFXGen<isGFX11Only, "GFX11", "_gfx11", SIEncodingFamily.GFX11>;
def GFX10Gen
    : GFXGen<isGFX10Only, "GFX10", "_gfx10", SIEncodingFamily.GFX10>;
50
51//===----------------------------------------------------------------------===//
52// SI DAG Nodes
53//===----------------------------------------------------------------------===//
54
// AMDGPUISD::CLAMP, typed with the unary floating-point profile.
def AMDGPUclamp
    : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>;
56
// Type profile shared by the scalar buffer load nodes: one result (vdata,
// left unconstrained here) and three operands.
def SDTSBufferLoad : SDTypeProfile<1, 3, [
  SDTCisVT<1, v4i32>,   // rsrc
  SDTCisVT<2, i32>,     // offset(imm)
  SDTCisVT<3, i32>      // cachepolicy
]>;

// Scalar buffer load nodes. All of them may load and carry a memory operand.
def SIsbuffer_load
    : SDNode<"AMDGPUISD::SBUFFER_LOAD", SDTSBufferLoad,
             [SDNPMayLoad, SDNPMemOperand]>;

def SIsbuffer_load_byte
    : SDNode<"AMDGPUISD::SBUFFER_LOAD_BYTE", SDTSBufferLoad,
             [SDNPMayLoad, SDNPMemOperand]>;

def SIsbuffer_load_ubyte
    : SDNode<"AMDGPUISD::SBUFFER_LOAD_UBYTE", SDTSBufferLoad,
             [SDNPMayLoad, SDNPMemOperand]>;

def SIsbuffer_load_short
    : SDNode<"AMDGPUISD::SBUFFER_LOAD_SHORT", SDTSBufferLoad,
             [SDNPMayLoad, SDNPMemOperand]>;

def SIsbuffer_load_ushort
    : SDNode<"AMDGPUISD::SBUFFER_LOAD_USHORT", SDTSBufferLoad,
             [SDNPMayLoad, SDNPMemOperand]>;
80
// AMDGPUISD::DS_ORDERED_COUNT: i32 result, one i32 operand and one i16
// operand. The node is chained, takes input glue, and is flagged as both
// loading and storing.
def SIds_ordered_count
    : SDNode<"AMDGPUISD::DS_ORDERED_COUNT",
             SDTypeProfile<1, 2, [
               SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i16>
             ]>,
             [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain,
              SDNPInGlue]>;
85
// Two-operand atomic profile for floating point: the result is an FP type,
// operand 1 is a pointer and operand 2 has the same type as the result.
def SDTAtomic2_f32 : SDTypeProfile<1, 2,
  [SDTCisSameAs<0, 2>, SDTCisFP<0>, SDTCisPtrTy<1>]>;

// load_d16_{lo|hi} ptr, tied_input
// The result type is the same as the tied input (operand 2).
def SIload_d16 : SDTypeProfile<1, 2,
  [SDTCisPtrTy<1>, SDTCisSameAs<0, 2>]>;
95
96
// Type profile for typed buffer loads: one result (vdata, unconstrained) and
// eight operands.
def SDTtbuffer_load : SDTypeProfile<1, 8, [
  SDTCisVT<1, v4i32>,   // rsrc
  SDTCisVT<2, i32>,     // vindex(VGPR)
  SDTCisVT<3, i32>,     // voffset(VGPR)
  SDTCisVT<4, i32>,     // soffset(SGPR)
  SDTCisVT<5, i32>,     // offset(imm)
  SDTCisVT<6, i32>,     // format(imm)
  SDTCisVT<7, i32>,     // cachepolicy, swizzled buffer(imm)
  SDTCisVT<8, i1>       // idxen(imm)
]>;

def SItbuffer_load
    : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT", SDTtbuffer_load,
             [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;

def SItbuffer_load_d16
    : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT_D16", SDTtbuffer_load,
             [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
114
// Type profile for typed buffer stores: no result and nine operands.
// Operand 0 (vdata) is left unconstrained.
def SDTtbuffer_store : SDTypeProfile<0, 9, [
  SDTCisVT<1, v4i32>,   // rsrc
  SDTCisVT<2, i32>,     // vindex(VGPR)
  SDTCisVT<3, i32>,     // voffset(VGPR)
  SDTCisVT<4, i32>,     // soffset(SGPR)
  SDTCisVT<5, i32>,     // offset(imm)
  SDTCisVT<6, i32>,     // format(imm)
  SDTCisVT<7, i32>,     // cachepolicy, swizzled buffer(imm)
  SDTCisVT<8, i1>       // idxen(imm)
]>;

def SItbuffer_store
    : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", SDTtbuffer_store,
             [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;

def SItbuffer_store_d16
    : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT_D16", SDTtbuffer_store,
             [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
132
// Type profile for buffer loads: one result (vdata, unconstrained) and seven
// operands.
def SDTBufferLoad : SDTypeProfile<1, 7, [
  SDTCisVT<1, v4i32>,   // rsrc
  SDTCisVT<2, i32>,     // vindex(VGPR)
  SDTCisVT<3, i32>,     // voffset(VGPR)
  SDTCisVT<4, i32>,     // soffset(SGPR)
  SDTCisVT<5, i32>,     // offset(imm)
  SDTCisVT<6, i32>,     // cachepolicy, swizzled buffer(imm)
  SDTCisVT<7, i1>       // idxen(imm)
]>;

// Buffer load nodes. Every node is chained, may load, and carries a memory
// operand.
def SIbuffer_load
    : SDNode<"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad,
             [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_ubyte
    : SDNode<"AMDGPUISD::BUFFER_LOAD_UBYTE", SDTBufferLoad,
             [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_ushort
    : SDNode<"AMDGPUISD::BUFFER_LOAD_USHORT", SDTBufferLoad,
             [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_byte
    : SDNode<"AMDGPUISD::BUFFER_LOAD_BYTE", SDTBufferLoad,
             [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_short
    : SDNode<"AMDGPUISD::BUFFER_LOAD_SHORT", SDTBufferLoad,
             [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;

// TFE variants of the plain loads above.
def SIbuffer_load_tfe
    : SDNode<"AMDGPUISD::BUFFER_LOAD_TFE", SDTBufferLoad,
             [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_ubyte_tfe
    : SDNode<"AMDGPUISD::BUFFER_LOAD_UBYTE_TFE", SDTBufferLoad,
             [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_ushort_tfe
    : SDNode<"AMDGPUISD::BUFFER_LOAD_USHORT_TFE", SDTBufferLoad,
             [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_byte_tfe
    : SDNode<"AMDGPUISD::BUFFER_LOAD_BYTE_TFE", SDTBufferLoad,
             [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_short_tfe
    : SDNode<"AMDGPUISD::BUFFER_LOAD_SHORT_TFE", SDTBufferLoad,
             [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;

// Format loads.
def SIbuffer_load_format
    : SDNode<"AMDGPUISD::BUFFER_LOAD_FORMAT", SDTBufferLoad,
             [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_format_tfe
    : SDNode<"AMDGPUISD::BUFFER_LOAD_FORMAT_TFE", SDTBufferLoad,
             [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_format_d16
    : SDNode<"AMDGPUISD::BUFFER_LOAD_FORMAT_D16", SDTBufferLoad,
             [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
170
// Type profile for buffer stores: no result and eight operands. Operand 0
// (vdata) is left unconstrained.
def SDTBufferStore : SDTypeProfile<0, 8, [
  SDTCisVT<1, v4i32>,   // rsrc
  SDTCisVT<2, i32>,     // vindex(VGPR)
  SDTCisVT<3, i32>,     // voffset(VGPR)
  SDTCisVT<4, i32>,     // soffset(SGPR)
  SDTCisVT<5, i32>,     // offset(imm)
  SDTCisVT<6, i32>,     // cachepolicy, swizzled buffer(imm)
  SDTCisVT<7, i1>       // idxen(imm)
]>;

// Buffer store nodes. Every node is chained, may store, and carries a memory
// operand.
def SIbuffer_store
    : SDNode<"AMDGPUISD::BUFFER_STORE", SDTBufferStore,
             [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_byte
    : SDNode<"AMDGPUISD::BUFFER_STORE_BYTE", SDTBufferStore,
             [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_short
    : SDNode<"AMDGPUISD::BUFFER_STORE_SHORT", SDTBufferStore,
             [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_format
    : SDNode<"AMDGPUISD::BUFFER_STORE_FORMAT", SDTBufferStore,
             [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_format_d16
    : SDNode<"AMDGPUISD::BUFFER_STORE_FORMAT_D16", SDTBufferStore,
             [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
195
// Defines a buffer atomic node plus a "_noret" fragment for it.
//
//   <NAME>        - the SDNode for `opcode`: one result and eight operands
//                   (operand 1 is unconstrained; 2..8 are typed below).
//   <NAME>_noret  - a PatFrag over the same node with HasNoUse set.
multiclass SDBufferAtomic<string opcode> {
  def "" : SDNode<
    opcode,
    SDTypeProfile<1, 8, [
      SDTCisVT<2, v4i32>,   // rsrc
      SDTCisVT<3, i32>,     // vindex(VGPR)
      SDTCisVT<4, i32>,     // voffset(VGPR)
      SDTCisVT<5, i32>,     // soffset(SGPR)
      SDTCisVT<6, i32>,     // offset(imm)
      SDTCisVT<7, i32>,     // cachepolicy(imm)
      SDTCisVT<8, i1>       // idxen(imm)
    ]>,
    [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;

  def "_noret" : PatFrag<
    (ops node:$vdata_in, node:$rsrc, node:$vindex, node:$voffset,
         node:$soffset, node:$offset, node:$cachepolicy, node:$idxen),
    (!cast<SDNode>(NAME) node:$vdata_in, node:$rsrc, node:$vindex,
                         node:$voffset, node:$soffset, node:$offset,
                         node:$cachepolicy, node:$idxen)> {
    let HasNoUse = true;
  }
}
217
// Buffer atomic nodes; each defm yields the node and its _noret fragment.
defm SIbuffer_atomic_swap  : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_SWAP">;
defm SIbuffer_atomic_add   : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_ADD">;
defm SIbuffer_atomic_sub   : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_SUB">;
defm SIbuffer_atomic_smin  : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_SMIN">;
defm SIbuffer_atomic_umin  : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_UMIN">;
defm SIbuffer_atomic_smax  : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_SMAX">;
defm SIbuffer_atomic_umax  : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_UMAX">;
defm SIbuffer_atomic_and   : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_AND">;
defm SIbuffer_atomic_or    : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_OR">;
defm SIbuffer_atomic_xor   : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_XOR">;
defm SIbuffer_atomic_inc   : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_INC">;
defm SIbuffer_atomic_dec   : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_DEC">;
defm SIbuffer_atomic_csub  : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_CSUB">;
defm SIbuffer_atomic_fadd  : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_FADD">;
defm SIbuffer_atomic_fmin  : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_FMIN">;
defm SIbuffer_atomic_fmax  : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_FMAX">;
defm SIbuffer_atomic_cond_sub_u32
    : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_COND_SUB_U32">;
235
// Buffer compare-and-swap: one result and nine operands. Operands 1 and 2
// are unconstrained here (the _noret fragment below names them $src and
// $cmp); operands 3..9 are typed as listed.
def SIbuffer_atomic_cmpswap : SDNode<
  "AMDGPUISD::BUFFER_ATOMIC_CMPSWAP",
  SDTypeProfile<1, 9, [
    SDTCisVT<3, v4i32>,   // rsrc
    SDTCisVT<4, i32>,     // vindex(VGPR)
    SDTCisVT<5, i32>,     // voffset(VGPR)
    SDTCisVT<6, i32>,     // soffset(SGPR)
    SDTCisVT<7, i32>,     // offset(imm)
    SDTCisVT<8, i32>,     // cachepolicy(imm)
    SDTCisVT<9, i1>       // idxen(imm)
  ]>,
  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;

// Same node, with HasNoUse set.
def SIbuffer_atomic_cmpswap_noret : PatFrag<
  (ops node:$src, node:$cmp, node:$rsrc, node:$vindex, node:$voffset,
       node:$soffset, node:$offset, node:$cachepolicy, node:$idxen),
  (SIbuffer_atomic_cmpswap node:$src, node:$cmp, node:$rsrc, node:$vindex,
                           node:$voffset, node:$soffset, node:$offset,
                           node:$cachepolicy, node:$idxen)> {
  let HasNoUse = true;
}
256
// Result-less atomic node for `opcode`, taking a pointer and a data operand
// of type `ty`.
class SDGlobalAtomicNoRtn<string opcode, ValueType ty> : SDNode<
  opcode,
  SDTypeProfile<0, 2, [
    SDTCisPtrTy<0>,     // vaddr
    SDTCisVT<1, ty>     // vdata
  ]>,
  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
263
// AMDGPUISD::PC_ADD_REL_OFFSET: pointer-typed result; both operands have the
// same type as the result.
def SIpc_add_rel_offset : SDNode<
  "AMDGPUISD::PC_ADD_REL_OFFSET",
  SDTypeProfile<1, 2,
    [SDTCisVT<0, iPTR>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>]>>;

// AMDGPUISD::LDS: pointer-typed result; the single operand has the same type.
def SIlds : SDNode<
  "AMDGPUISD::LDS",
  SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisSameAs<0, 1>]>>;
271
// d16 load nodes. They all share the SIload_d16 profile and are chained
// loads with a memory operand.

// Low-half variants.
def SIload_d16_lo
    : SDNode<"AMDGPUISD::LOAD_D16_LO", SIload_d16,
             [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;

def SIload_d16_lo_u8
    : SDNode<"AMDGPUISD::LOAD_D16_LO_U8", SIload_d16,
             [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;

def SIload_d16_lo_i8
    : SDNode<"AMDGPUISD::LOAD_D16_LO_I8", SIload_d16,
             [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;

// High-half variants.
def SIload_d16_hi
    : SDNode<"AMDGPUISD::LOAD_D16_HI", SIload_d16,
             [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;

def SIload_d16_hi_u8
    : SDNode<"AMDGPUISD::LOAD_D16_HI_U8", SIload_d16,
             [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;

def SIload_d16_hi_i8
    : SDNode<"AMDGPUISD::LOAD_D16_HI_I8", SIload_d16,
             [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
301
// AMDGPUISD::DENORM_MODE: no result, one integer operand. Chained, with
// optional input glue and an output glue.
def SIdenorm_mode
    : SDNode<"AMDGPUISD::DENORM_MODE",
             SDTypeProfile<0, 1, [SDTCisInt<0>]>,
             [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
306
307
// Optimize v_mfma_scale* instructions to avoid the scale if the
// scales are known 0.
//
// The fragment exposes only the five leading operands; it matches `intrin`
// when the second and fourth of its trailing four operands are the literal 0,
// and accepts anything (srcvalue) for the other two.
class UnscaledMFMAOptimizationPat<SDPatternOperator intrin> : PatFrag<
  (ops node:$srca, node:$srcb, node:$srcc, node:$cbsz, node:$blgp),
  (intrin $srca, $srcb, $srcc, $cbsz, $blgp, srcvalue, 0, srcvalue, 0)>;

def mfma_f32_16x16x128_f8f6f4
    : UnscaledMFMAOptimizationPat<int_amdgcn_mfma_scale_f32_16x16x128_f8f6f4>;
def mfma_f32_32x32x64_f8f6f4
    : UnscaledMFMAOptimizationPat<int_amdgcn_mfma_scale_f32_32x32x64_f8f6f4>;
319
320//===----------------------------------------------------------------------===//
321// ValueType helpers
322//===----------------------------------------------------------------------===//
323
// `ret` is set when SrcVT is an integer type other than i1.
class isIntType<ValueType SrcVT> {
  bit ret = !and(SrcVT.isInteger,
                 !ne(SrcVT.Value, i1.Value));
}
327
// Type profile for the scalar buffer prefetch node: no result, three
// operands.
def SDTSBufferPrefetch : SDTypeProfile<0, 3, [
  SDTCisVT<0, v4i32>,   // rsrc
  SDTCisVT<1, i32>,     // offset(imm)
  SDTCisVT<2, i32>      // length
]>;

// The prefetch node is chained and flagged as both loading and storing.
def SIsbuffer_prefetch
    : SDNode<"AMDGPUISD::SBUFFER_PREFETCH_DATA", SDTSBufferPrefetch,
             [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
335
336//===----------------------------------------------------------------------===//
337// SDNodes PatFrags for loads/stores with a glue input.
338// This is for SDNodes and PatFrag for local loads and stores to
339// enable s_mov_b32 m0, -1 to be glued to the memory instructions.
340//
341// These mirror the regular load/store PatFrags and rely on special
342// processing during Select() to add the glued copy.
343//
344//===----------------------------------------------------------------------===//
345
// ISD::LOAD with an additional glue input.
def AMDGPUld_glue
    : SDNode<"ISD::LOAD", SDTLoad,
             [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]>;

// ISD::ATOMIC_LOAD with an additional glue input.
def AMDGPUatomic_ld_glue
    : SDNode<"ISD::ATOMIC_LOAD", SDTAtomicLoad,
             [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]>;
353
// Glued load restricted to the unindexed form.
def unindexedload_glue
    : PatFrag<(ops node:$ptr), (AMDGPUld_glue node:$ptr)> {
  let IsLoad = 1;
  let IsUnindexed = 1;
}

// Glued, unindexed, non-extending load.
def load_glue
    : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}
363
// Zero-extending glued atomic load.
def atomic_load_zext_glue
    : PatFrag<(ops node:$ptr), (AMDGPUatomic_ld_glue node:$ptr)> {
  let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
  let IsZeroExtLoad = true;
}

// Sign-extending glued atomic load.
def atomic_load_sext_glue
    : PatFrag<(ops node:$ptr), (AMDGPUatomic_ld_glue node:$ptr)> {
  let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
  let IsSignExtLoad = true;
}
375
// Glued atomic loads, one fragment per memory type.
let IsAtomic = 1 in {
def atomic_load_8_glue
    : PatFrag<(ops node:$ptr), (AMDGPUatomic_ld_glue node:$ptr)> {
  let MemoryVT = i8;
}

def atomic_load_16_glue
    : PatFrag<(ops node:$ptr), (AMDGPUatomic_ld_glue node:$ptr)> {
  let MemoryVT = i16;
}

def atomic_load_32_glue
    : PatFrag<(ops node:$ptr), (AMDGPUatomic_ld_glue node:$ptr)> {
  let MemoryVT = i32;
}

def atomic_load_64_glue
    : PatFrag<(ops node:$ptr), (AMDGPUatomic_ld_glue node:$ptr)> {
  let MemoryVT = i64;
}
} // End let IsAtomic = 1
399
// Extending glued atomic loads, one fragment per extension kind and memory
// type (i8 and i16).
let IsAtomic = 1 in {
def atomic_load_zext_8_glue
    : PatFrag<(ops node:$ptr), (atomic_load_zext_glue node:$ptr)> {
  let MemoryVT = i8;
}

def atomic_load_sext_8_glue
    : PatFrag<(ops node:$ptr), (atomic_load_sext_glue node:$ptr)> {
  let MemoryVT = i8;
}

def atomic_load_zext_16_glue
    : PatFrag<(ops node:$ptr), (atomic_load_zext_glue node:$ptr)> {
  let MemoryVT = i16;
}

def atomic_load_sext_16_glue
    : PatFrag<(ops node:$ptr), (atomic_load_sext_glue node:$ptr)> {
  let MemoryVT = i16;
}
} // End let IsAtomic = 1
423
// Extending glued loads: any-extend, sign-extend and zero-extend.
def extload_glue
    : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsAnyExtLoad = 1;
}

def sextload_glue
    : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsSignExtLoad = 1;
}

def zextload_glue
    : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsZeroExtLoad = 1;
}
438
// Extending glued loads narrowed to a specific memory type.
let IsLoad = 1 in {
def extloadi8_glue
    : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
  let MemoryVT = i8;
}

def zextloadi8_glue
    : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
  let MemoryVT = i8;
}

def extloadi16_glue
    : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
  let MemoryVT = i16;
}

def zextloadi16_glue
    : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
  let MemoryVT = i16;
}

def sextloadi8_glue
    : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
  let MemoryVT = i8;
}

def sextloadi16_glue
    : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
  let MemoryVT = i16;
}
} // End let IsLoad = 1
468
469
// Glued loads restricted to the address spaces in LoadAddress_local.
let IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {

def load_local_m0
    : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> {
  let IsNonExtLoad = 1;
}

// i8 memory type.
def extloadi8_local_m0
    : PatFrag<(ops node:$ptr), (extloadi8_glue node:$ptr)>;
def sextloadi8_local_m0
    : PatFrag<(ops node:$ptr), (sextloadi8_glue node:$ptr)>;
def zextloadi8_local_m0
    : PatFrag<(ops node:$ptr), (zextloadi8_glue node:$ptr)>;

// i16 memory type.
def extloadi16_local_m0
    : PatFrag<(ops node:$ptr), (extloadi16_glue node:$ptr)>;
def sextloadi16_local_m0
    : PatFrag<(ops node:$ptr), (sextloadi16_glue node:$ptr)>;
def zextloadi16_local_m0
    : PatFrag<(ops node:$ptr), (zextloadi16_glue node:$ptr)>;

} // End let IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces
483
// load_local_m0 with a minimum alignment requirement.
//
// MinAlignment is supplied through the Aligned<N> mixin, the same way the
// store_align{8,16}_local_m0 fragments in this file do it, instead of
// re-declaring the `int MinAlignment` field inside the def body. The
// resulting field value is unchanged (8 and 16 respectively).
def load_align8_local_m0 : PatFrag<(ops node:$ptr),
                                   (load_local_m0 node:$ptr)>,
                           Aligned<8> {
  let IsLoad = 1;
}

def load_align16_local_m0 : PatFrag<(ops node:$ptr),
                                    (load_local_m0 node:$ptr)>,
                            Aligned<16> {
  let IsLoad = 1;
}
495
// Glued atomic loads restricted to the address spaces in LoadAddress_local.
let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {

def atomic_load_8_local_m0
    : PatFrag<(ops node:$ptr), (atomic_load_8_glue node:$ptr)>;
def atomic_load_16_local_m0
    : PatFrag<(ops node:$ptr), (atomic_load_16_glue node:$ptr)>;
def atomic_load_32_local_m0
    : PatFrag<(ops node:$ptr), (atomic_load_32_glue node:$ptr)>;
def atomic_load_64_local_m0
    : PatFrag<(ops node:$ptr), (atomic_load_64_glue node:$ptr)>;

// Extending forms.
def atomic_load_zext_8_local_m0
    : PatFrag<(ops node:$ptr), (atomic_load_zext_8_glue node:$ptr)>;
def atomic_load_sext_8_local_m0
    : PatFrag<(ops node:$ptr), (atomic_load_sext_8_glue node:$ptr)>;
def atomic_load_zext_16_local_m0
    : PatFrag<(ops node:$ptr), (atomic_load_zext_16_glue node:$ptr)>;
def atomic_load_sext_16_local_m0
    : PatFrag<(ops node:$ptr), (atomic_load_sext_16_glue node:$ptr)>;

} // End let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces
515
516
// ISD::STORE with an additional glue input.
def AMDGPUst_glue
    : SDNode<"ISD::STORE", SDTStore,
             [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]>;

// ISD::ATOMIC_STORE with an additional glue input.
def AMDGPUatomic_st_glue
    : SDNode<"ISD::ATOMIC_STORE", SDTAtomicStore,
             [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]>;
524
// Glued store restricted to the unindexed form.
def unindexedstore_glue
    : PatFrag<(ops node:$val, node:$ptr),
              (AMDGPUst_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsUnindexed = 1;
}

// Glued, unindexed, non-truncating store.
def store_glue
    : PatFrag<(ops node:$val, node:$ptr),
              (unindexedstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

// Glued, unindexed, truncating store.
def truncstore_glue
    : PatFrag<(ops node:$val, node:$ptr),
              (unindexedstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 1;
}

// Truncating stores narrowed to a specific memory type.
def truncstorei8_glue
    : PatFrag<(ops node:$val, node:$ptr),
              (truncstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = i8;
  let IsTruncStore = 1;
}

def truncstorei16_glue
    : PatFrag<(ops node:$val, node:$ptr),
              (truncstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = i16;
  let IsTruncStore = 1;
}
556
// Glued stores restricted to the address spaces in StoreAddress_local.
let IsStore = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {

def store_local_m0
    : PatFrag<(ops node:$val, node:$ptr),
              (store_glue node:$val, node:$ptr)>;
def truncstorei8_local_m0
    : PatFrag<(ops node:$val, node:$ptr),
              (truncstorei8_glue node:$val, node:$ptr)>;
def truncstorei16_local_m0
    : PatFrag<(ops node:$val, node:$ptr),
              (truncstorei16_glue node:$val, node:$ptr)>;

} // End let IsStore = 1, AddressSpaces = StoreAddress_local.AddrSpaces
565
// store_local_m0 with a minimum alignment requirement, supplied through the
// Aligned<N> mixin.
def store_align8_local_m0
    : PatFrag<(ops node:$value, node:$ptr),
              (store_local_m0 node:$value, node:$ptr)>,
      Aligned<8> {
  let IsStore = 1;
}

def store_align16_local_m0
    : PatFrag<(ops node:$value, node:$ptr),
              (store_local_m0 node:$value, node:$ptr)>,
      Aligned<16> {
  let IsStore = 1;
}
577
// Local loads/stores whose alignment is below 4. The same check is given
// twice: once for SelectionDAG (on the MemSDNode) and once for GlobalISel
// (on the first memory operand of the instruction).
let PredicateCode = [{return cast<MemSDNode>(N)->getAlign() < 4;}],
    GISelPredicateCode = [{return (*MI.memoperands_begin())->getAlign() < 4;}],
    AddressSpaces = [ AddrSpaces.Local ] in {

def load_align_less_than_4_local
    : PatFrag<(ops node:$ptr), (load_local node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def load_align_less_than_4_local_m0
    : PatFrag<(ops node:$ptr), (load_local_m0 node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def store_align_less_than_4_local
    : PatFrag<(ops node:$value, node:$ptr),
              (store_local node:$value, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

def store_align_less_than_4_local_m0
    : PatFrag<(ops node:$value, node:$ptr),
              (store_local_m0 node:$value, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

} // End let PredicateCode, GISelPredicateCode, AddressSpaces
605
// Glued atomic stores, one fragment per memory type. The two operands are
// forwarded to AMDGPUatomic_st_glue in the order they are received.
let IsAtomic = 1 in {
def atomic_store_8_glue
    : PatFrag<(ops node:$ptr, node:$value),
              (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
  let MemoryVT = i8;
}

def atomic_store_16_glue
    : PatFrag<(ops node:$ptr, node:$value),
              (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
  let MemoryVT = i16;
}

def atomic_store_32_glue
    : PatFrag<(ops node:$ptr, node:$value),
              (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
  let MemoryVT = i32;
}

def atomic_store_64_glue
    : PatFrag<(ops node:$ptr, node:$value),
              (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
  let MemoryVT = i64;
}
} // End let IsAtomic = 1
633
// Glued atomic stores restricted to the address spaces in StoreAddress_local.
let IsAtomic = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {

def atomic_store_8_local_m0
    : PatFrag<(ops node:$val, node:$ptr),
              (atomic_store_8_glue node:$val, node:$ptr)>;
def atomic_store_16_local_m0
    : PatFrag<(ops node:$val, node:$ptr),
              (atomic_store_16_glue node:$val, node:$ptr)>;
def atomic_store_32_local_m0
    : PatFrag<(ops node:$val, node:$ptr),
              (atomic_store_32_glue node:$val, node:$ptr)>;
def atomic_store_64_local_m0
    : PatFrag<(ops node:$val, node:$ptr),
              (atomic_store_64_glue node:$val, node:$ptr)>;

} // End let IsAtomic = 1, AddressSpaces = StoreAddress_local.AddrSpaces
644
645
646//===----------------------------------------------------------------------===//
647// SDNodes PatFrags for a16 loads and stores with 3 components.
648// v3f16/v3i16 is widened to v4f16/v4i16, so we need to match on the memory
649// load/store size.
650//===----------------------------------------------------------------------===//
651
// Wraps buffer load operator `name` in a fragment that additionally requires
// memory type `vt`.
class mubuf_intrinsic_load<SDPatternOperator name, ValueType vt> : PatFrag<
  (ops node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
       node:$offset, node:$auxiliary, node:$idxen),
  (name node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
        node:$offset, node:$auxiliary, node:$idxen)> {
  let IsLoad = 1;
  let MemoryVT = vt;
}

// Store counterpart; takes the stored data as the leading operand.
class mubuf_intrinsic_store<SDPatternOperator name, ValueType vt> : PatFrag<
  (ops node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
       node:$offset, node:$auxiliary, node:$idxen),
  (name node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
        node:$offset, node:$auxiliary, node:$idxen)> {
  let IsStore = 1;
  let MemoryVT = vt;
}

// Typed buffer load: same as the mubuf form plus a $format operand.
class mtbuf_intrinsic_load<SDPatternOperator name, ValueType vt> : PatFrag<
  (ops node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
       node:$offset, node:$format, node:$auxiliary, node:$idxen),
  (name node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
        node:$offset, node:$format, node:$auxiliary, node:$idxen)> {
  let IsLoad = 1;
  let MemoryVT = vt;
}

// Typed buffer store: same as the mubuf form plus a $format operand.
class mtbuf_intrinsic_store<SDPatternOperator name, ValueType vt> : PatFrag<
  (ops node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
       node:$offset, node:$format, node:$auxiliary, node:$idxen),
  (name node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
        node:$offset, node:$format, node:$auxiliary, node:$idxen)> {
  let IsStore = 1;
  let MemoryVT = vt;
}
687
688//===----------------------------------------------------------------------===//
689// SDNodes PatFrags for d16 loads
690//===----------------------------------------------------------------------===//
691
// Load fragment over `op` taking a pointer and a tied input value.
class LoadD16Frag<SDPatternOperator op>
    : PatFrag<(ops node:$ptr, node:$tied_in),
              (op node:$ptr, node:$tied_in)> {
  let IsLoad = 1;
}
697
// Instantiate the d16 load fragments once per address space. The address
// space list is looked up by name as LoadAddress_<as>.
foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {

// High-half loads.
def load_d16_hi_#as : LoadD16Frag<SIload_d16_hi>;

def az_extloadi8_d16_hi_#as : LoadD16Frag<SIload_d16_hi_u8> {
  let MemoryVT = i8;
}

def sextloadi8_d16_hi_#as : LoadD16Frag<SIload_d16_hi_i8> {
  let MemoryVT = i8;
}

// Low-half loads.
def load_d16_lo_#as : LoadD16Frag<SIload_d16_lo>;

def az_extloadi8_d16_lo_#as : LoadD16Frag<SIload_d16_lo_u8> {
  let MemoryVT = i8;
}

def sextloadi8_d16_lo_#as : LoadD16Frag<SIload_d16_lo_i8> {
  let MemoryVT = i8;
}

} // End let AddressSpaces = ...
} // End foreach as
723
// Shifts with the operand order reversed: the fragment receives
// ($src1, $src0) and matches the shift of $src0 by $src1.
def lshr_rev : PatFrag<(ops node:$src1, node:$src0),
                       (srl $src0, $src1)>;

def ashr_rev : PatFrag<(ops node:$src1, node:$src0),
                       (sra $src0, $src1)>;

def lshl_rev : PatFrag<(ops node:$src1, node:$src0),
                       (shl $src0, $src1)>;

// ctpop($src0) + $src1
def add_ctpop : PatFrag<(ops node:$src0, node:$src1),
                        (add (ctpop $src0), $src1)>;

// not($src0 xor $src1)
def xnor : PatFrag<(ops node:$src0, node:$src1),
                   (not (xor $src0, $src1))>;
748
// shl1_add .. shl4_add: ($src0 << I) + $src1, where the shift has a single
// use. The SelectionDAG predicate always fails and the GlobalISel predicate
// always succeeds.
foreach I = 1-4 in {
def shl#I#_add : PatFrag<(ops node:$src0, node:$src1),
                         (add (shl_oneuse $src0, (i32 I)), $src1)> {
  // FIXME: Poor substitute for disabling pattern in SelectionDAG
  let PredicateCode = [{return false;}];
  let GISelPredicateCode = [{return true;}];
}
} // End foreach I
758
// Defines a glued atomic node and its local/region fragments.
//
//   op_name   - suffix appended to "<namespace>::ATOMIC_".
//   is_amdgpu - selects the "AMDGPUISD" namespace instead of "ISD".
//   tc        - type profile of the node.
//   IsInt     - chooses the integer fragment multiclasses when set, and the
//               "_fp" ones otherwise.
//
// Produces <NAME>_glue, then instantiates the returning and no-return
// fragment multiclasses under the _local_m0 and _region_m0 prefixes.
multiclass SIAtomicM0Glue2<string op_name, bit is_amdgpu = 0,
                           SDTypeProfile tc = SDTAtomic2,
                           bit IsInt = 1> {

  def _glue : SDNode<
    !if(is_amdgpu, "AMDGPUISD", "ISD")#"::ATOMIC_"#op_name, tc,
    [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]>;

  let AddressSpaces = StoreAddress_local.AddrSpaces in {
    if IsInt then {
      defm _local_m0 : binary_atomic_op<!cast<SDNode>(NAME#"_glue")>;
      defm _local_m0 : noret_binary_atomic_op<!cast<SDNode>(NAME#"_glue")>;
    } else {
      defm _local_m0 : binary_atomic_op_fp<!cast<SDNode>(NAME#"_glue")>;
      defm _local_m0 : noret_binary_atomic_op_fp<!cast<SDNode>(NAME#"_glue")>;
    }
  }

  let AddressSpaces = StoreAddress_region.AddrSpaces in {
    if IsInt then {
      defm _region_m0 : binary_atomic_op<!cast<SDNode>(NAME#"_glue")>;
      defm _region_m0 : noret_binary_atomic_op<!cast<SDNode>(NAME#"_glue")>;
    } else {
      defm _region_m0 : binary_atomic_op_fp<!cast<SDNode>(NAME#"_glue")>;
      defm _region_m0 : noret_binary_atomic_op_fp<!cast<SDNode>(NAME#"_glue")>;
    }
  }
}
789
// Integer atomics (default profile, IsInt = 1).
defm atomic_load_add       : SIAtomicM0Glue2<"LOAD_ADD">;
defm atomic_load_sub       : SIAtomicM0Glue2<"LOAD_SUB">;
defm atomic_load_uinc_wrap : SIAtomicM0Glue2<"LOAD_UINC_WRAP">;
defm atomic_load_udec_wrap : SIAtomicM0Glue2<"LOAD_UDEC_WRAP">;
defm atomic_load_and       : SIAtomicM0Glue2<"LOAD_AND">;
defm atomic_load_min       : SIAtomicM0Glue2<"LOAD_MIN">;
defm atomic_load_max       : SIAtomicM0Glue2<"LOAD_MAX">;
defm atomic_load_or        : SIAtomicM0Glue2<"LOAD_OR">;
defm atomic_load_xor       : SIAtomicM0Glue2<"LOAD_XOR">;
defm atomic_load_umin      : SIAtomicM0Glue2<"LOAD_UMIN">;
defm atomic_load_umax      : SIAtomicM0Glue2<"LOAD_UMAX">;
defm atomic_swap           : SIAtomicM0Glue2<"SWAP">;

// Floating-point atomics (SDTAtomic2_f32 profile, IsInt = 0).
defm atomic_load_fadd : SIAtomicM0Glue2<"LOAD_FADD", 0, SDTAtomic2_f32, 0>;
defm atomic_load_fmin : SIAtomicM0Glue2<"LOAD_FMIN", 0, SDTAtomic2_f32, 0>;
defm atomic_load_fmax : SIAtomicM0Glue2<"LOAD_FMAX", 0, SDTAtomic2_f32, 0>;
805
// Node transforms that re-emit an immediate as a target constant of a fixed
// value type. The zero-extending ones read the value with getZExtValue(),
// the signed ones with getSExtValue().

// Zero-extended value, emitted as i1.
def as_i1timm : SDNodeXForm<timm, [{
  const SDLoc DL(N);
  return CurDAG->getTargetConstant(N->getZExtValue(), DL, MVT::i1);
}]>;

// Zero-extended value, emitted as i32.
def as_i1timm_zext : SDNodeXForm<timm, [{
  const SDLoc DL(N);
  return CurDAG->getTargetConstant(N->getZExtValue(), DL, MVT::i32);
}]>;

// Zero-extended value, emitted as i8.
def as_i8imm : SDNodeXForm<imm, [{
  const SDLoc DL(N);
  return CurDAG->getTargetConstant(N->getZExtValue(), DL, MVT::i8);
}]>;

// Sign-extended value.
// NOTE(review): despite the "i8" in the name this emits an MVT::i16
// constant; behaviour is kept as-is - confirm whether that is intentional.
def as_i8timm : SDNodeXForm<timm, [{
  const SDLoc DL(N);
  return CurDAG->getSignedTargetConstant(N->getSExtValue(), DL, MVT::i16);
}]>;

// Sign-extended value, emitted as i16.
def as_i16imm : SDNodeXForm<imm, [{
  const SDLoc DL(N);
  return CurDAG->getSignedTargetConstant(N->getSExtValue(), DL, MVT::i16);
}]>;

// Emitted as i16. The value is explicitly narrowed to int16_t first because
// this transform is used with both signed and unsigned immediates.
def as_i16timm : SDNodeXForm<timm, [{
  const int16_t Narrowed = static_cast<int16_t>(N->getSExtValue());
  return CurDAG->getSignedTargetConstant(Narrowed, SDLoc(N), MVT::i16);
}]>;

// Sign-extended value, emitted as i32.
def as_i32imm: SDNodeXForm<imm, [{
  const SDLoc DL(N);
  return CurDAG->getSignedTargetConstant(N->getSExtValue(), DL, MVT::i32);
}]>;

// Sign-extended value, emitted as i32.
def as_i32timm: SDNodeXForm<timm, [{
  const SDLoc DL(N);
  return CurDAG->getSignedTargetConstant(N->getSExtValue(), DL, MVT::i32);
}]>;

// Sign-extended value, emitted as i64.
def as_i64imm: SDNodeXForm<imm, [{
  const SDLoc DL(N);
  return CurDAG->getSignedTargetConstant(N->getSExtValue(), DL, MVT::i64);
}]>;

// Emits the value returned by the condition-code node's get() as an i32
// target constant.
def cond_as_i32imm: SDNodeXForm<cond, [{
  const SDLoc DL(N);
  return CurDAG->getTargetConstant(N->get(), DL, MVT::i32);
}]>;
847
// Copied from the AArch64 backend:
// Reinterprets the bits of an FP immediate as an integer and emits them as
// an i32 target constant.
def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
  const APInt Bits = N->getValueAPF().bitcastToAPInt();
  return CurDAG->getTargetConstant(Bits.getZExtValue(), SDLoc(N), MVT::i32);
}]>;

// Turns a frame index node into a target frame index of type i32 that
// carries the same index.
def frameindex_to_targetframeindex : SDNodeXForm<frameindex, [{
  const int Index = cast<FrameIndexSDNode>(N)->getIndex();
  return CurDAG->getTargetFrameIndex(Index, MVT::i32);
}]>;

// Copied from the AArch64 backend:
// Same as bitcast_fpimm_to_i32, but the result is an i64 target constant.
def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
  const APInt Bits = N->getValueAPF().bitcastToAPInt();
  return CurDAG->getTargetConstant(Bits.getZExtValue(), SDLoc(N), MVT::i64);
}]>;
864
// Maps a rounding-mode immediate onto the hardware encoding by rotating the
// value: hw = (mode + 3) % 4.
def as_hw_round_mode : SDNodeXForm<timm, [{
  // "round.towardzero" -> TowardZero 0        -> FP_ROUND_ROUND_TO_ZERO 3
  // "round.tonearest"  -> NearestTiesToEven 1 -> FP_ROUND_ROUND_TO_NEAREST 0
  // "round.upward"     -> TowardPositive 2    -> FP_ROUND_ROUND_TO_INF 1
  // "round.downward"   -> TowardNegative 3    -> FP_ROUND_ROUND_TO_NEGINF 2
  const int64_t Mode = N->getSExtValue();
  return CurDAG->getTargetConstant((Mode + 3) % 4, SDLoc(N), MVT::i32);
}]>;

// Accepts exactly the four rounding modes listed in as_hw_round_mode.
def SupportedRoundMode : TImmLeaf<i32, [{
  switch (Imm) {
  case (int)RoundingMode::TowardZero:
  case (int)RoundingMode::NearestTiesToEven:
  case (int)RoundingMode::TowardPositive:
  case (int)RoundingMode::TowardNegative:
    return true;
  default:
    return false;
  }
}]>;
880
// Extracts bit number `bitnum` of an immediate and emits it as an i1 target
// constant. The bit index is spliced into the C++ body at TableGen time.
class bitextract_imm<int bitnum> : SDNodeXForm<imm, [{
  const uint64_t Value = N->getZExtValue();
  const unsigned Extracted = (Value >> }] # bitnum # [{ ) & 1;
  return CurDAG->getTargetConstant(Extracted, SDLoc(N), MVT::i1);
}]>;
886
// i32 target immediate that fits in 16 bits, either as a signed or as an
// unsigned value; rendered through as_i16timm.
def SIMM16bit : TImmLeaf<i32, [{
  return isUInt<16>(Imm) || isInt<16>(Imm);
}], as_i16timm>;

// i64 immediate whose upper 32 bits are all zero.
def i64imm_32bit : ImmLeaf<i64, [{
  return isUInt<32>(Imm);
}]>;

// Immediates accepted by isInlineImmediate().
def InlineImm64 : IntImmLeaf<i64, [{ return isInlineImmediate(Imm); }]>;

def InlineImmFP32 : FPImmLeaf<f32, [{ return isInlineImmediate(Imm); }]>;

def InlineImmFP64 : FPImmLeaf<f64, [{ return isInlineImmediate(Imm); }]>;
907
908
// Pattern leaf for `frag` that is accepted when isVGPRImm(N) holds in the
// SelectionDAG selector; the GlobalISel predicate always accepts.
class VGPRImm <dag frag> : PatLeaf<frag, [{ return isVGPRImm(N); }]> {
  let GISelPredicateCode = [{ return true; }];
}
914
// Emits the arithmetic negation of the immediate as an i32 constant.
def NegateImm : SDNodeXForm<imm, [{
  const int64_t Negated = -N->getSExtValue();
  return CurDAG->getConstant(Negated, SDLoc(N), MVT::i32);
}]>;

// Immediates in the range [-64, -17]; they are rendered negated through
// NegateImm.
// TODO: When FP inline imm values work?
def NegSubInlineConst32 : ImmLeaf<i32, [{
  return Imm >= -64 && Imm <= -17;
}], NegateImm>;

// i16 flavour of NegSubInlineConst32.
def NegSubInlineIntConst16 : ImmLeaf<i16, [{
  return Imm >= -64 && Imm <= -17;
}], NegateImm>;
927
// Shift amount that is valid for a 32-bit shift, i.e. in the range [0, 31].
//
// Imm is the sign-extended immediate, so a bare `Imm < 32` would also accept
// every negative value (e.g. 0xffffffff read as -1). isUInt<5> enforces both
// bounds at once.
def ShiftAmt32Imm : ImmLeaf <i32, [{
  return isUInt<5>(Imm);
}]>;
931
// f16 value in a VGPR_32 whose defining opcode is accepted by
// fp16SrcZerosHighBits().
def fp16_zeros_high_16bits : PatLeaf<(f16 VGPR_32:$src), [{
  const unsigned Opc = N->getOpcode();
  return fp16SrcZerosHighBits(Opc);
}]>;
935
// Translates the two low bits of the immediate into source-modifier bits:
// bit 0 -> SISrcMods::OP_SEL_0, bit 1 -> SISrcMods::OP_SEL_1. The result is
// emitted as an i32 target constant.
def MFMALdScaleXForm : SDNodeXForm<timm, [{
  const unsigned Scale = N->getZExtValue();
  const unsigned Lo = (Scale & 0x1) ? unsigned(SISrcMods::OP_SEL_0) : 0u;
  const unsigned Hi = (Scale & 0x2) ? unsigned(SISrcMods::OP_SEL_1) : 0u;
  return CurDAG->getTargetConstant(Lo | Hi, SDLoc(N), MVT::i32);
}]>;
945
// Matches any floating-point source value for which
// SITargetLowering::isCanonicalized() returns true. The SelectionDAG
// predicate queries the node's result 0; the GlobalISel predicate queries
// the register defined by operand 0 of the instruction.
def is_canonicalized : PatLeaf<(fAny srcvalue:$src), [{
  const auto *TLI =
      static_cast<const SITargetLowering *>(getTargetLowering());
  return TLI->isCanonicalized(*CurDAG, SDValue(N, 0));
}]> {
  let GISelPredicateCode = [{
    const auto &Lowering = *static_cast<const SITargetLowering *>(
        MF.getSubtarget().getTargetLowering());
    const MachineOperand &Result = MI.getOperand(0);
    assert(Result.isDef());
    return Lowering.isCanonicalized(Result.getReg(), MF);
  }];
}
959
960//===----------------------------------------------------------------------===//
961// MUBUF/SMEM Patterns
962//===----------------------------------------------------------------------===//
963
// Keeps only the cache-policy bits of the immediate. The mask is
// CPol::ALL for GFX12 and later generations and CPol::ALL_pregfx12
// otherwise. The result is an i8 target constant.
def extract_cpol : SDNodeXForm<timm, [{
  const bool IsGFX12Plus =
      Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12;
  const uint64_t CPolMask =
      IsGFX12Plus ? AMDGPU::CPol::ALL : AMDGPU::CPol::ALL_pregfx12;
  return CurDAG->getTargetConstant(N->getZExtValue() & CPolMask, SDLoc(N),
                                   MVT::i8);
}]>;

// Produces 1 when the swizzle bit (CPol::SWZ on GFX12 and later,
// CPol::SWZ_pregfx12 otherwise) is set in the immediate, else 0.
def extract_swz : SDNodeXForm<timm, [{
  const bool IsGFX12Plus =
      Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12;
  const uint64_t SwzBit =
      IsGFX12Plus ? AMDGPU::CPol::SWZ : AMDGPU::CPol::SWZ_pregfx12;
  const bool Swizzle = (N->getZExtValue() & SwzBit) != 0;
  return CurDAG->getTargetConstant(Swizzle, SDLoc(N), MVT::i8);
}]>;

// Same masking as extract_cpol, with CPol::GLC additionally forced on.
def extract_cpol_set_glc : SDNodeXForm<timm, [{
  const bool IsGFX12Plus =
      Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12;
  const uint64_t CPolMask =
      IsGFX12Plus ? AMDGPU::CPol::ALL : AMDGPU::CPol::ALL_pregfx12;
  const uint32_t Kept = N->getZExtValue() & CPolMask;
  return CurDAG->getTargetConstant(Kept | AMDGPU::CPol::GLC, SDLoc(N), MVT::i8);
}]>;
986
987//===----------------------------------------------------------------------===//
988// Custom Operands
989//===----------------------------------------------------------------------===//
990
// PC-relative operand with dedicated encode/decode hooks; printed with the
// generic printOperand.
def SOPPBrTarget : CustomOperand<OtherVT> {
  let OperandType = "OPERAND_PCREL";
  let EncoderMethod = "getSOPPBrEncoding";
  let DecoderMethod = "decodeSOPPBrTarget";
  let PrintMethod = "printOperand";
}

def si_ga : Operand<iPTR>;

def InterpSlot : CustomOperand<i32>;

// It appears to be necessary to create a separate operand for this to
// be able to parse attr<num> with no space.
def InterpAttr : CustomOperand<i32>;

def InterpAttrChan : ImmOperand<i32>;
1007
// i32 immediate operand of type OPERAND_INLINE_SPLIT_BARRIER_INT32 with its
// own decoder; printed with the generic printOperand.
def SplitBarrier : ImmOperand<i32> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_INLINE_SPLIT_BARRIER_INT32";
  let PrintMethod = "printOperand";
  let DecoderMethod = "decodeSplitBarrier";
}

// Asm operand class parsed by parseVReg32OrOff.
def VReg32OrOffClass : AsmOperandClass {
  let ParserMethod = "parseVReg32OrOff";
  let Name = "VReg32OrOff";
}
1019
// Custom operands that only differ in value type and optionality
// (second CustomOperand argument).
def SendMsg   : CustomOperand<i32>;

def Swizzle   : CustomOperand<i16, 1>;

def Endpgm    : CustomOperand<i16, 1>;

def SWaitCnt  : CustomOperand<i32>;

def DepCtr    : CustomOperand<i32>;

def SDelayALU : CustomOperand<i32>;
1031
1032include "SIInstrFormats.td"
1033include "VIInstrFormats.td"
1034
// Asm operand class for boolean registers: parsed by parseBoolReg and
// rendered as a plain register operand.
def BoolReg : AsmOperandClass {
  let Name = "BoolReg";
  let RenderMethod = "addRegOperands";
  let ParserMethod = "parseBoolReg";
}

// Register operand over SReg_1 that uses the BoolReg parser class and the
// decodeBoolReg decoder.
class BoolRC : RegisterOperand<SReg_1> {
  let DecoderMethod = "decodeBoolReg";
  let ParserMatchClass = BoolReg;
}

// Same parser/decoder as BoolRC, but over SReg_1_XEXEC.
def SSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
  let DecoderMethod = "decodeBoolReg";
  let ParserMatchClass = BoolReg;
}

// BoolRC destination printed with printVOPDst.
def VOPDstS64orS32 : BoolRC {
  let PrintMethod = "printVOPDst";
}

// SCSrc_i1 is the operand for pseudo instructions only.
// Boolean immediates shall not be exposed to codegen instructions.
def SCSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_REG_IMM_INT32";
  let DecoderMethod = "decodeBoolReg";
  let ParserMatchClass = BoolReg;
}
1063
1064// ===----------------------------------------------------------------------===//
1065// ExpSrc* Special cases for exp src operands which are printed as
1066// "off" depending on en operand.
1067// ===----------------------------------------------------------------------===//
1068
// ExpSrc0 .. ExpSrc3: VGPR_32 operands that share the VReg32OrOff parser
// class and differ only in their print method (printExpSrc<N>).
foreach Idx = 0...3 in {
  def ExpSrc#Idx : RegisterOperand<VGPR_32> {
    let PrintMethod = "printExpSrc"#Idx;
    let ParserMatchClass = VReg32OrOffClass;
  }
}
1088
// SDWA source operand over VS_32. The operand type and the decoder name are
// derived from the value type: "FP"/"INT" from vt.isFP and the bit width
// from vt.Size.
class SDWASrc<ValueType vt> : RegisterOperand<VS_32> {
  string Type = !if(vt.isFP, "FP", "INT");

  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_REG_INLINE_C_"#Type#vt.Size;
  let EncoderMethod = "getSDWASrcEncoding";
  let DecoderMethod = "decodeSDWASrc"#vt.Size;
}

def SDWASrc_i32 : SDWASrc<i32>;
def SDWASrc_i16 : SDWASrc<i16>;
def SDWASrc_f32 : SDWASrc<f32>;
def SDWASrc_f16 : SDWASrc<f16>;
1101
// Boolean destination of SDWA VOPC instructions, with its own
// encoder/decoder; printed like other VOP destinations.
def SDWAVopcDst : BoolRC {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_SDWA_VOPC_DST";
  let PrintMethod = "printVOPDst";
  let DecoderMethod = "decodeSDWAVopcDst";
  let EncoderMethod = "getSDWAVopcDstEncoding";
}
1109
// i32 custom operand written as "<prefix>:<value>" in assembly.
//
// Fields that records may override:
//   Validator     - C++ callable applied to the parsed value by the default
//                   ConvertMethod; accepts everything by default.
//   ConvertMethod - C++ callable handed to parseIntWithPrefix.
//   PrintInHex /
//   AlwaysPrint   - forwarded as "true"/"false" to printNamedInt.
class NamedIntOperand<string prefix, bit Optional = 1, string name = NAME>
    : CustomOperand<i32, Optional, name> {
  string Prefix = prefix;
  string Validator = "[](int64_t V) { return true; }";
  string ConvertMethod = "[](int64_t &V) { return "#Validator#"(V); }";
  bit PrintInHex = 0;
  bit AlwaysPrint = 0;

  let PredicateMethod =
    "getPredicate([](const AMDGPUOperand &Op) -> bool "#
    "{ return Op.isImmTy(AMDGPUOperand::"#ImmTy#"); })";

  let ParserMethod =
    "[this](OperandVector &Operands) -> ParseStatus "#
    "{ return parseIntWithPrefix(\""#Prefix#"\", Operands, "#
    "AMDGPUOperand::"#ImmTy#", "#ConvertMethod#"); }";

  let PrintMethod =
    "[this](const MCInst *MI, unsigned OpNo, "#
    "const MCSubtargetInfo &STI, raw_ostream &O) { "#
    "printNamedInt(MI, OpNo, STI, O, \""#Prefix#"\", "#
    !if(PrintInHex, "true", "false")#", "#
    !if(AlwaysPrint, "true", "false")#"); }";
}
1133
// Optional i1 custom operand identified by the keyword `Id`; it is parsed
// with parseNamedBit and printed with printNamedBit.
class NamedBitOperand<string Id, string Name = NAME>
    : CustomOperand<i1, 1, Name> {
  let PredicateMethod = "isImmTy<AMDGPUOperand::"#ImmTy#">";

  let ParserMethod =
    "[this](OperandVector &Operands) -> ParseStatus "#
    "{ return parseNamedBit(\""#Id#"\", Operands, "#
    "AMDGPUOperand::"#ImmTy#"); }";

  let PrintMethod =
    "[this](const MCInst *MI, unsigned OpNo, "#
    "const MCSubtargetInfo &STI, raw_ostream &O) "#
    "{ printNamedBit(MI, OpNo, O, \""#Id#"\"); }";
}
1144
// Wraps an existing custom operand `Op` as an operand with the default
// value `Value`, reusing Op's print method and parser match class.
class DefaultOperand<CustomOperand Op, int Value>
  : OperandWithDefaultOps<Op.Type, (ops (Op.Type Value))>,
    CustomOperandProps<1> {
  let PrintMethod = Op.PrintMethod;
  let ParserMatchClass = Op.ParserMatchClass;
}

// Optional i32 SDWA selector parsed by parseSDWASel under the name `Id`.
class SDWAOperand<string Id, string Name = NAME>
    : CustomOperand<i32, 1, Name> {
  let ParserMethod =
    "[this](OperandVector &Operands) -> ParseStatus "#
    "{ return parseSDWASel(Operands, \""#Id#"\", "#
    "AMDGPUOperand::"#ImmTy#"); }";
}

// i32 operand defaulting to 0 that is parsed by
// parseOperandArrayWithPrefix under the prefix `Id`.
class ArrayOperand0<string Id, string Name = NAME>
  : OperandWithDefaultOps<i32, (ops (i32 0))>,
    CustomOperandProps<1, Name> {
  let ParserMethod =
    "[this](OperandVector &Operands) -> ParseStatus "#
    "{ return parseOperandArrayWithPrefix(\""#Id#"\", Operands, "#
    "AMDGPUOperand::"#ImmTy#"); }";
}
1167
// Offset operands. flat_offset uses the ImmTyOffset immediate kind, Offset
// is printed with printOffset, and offset0/offset1 are validated with
// isUInt<8>.
let ImmTy = "ImmTyOffset" in
def flat_offset : CustomOperand<i32, 1, "FlatOffset">;

def Offset : NamedIntOperand<"offset"> {
  let PrintMethod = "printOffset";
}

def Offset0 : NamedIntOperand<"offset0"> {
  let Validator = "isUInt<8>";
}
def Offset1 : NamedIntOperand<"offset1"> {
  let Validator = "isUInt<8>";
}
1176
def gds     : NamedBitOperand<"gds", "GDS">;

def omod    : CustomOperand<i32, 1, "OModSI">;
def omod0   : DefaultOperand<omod, 0>;

// We need to make the cases with a default of 0 distinct from no
// default to help deal with some cases where the operand appears
// before a mandatory operand.
def Clamp   : NamedBitOperand<"clamp">;
def Clamp0  : DefaultOperand<Clamp, 0>;
def highmod : NamedBitOperand<"high", "High">;
1188
// Cache-policy operand and its variants: fixed defaults (0 / 1), variants
// predicated on whether the CPol::GLC bit is set, and the predicated
// variants with a matching default value.
def CPol                    : CustomOperand<i32, 1>;
def CPol_0                  : DefaultOperand<CPol, 0>;
def CPol_GLC1               : DefaultOperand<CPol, 1>;
def CPol_GLC                : ValuePredicatedOperand<CPol, "Op.getImm() & CPol::GLC">;
def CPol_NonGLC             : ValuePredicatedOperand<CPol, "!(Op.getImm() & CPol::GLC)", 1>;
def CPol_GLC_WithDefault    : DefaultOperand<CPol_GLC, !shl(1, CPolBit.GLC)>;
def CPol_NonGLC_WithDefault : DefaultOperand<CPol_NonGLC, 0>;
1196
// Single-keyword bit operands.
def TFE       : NamedBitOperand<"tfe">;
def UNorm     : NamedBitOperand<"unorm">;
def DA        : NamedBitOperand<"da">;
def R128A16   : CustomOperand<i1, 1>;
def A16       : NamedBitOperand<"a16">;
def D16       : NamedBitOperand<"d16">;
def LWE       : NamedBitOperand<"lwe">;
def exp_compr : NamedBitOperand<"compr", "ExpCompr">;
def exp_vm    : NamedBitOperand<"vm", "ExpVM">;

def FORMAT : CustomOperand<i8>;

// dmask is printed in hexadecimal.
def DMask : NamedIntOperand<"dmask"> {
  let PrintInHex = 1;
}

def Dim : CustomOperand<i8, /*optional=*/1>;

// SDWA selectors.
def dst_sel    : SDWAOperand<"dst_sel", "SDWADstSel">;
def src0_sel   : SDWAOperand<"src0_sel", "SDWASrc0Sel">;
def src1_sel   : SDWAOperand<"src1_sel", "SDWASrc1Sel">;
def dst_unused : CustomOperand<i32, 1, "SDWADstUnused">;

// Array-style modifiers that default to 0.
def op_sel0    : ArrayOperand0<"op_sel", "OpSel">;
def op_sel_hi0 : ArrayOperand0<"op_sel_hi", "OpSelHi">;
def neg_lo0    : ArrayOperand0<"neg_lo", "NegLo">;
def neg_hi0    : ArrayOperand0<"neg_hi", "NegHi">;
1223
def IndexKey16bit : CustomOperand<i32, 1>;
def IndexKey8bit  : CustomOperand<i32, 1>;

def dpp8     : CustomOperand<i32, 0, "DPP8">;
def dpp_ctrl : CustomOperand<i32, 0, "DPPCtrl">;

// row_mask / bank_mask default to 0xf and are always printed, in hex.
let AlwaysPrint = 1, PrintInHex = 1, DefaultValue = "0xf" in {
  def DppRowMask  : NamedIntOperand<"row_mask">;
  def DppBankMask : NamedIntOperand<"bank_mask">;
}

// bound_ctrl has its own conversion and print hooks.
def DppBoundCtrl : NamedIntOperand<"bound_ctrl"> {
  let PrintMethod = "printDppBoundCtrl";
  let ConvertMethod = "[this] (int64_t &BC) -> bool "#
                      "{ return convertDppBoundCtrl(BC); }";
}

// Both "fi" operands share the DppFI name and print method; only the DPP8
// flavour sets a dedicated decoder.
let PrintMethod = "printDppFI" in {
  let DecoderMethod = "decodeDpp8FI" in
  def Dpp8FI : NamedIntOperand<"fi", 1, "DppFI">;

  def Dpp16FI : NamedIntOperand<"fi", 1, "DppFI">;
}
1243
def blgp : CustomOperand<i32, 1, "BLGP">;

// Range-checked named integers; the Validator states the accepted width.
let Validator = "isUInt<3>" in
def CBSZ : NamedIntOperand<"cbsz">;

let Validator = "isUInt<4>" in
def ABID : NamedIntOperand<"abid">;

def hwreg : CustomOperand<i32, 0, "Hwreg">;

def exp_tgt : CustomOperand<i32, 0, "ExpTgt">;

// Wait-counter style operands: always printed, each with its own width.
let AlwaysPrint = 1 in {
  let Validator = "isUInt<4>" in
  def WaitVDST : NamedIntOperand<"wait_vdst">;

  let Validator = "isUInt<3>" in
  def WaitEXP : NamedIntOperand<"wait_exp">;

  let Validator = "isUInt<4>" in
  def WaitVAVDst : NamedIntOperand<"wait_va_vdst">;

  let Validator = "isUInt<1>" in
  def WaitVMVSrc : NamedIntOperand<"wait_vm_vsrc">;
} // End AlwaysPrint = 1

let Validator = "isUInt<2>" in
def ByteSel : NamedIntOperand<"byte_sel">;

// bitop3 is printed by printBitOp3; bitop3_0 is the same operand with a
// default value of 0.
def BitOp3 : NamedIntOperand<"bitop3"> {
  let PrintMethod = "printBitOp3";
}
def bitop3_0 : DefaultOperand<BitOp3, 0>;
1277
// Immediate operand whose operand type ("OPERAND_KIMM<size>") and print
// method ("printU<size>ImmOperand") are derived from the bit width of `vt`.
class KImmFPOperand<ValueType vt> : ImmOperand<vt> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_KIMM"#vt.Size;
  let DecoderMethod = "decodeOperand_KImmFP";
  let PrintMethod = "printU"#vt.Size#"ImmOperand";
}

// 32-bit VALU immediate operand that uses the constant bus.
def KImmFP32 : KImmFPOperand<i32>;

// 32-bit VALU immediate operand with a 16-bit value that uses the
// constant bus.
def KImmFP16 : KImmFPOperand<i16>;
1291
// Asm operand classes for sources that carry FP input modifiers. The name
// and predicate are derived from the operand size; all of them are parsed
// by parseRegOrImmWithFPInputMods.
class FPInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "RegOrImmWithFP"#opSize#"InputMods";
  let PredicateMethod = "isRegOrImmWithFP"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImmWithFPInputMods";
}

// Variant whose name and predicate refer to "RegOrInlineImm" instead of
// "RegOrImm".
class FPVCSrcInputModsMatchClass <int opSize> : FPInputModsMatchClass <opSize> {
  let Name = "RegOrInlineImmWithFP"#opSize#"InputMods";
  let PredicateMethod = "isRegOrInlineImmWithFP"#opSize#"InputMods";
}

def FP16InputModsMatchClass : FPInputModsMatchClass<16>;

// 16-bit variant; IsFake16 selects the class name and is forwarded as the
// template argument of the predicate.
class FPT16InputModsMatchClass<bit IsFake16> : FPInputModsMatchClass<16> {
  let Name = !if(IsFake16, "RegOrImmWithFPFake16InputMods",
                 "RegOrImmWithFPT16InputMods");
  let PredicateMethod = "isRegOrImmWithFPT16InputMods<" #
                        !if(IsFake16, "true", "false") # ">";
}

def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
def FP64InputModsMatchClass : FPInputModsMatchClass<64>;

// 16-bit "RegOrInlineImm" variant, parameterised on IsFake16 the same way.
class FP16VCSrcInputModsMatchClass<bit IsFake16>
    : FPVCSrcInputModsMatchClass<16> {
  let Name = !if(IsFake16, "RegOrInlineImmWithFPFake16InputMods",
                 "RegOrInlineImmWithFPT16InputMods");
  let PredicateMethod = "isRegOrInlineImmWithFP16InputMods<" #
                        !if(IsFake16, "true", "false") # ">";
}

def FP32VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<32>;
1321
// Base i32 operand for source modifiers; the asm match class is supplied by
// the instantiation.
class InputMods <AsmOperandClass matchClass> : Operand <i32> {
  let ParserMatchClass = matchClass;
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_INPUT_MODS";
}

// Input modifiers printed with printOperandAndFPInputMods.
class FPInputMods <FPInputModsMatchClass matchClass> : InputMods <matchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}

def FP16InputMods : FPInputMods<FP16InputModsMatchClass>;

// The T16 flavours additionally use the getMachineOpValueT16 encoder.
class FPT16InputMods<bit IsFake16>
    : FPInputMods<FPT16InputModsMatchClass<IsFake16>> {
  let EncoderMethod = "getMachineOpValueT16";
}

def FP32InputMods : FPInputMods<FP32InputModsMatchClass>;

def FP32T16DstInputMods : FPInputMods<FP32InputModsMatchClass> {
  let EncoderMethod = "getMachineOpValueT16";
}

def FP64InputMods : FPInputMods<FP64InputModsMatchClass>;

class FPT16VCSrcInputMods<bit IsFake16 = 1>
  : FPInputMods<FP16VCSrcInputModsMatchClass<IsFake16>> {
  let EncoderMethod = "getMachineOpValueT16";
}

def FP32VCSrcInputMods : FPInputMods<FP32VCSrcInputModsMatchClass>;
1347
// Asm operand classes for sources that carry integer input modifiers; all
// of them are parsed by parseRegOrImmWithIntInputMods.
class IntInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "RegOrImmWithInt"#opSize#"InputMods";
  let PredicateMethod = "isRegOrImmWithInt"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImmWithIntInputMods";
}

// Variant whose name and predicate refer to "RegOrInlineImm" instead of
// "RegOrImm".
class IntVCSrcInputModsMatchClass <int opSize>
    : IntInputModsMatchClass <opSize> {
  let Name = "RegOrInlineImmWithInt"#opSize#"InputMods";
  let PredicateMethod = "isRegOrInlineImmWithInt"#opSize#"InputMods";
}

// 16-bit variant; IsFake16 selects the class name and is forwarded as the
// template argument of the predicate.
class IntT16InputModsMatchClass<bit IsFake16> : IntInputModsMatchClass<16> {
  let Name = !if(IsFake16, "RegOrImmWithIntFake16InputMods",
                 "RegOrImmWithIntT16InputMods");
  let PredicateMethod = "isRegOrImmWithIntT16InputMods<" #
                        !if(IsFake16, "true", "false") # ">";
}

def Int32InputModsMatchClass : IntInputModsMatchClass<32>;
def Int64InputModsMatchClass : IntInputModsMatchClass<64>;
def Int32VCSrcInputModsMatchClass : IntVCSrcInputModsMatchClass<32>;

// 16-bit "RegOrInlineImm" variant, parameterised on IsFake16 the same way.
class IntT16VCSrcInputModsMatchClass<bit IsFake16>
    : IntInputModsMatchClass<16> {
  let Name = !if(IsFake16, "RegOrInlineImmWithIntFake16InputMods",
                 "RegOrInlineImmWithIntT16InputMods");
  let PredicateMethod = "isRegOrInlineImmWithIntT16InputMods<" #
                        !if(IsFake16, "true", "false") # ">";
}
1372
// Input modifiers printed with printOperandAndIntInputMods.
class IntInputMods <IntInputModsMatchClass matchClass>
    : InputMods <matchClass> {
  let PrintMethod = "printOperandAndIntInputMods";
}

// The T16 flavours additionally use the getMachineOpValueT16 encoder.
class IntT16InputMods<bit IsFake16>
    : IntInputMods<IntT16InputModsMatchClass<IsFake16>> {
  let EncoderMethod = "getMachineOpValueT16";
}

def Int32InputMods : IntInputMods<Int32InputModsMatchClass>;

def Int32T16DstInputMods : IntInputMods<Int32InputModsMatchClass> {
  let EncoderMethod = "getMachineOpValueT16";
}

def Int64InputMods : IntInputMods<Int64InputModsMatchClass>;
def Int32VCSrcInputMods : IntInputMods<Int32VCSrcInputModsMatchClass>;

class IntT16VCSrcInputMods<bit IsFake16 = 1>
    : IntInputMods<IntT16VCSrcInputModsMatchClass<IsFake16>> {
  let EncoderMethod = "getMachineOpValueT16";
}
1389
// Match class for op_sel modifier operands: any register or immediate,
// parsed by parseRegOrImm.
class OpSelModsMatchClass : AsmOperandClass {
  let Name = "OpSelMods";
  let PredicateMethod = "isRegOrImm";
  let ParserMethod = "parseRegOrImm";
}

def IntOpSelModsMatchClass : OpSelModsMatchClass;
def IntOpSelMods : InputMods<IntOpSelModsMatchClass>;
1398
// Match class for SDWA sources with FP input modifiers; the name and the
// predicate are derived from the operand size.
class FPSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "SDWAWithFP"#opSize#"InputMods";
  let PredicateMethod = "isSDWAFP"#opSize#"Operand";
  let ParserMethod = "parseRegOrImmWithFPInputMods";
}

def FP16SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<16>;
def FP32SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<32>;

// SDWA input modifiers printed with printOperandAndFPInputMods.
class FPSDWAInputMods <FPSDWAInputModsMatchClass matchClass>
    : InputMods <matchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}

def FP16SDWAInputMods : FPSDWAInputMods<FP16SDWAInputModsMatchClass>;
def FP32SDWAInputMods : FPSDWAInputMods<FP32SDWAInputModsMatchClass>;
1415
// Match class for a VGPR source with FP input modifiers, parsed by
// parseRegWithFPInputMods.
def FPVRegInputModsMatchClass : AsmOperandClass {
  let Name = "VRegWithFPInputMods";
  let PredicateMethod = "isVRegWithInputMods";
  let ParserMethod = "parseRegWithFPInputMods";
}

def FPVRegInputMods : InputMods <FPVRegInputModsMatchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}

// Same as FPVRegInputMods, but encoded with getMachineOpValueT16.
def FPVRegT16DstInputMods : InputMods <FPVRegInputModsMatchClass> {
  let EncoderMethod = "getMachineOpValueT16";
  let PrintMethod = "printOperandAndFPInputMods";
}
1430
// 16-bit VGPR match classes with FP input modifiers. IsFake16 selects the
// class name and is forwarded as the template argument of the predicate.
class FPT16_Lo128VRegInputModsMatchClass<bit IsFake16> : AsmOperandClass {
  let Name = !if(IsFake16, "Fake16_Lo128VRegWithFPInputMods",
                 "T16_Lo128VRegWithFPInputMods");
  let PredicateMethod = "isT16_Lo128VRegWithInputMods<" #
                        !if(IsFake16, "true", "false") # ">";
  let ParserMethod = "parseRegWithFPInputMods";
}

class FPT16VRegInputModsMatchClass<bit IsFake16> : AsmOperandClass {
  let Name = !if(IsFake16, "Fake16VRegWithFPInputMods",
                 "T16VRegWithFPInputMods");
  let PredicateMethod = "isT16VRegWithInputMods<" #
                        !if(IsFake16, "true", "false") # ">";
  let ParserMethod = "parseRegWithFPInputMods";
}

// Operands built on the classes above. The Lo128 flavour uses the
// getMachineOpValueT16Lo128 encoder, the other one getMachineOpValueT16.
class FPT16_Lo128VRegInputMods<bit IsFake16 = 1>
    : InputMods <FPT16_Lo128VRegInputModsMatchClass<IsFake16>> {
  let EncoderMethod = "getMachineOpValueT16Lo128";
  let PrintMethod = "printOperandAndFPInputMods";
}

class FPT16VRegInputMods<bit IsFake16 = 1>
    : InputMods <FPT16VRegInputModsMatchClass<IsFake16>> {
  let EncoderMethod = "getMachineOpValueT16";
  let PrintMethod = "printOperandAndFPInputMods";
}
1458
// Match class for SDWA sources with integer input modifiers; the name and
// the predicate are derived from the operand size.
class IntSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "SDWAWithInt"#opSize#"InputMods";
  let PredicateMethod = "isSDWAInt"#opSize#"Operand";
  let ParserMethod = "parseRegOrImmWithIntInputMods";
}

def Int16SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<16>;
def Int32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32>;

// 32-bit variant with its own name that is parsed by plain parseRegOrImm.
def Bin32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32> {
  let ParserMethod = "parseRegOrImm";
  let Name = "SDWAWithBin32InputMods";
}

// SDWA input modifiers printed with printOperandAndIntInputMods.
class IntSDWAInputMods <IntSDWAInputModsMatchClass matchClass>
    : InputMods <matchClass> {
  let PrintMethod = "printOperandAndIntInputMods";
}

def Int16SDWAInputMods : IntSDWAInputMods<Int16SDWAInputModsMatchClass>;
def Int32SDWAInputMods : IntSDWAInputMods<Int32SDWAInputModsMatchClass>;
def Bin32SDWAInputMods : IntSDWAInputMods<Bin32SDWAInputModsMatchClass>;
1480
// Match class for a VGPR source with integer input modifiers, parsed by
// parseRegWithIntInputMods.
def IntVRegInputModsMatchClass : AsmOperandClass {
  let Name = "VRegWithIntInputMods";
  let PredicateMethod = "isVRegWithInputMods";
  let ParserMethod = "parseRegWithIntInputMods";
}

// 16-bit VGPR match classes with integer input modifiers. IsFake16 selects
// the class name and is forwarded as the template argument of the predicate.
class IntT16_Lo128VRegInputModsMatchClass<bit IsFake16 = 1> : AsmOperandClass {
  let Name = !if(IsFake16, "Fake16_Lo128VRegWithIntInputMods",
                 "T16_Lo128VRegWithIntInputMods");
  let PredicateMethod = "isT16_Lo128VRegWithInputMods<" #
                        !if(IsFake16, "true", "false") # ">";
  let ParserMethod = "parseRegWithIntInputMods";
}

class IntT16VRegInputModsMatchClass<bit IsFake16 = 1> : AsmOperandClass {
  let Name = !if(IsFake16, "Fake16VRegWithIntInputMods",
                 "T16VRegWithIntInputMods");
  let PredicateMethod = "isT16VRegWithInputMods<" #
                        !if(IsFake16, "true", "false") # ">";
  let ParserMethod = "parseRegWithIntInputMods";
}

// Operands built on the classes above. The Lo128 flavour uses the
// getMachineOpValueT16Lo128 encoder, the other one getMachineOpValueT16.
class IntT16_Lo128VRegInputMods<bit IsFake16 = 1>
    : InputMods <IntT16_Lo128VRegInputModsMatchClass<IsFake16>> {
  let EncoderMethod = "getMachineOpValueT16Lo128";
  let PrintMethod = "printOperandAndIntInputMods";
}

class IntT16VRegInputMods<bit IsFake16 = 1>
    : InputMods <IntT16VRegInputModsMatchClass<IsFake16>> {
  let EncoderMethod = "getMachineOpValueT16";
  let PrintMethod = "printOperandAndIntInputMods";
}

def IntVRegInputMods : InputMods <IntVRegInputModsMatchClass> {
  let PrintMethod = "printOperandAndIntInputMods";
}

// Same as IntVRegInputMods, but encoded with getMachineOpValueT16.
def IntVRegT16DstInputMods : InputMods <IntVRegInputModsMatchClass> {
  let EncoderMethod = "getMachineOpValueT16";
  let PrintMethod = "printOperandAndIntInputMods";
}
1523
// Match class for packed FP sources; the name and the predicate are derived
// from the element size.
class PackedFPInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "PackedFP"#opSize#"InputMods";
  let PredicateMethod = "isPackedFP"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImmWithFPInputMods";
}

// Match class for packed integer sources. It currently accepts any register
// or immediate; a size-specific predicate is left disabled below.
class PackedIntInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "PackedInt"#opSize#"InputMods";
  let PredicateMethod = "isRegOrImm";
//  let PredicateMethod = "isPackedInt"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImm";
}

def PackedF16InputModsMatchClass : PackedFPInputModsMatchClass<16>;
def PackedI16InputModsMatchClass : PackedIntInputModsMatchClass<16>;
def PackedF32InputModsMatchClass : PackedFPInputModsMatchClass<32>;

// Packed FP input modifiers printed with printOperandAndFPInputMods.
class PackedFPInputMods <PackedFPInputModsMatchClass matchClass>
    : InputMods <matchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}

// Packed integer input modifiers; no print method is set here (a dedicated
// one is left disabled below).
class PackedIntInputMods <PackedIntInputModsMatchClass matchClass>
    : InputMods <matchClass> {
  //let PrintMethod = "printPackedIntInputMods";
}

def PackedF16InputMods : PackedFPInputMods<PackedF16InputModsMatchClass>;
def PackedI16InputMods : PackedIntInputMods<PackedI16InputModsMatchClass>;
def PackedF32InputMods : PackedFPInputMods<PackedF32InputModsMatchClass>;
1552
// i32 target immediate that fits in two unsigned bits; rendered through
// MFMALdScaleXForm.
def MFMALdScaleModifierOp : TImmLeaf<i32, [{ return isUInt<2>(Imm); }],
                                     MFMALdScaleXForm>;
1556
1557//===----------------------------------------------------------------------===//
1558// Complex patterns
1559//===----------------------------------------------------------------------===//
1560
// Each ComplexPattern below names the C++ selector function that implements
// it; the integer argument is the number of operands it produces.

// DS addressing.
def DS1Addr1Offset : ComplexPattern<iPTR, 2, "SelectDS1Addr1Offset">;
def DS64Bit4ByteAligned : ComplexPattern<iPTR, 3, "SelectDS64Bit4ByteAligned">;
def DS128Bit8ByteAligned : ComplexPattern<iPTR, 3, "SelectDS128Bit8ByteAligned">;

def MOVRELOffset : ComplexPattern<iPTR, 2, "SelectMOVRELOffset">;

def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;

// Modifiers for floating point instructions.
def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;

// VOP3 modifiers used for instructions that do not read canonicalized
// floating point values (i.e. integer operations with FP source
// modifiers)
def VOP3ModsNonCanonicalizing : ComplexPattern<untyped, 2,
  "SelectVOP3ModsNonCanonicalizing">;

def VOP3NoMods : ComplexPattern<untyped, 1, "SelectVOP3NoMods">;

def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;

def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">;

def VOP3PModsDOT : ComplexPattern<untyped, 2, "SelectVOP3PModsDOT">;
def VOP3PModsNeg : ComplexPattern<untyped, 1, "SelectVOP3PModsNeg">;
def WMMAOpSelVOP3PMods : ComplexPattern<untyped, 1, "SelectWMMAOpSelVOP3PMods">;

// WMMA / SWMMAC operands.
def WMMAModsF32NegAbs : ComplexPattern<untyped, 2, "SelectWMMAModsF32NegAbs">;
def WMMAModsF16Neg : ComplexPattern<untyped, 2, "SelectWMMAModsF16Neg">;
def WMMAModsF16NegAbs : ComplexPattern<untyped, 2, "SelectWMMAModsF16NegAbs">;
def WMMAVISrc : ComplexPattern<untyped, 1, "SelectWMMAVISrc">;
def SWMMACIndex8 : ComplexPattern<untyped, 2, "SelectSWMMACIndex8">;
def SWMMACIndex16 : ComplexPattern<untyped, 2, "SelectSWMMACIndex16">;

def VOP3OpSel : ComplexPattern<untyped, 2, "SelectVOP3OpSel">;

def VOP3OpSelMods : ComplexPattern<untyped, 2, "SelectVOP3OpSelMods">;

def VOP3PMadMixModsExt : ComplexPattern<untyped, 2, "SelectVOP3PMadMixModsExt">;
def VOP3PMadMixMods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixMods">;

def VINTERPMods : ComplexPattern<untyped, 2, "SelectVINTERPMods">;
def VINTERPModsHi : ComplexPattern<untyped, 2, "SelectVINTERPModsHi">;
1604
1605//===----------------------------------------------------------------------===//
1606// SI assembler operands
1607//===----------------------------------------------------------------------===//
1608
1609def SIOperand {
1610  int ZERO = 0x80;
1611  int VCC = 0x6A;
1612  int FLAT_SCR = 0x68;
1613}
1614
// Source-modifier bit values.  This must be kept in sync with the SISrcMods
// enum.
def SRCMODS {
  int NONE = 0;
  int NEG = 1;
  int ABS = 2;
  // Both NEG and ABS set (1 | 2 == 3).
  int NEG_ABS = !or(NEG, ABS);

  // NEG_HI has the same value as ABS.
  int NEG_HI = ABS;
  int OP_SEL_0 = 4;
  int OP_SEL_1 = 8;
  // DST_OP_SEL has the same value as OP_SEL_1.
  int DST_OP_SEL = OP_SEL_1;
}
1627
// Named values for a destination clamp setting: 0 is NONE, 1 is ENABLE.
def DSTCLAMP {
  int NONE   = 0;
  int ENABLE = 1;
}
1632
// Named values for a destination output-modifier setting.  Only the "no
// modifier" value is named here.
def DSTOMOD {
  int NONE = 0;
}
1636
// Named integer ids for hardware registers.  The numbering has gaps: ids
// 8-14, 23-24 and 26-28 are not named here.
// NOTE(review): presumably the register ids used in s_getreg/s_setreg style
// immediates -- confirm against the ISA documentation.
def HWREG {
  int MODE          = 1;
  int STATUS        = 2;
  int TRAPSTS       = 3;
  int HW_ID         = 4;
  int GPR_ALLOC     = 5;
  int LDS_ALLOC     = 6;
  int IB_STS        = 7;

  int MEM_BASES     = 15;
  int TBA_LO        = 16;
  int TBA_HI        = 17;
  int TMA_LO        = 18;
  int TMA_HI        = 19;
  int FLAT_SCR_LO   = 20;
  int FLAT_SCR_HI   = 21;
  int XNACK_MASK    = 22;

  int POPS_PACKER   = 25;

  int SHADER_CYCLES = 29;
}
1656
// Packs a hardware-register selector into an immediate:
//   Reg        is OR-ed in unshifted,
//   Offset     is shifted left by 6,
//   Size - 1   is shifted left by 11,
// and the combined value is masked to its low 16 bits.  The individual fields
// are not range-checked or masked before being combined.
class getHwRegImm<int Reg, int Offset = 0, int Size = 32> {
  defvar OffsetField = !shl(Offset, 6);
  defvar SizeField = !shl(!sub(Size, 1), 11);
  int ret = !and(!or(Reg, OffsetField, SizeField), 0xFFFF);
}
1662
1663//===----------------------------------------------------------------------===//
1664//
1665// SI Instruction multiclass helpers.
1666//
1667// Instructions with _32 take 32-bit operands.
1668// Instructions with _64 take 64-bit operands.
1669//
// VOP_* instructions can use either a 32-bit or 64-bit encoding.  The 32-bit
// encoding is the standard encoding, but instructions that make use of
// any of the instruction modifiers must use the 64-bit encoding.
1673//
1674// Instructions with _e32 use the 32-bit encoding.
1675// Instructions with _e64 use the 64-bit encoding.
1676//
1677//===----------------------------------------------------------------------===//
1678
// Records which pseudo instruction name and which subtarget value a record is
// associated with.  Both template arguments are stored unchanged.
class SIMCInstr <string pseudo, int subtarget> {
  // Name of the pseudo instruction.
  string PseudoInstr = pseudo;
  // Subtarget value.
  // NOTE(review): presumably one of the SIEncodingFamily values -- confirm.
  int Subtarget = subtarget;
}
1683
1684//===----------------------------------------------------------------------===//
1685// Vector ALU classes
1686//===----------------------------------------------------------------------===//
1687
// Counts the source operands described by (Src0, Src1, Src2): the index of the
// first type that is `untyped`, or 3 if none of them is.
class getNumSrcArgs<ValueType Src0, ValueType Src1, ValueType Src2> {
  int ret = !cond(!eq(Src0.Value, untyped.Value) : 0,
                  !eq(Src1.Value, untyped.Value) : 1,   // VOP1
                  !eq(Src2.Value, untyped.Value) : 2,   // VOP2
                  1                              : 3);  // VOP3
}
1695
// Returns the register operand to use for the destination of VOP[123C]
// instructions for the given VT.  The choice is made on VT.Size; any size not
// listed below gets VOPDstS64orS32 (the VT == i1 case).
class getVALUDstForVT<ValueType VT, bit IsTrue16 = 0, bit IsVOP3Encoding = 0> {
  // 16-bit destination: without true16 a plain VGPR_32 is used; with true16
  // the VOP3 encoding gets VOPDstOperand_t16 and other encodings the Lo128
  // variant.
  defvar Dst16 = !cond(!not(IsTrue16) : VOPDstOperand<VGPR_32>,
                       IsVOP3Encoding : VOPDstOperand_t16,
                       1              : VOPDstOperand_t16Lo128);
  RegisterOperand ret = !cond(!eq(VT.Size, 16)   : Dst16,
                              !eq(VT.Size, 32)   : VOPDstOperand<VGPR_32>,
                              !eq(VT.Size, 64)   : VOPDstOperand<VReg_64>,
                              !eq(VT.Size, 128)  : VOPDstOperand<VReg_128>,
                              !eq(VT.Size, 192)  : VOPDstOperand<VReg_192>,
                              !eq(VT.Size, 256)  : VOPDstOperand<VReg_256>,
                              !eq(VT.Size, 512)  : VOPDstOperand<VReg_512>,
                              !eq(VT.Size, 1024) : VOPDstOperand<VReg_1024>,
                              1                  : VOPDstS64orS32); // VT == i1
}
1712
// Destination register operand selection for the given VT where a 16-bit
// result is placed in VGPR_32_Lo128.  Only sizes 16, 32, 64 and 128 are
// distinguished; anything else gets VOPDstS64orS32 (the VT == i1 case).
class getVALUDstForVT_fake16<ValueType VT> {
  RegisterOperand ret =
      !cond(!eq(VT.Size, 16)  : VOPDstOperand<VGPR_32_Lo128>,
            !eq(VT.Size, 32)  : VOPDstOperand<VGPR_32>,
            !eq(VT.Size, 64)  : VOPDstOperand<VReg_64>,
            !eq(VT.Size, 128) : VOPDstOperand<VReg_128>,
            1                 : VOPDstS64orS32); // VT == i1
}
1720
// Returns the register operand to use for the destination of VOP[12C]
// instructions with the SDWA extension: SDWAVopcDst for a 1-bit VT (VOPC),
// otherwise a 32-bit VGPR destination (VOP1/2).
class getSDWADstForVT<ValueType VT> {
  RegisterOperand ret = !if(!ne(VT.Size, 1),
                            VOPDstOperand<VGPR_32>, // VOP1/2 32-bit dst
                            SDWAVopcDst);           // VOPC
}
1728
// Returns the register operand to use for source 0 of VOP[12C] instructions
// for the given VT.  Types not listed fall back to VSrc_b32.
//
// For the 16-bit scalar types the operand depends on the true16 flags:
//   !IsTrue16            -> the plain VSrc_* operand
//   IsTrue16,  IsFake16  -> the VSrcFake16_*_Lo128 operand
//   IsTrue16, !IsFake16  -> the VSrcT_*_Lo128 operand
class getVOPSrc0ForVT<ValueType VT, bit IsTrue16, bit IsFake16 = 1> {
  defvar SrcI16 = !cond(!not(IsTrue16) : VSrc_b16,
                        IsFake16       : VSrcFake16_b16_Lo128,
                        1              : VSrcT_b16_Lo128);
  defvar SrcF16 = !cond(!not(IsTrue16) : VSrc_f16,
                        IsFake16       : VSrcFake16_f16_Lo128,
                        1              : VSrcT_f16_Lo128);
  defvar SrcBF16 = !cond(!not(IsTrue16) : VSrc_bf16,
                         IsFake16       : VSrcFake16_bf16_Lo128,
                         1              : VSrcT_bf16_Lo128);
  RegisterOperand ret =
  !cond(!eq(VT, i16)    : SrcI16,
        !eq(VT, f16)    : SrcF16,
        !eq(VT, bf16)   : SrcBF16,
        !eq(VT, i32)    : VSrc_b32,
        !eq(VT, f32)    : VSrc_f32,
        !eq(VT, i64)    : VSrc_b64,
        !eq(VT, f64)    : VSrc_f64,
        !eq(VT, v2i16)  : VSrc_v2b16,
        !eq(VT, v2f16)  : VSrc_v2f16,
        !eq(VT, v2bf16) : VSrc_v2bf16,
        !or(!eq(VT, v4f16), !eq(VT, v4bf16)) : AVSrc_64,
        1               : VSrc_b32);
}
1753
// Returns the SSrc operand for the given VT: SSrc_b64 for 64-bit types and
// SSrc_b32 for every other size.
class getSOPSrcForVT<ValueType VT> {
  RegisterOperand ret = !if(!ne(VT.Size, 64), SSrc_b32, SSrc_b64);
}
1757
// Returns the vreg register operand to use for a source operand of the given
// VT.  48-bit types share the 64-bit register class.  A 16-bit type gets a
// Lo128 operand only when IsTrue16 is set (the 32-bit form for fake16, the
// 16-bit form otherwise); everything else not listed uses VGPR_32.
class getVregSrcForVT<ValueType VT, bit IsTrue16 = 0, bit IsFake16 = 1> {
  defvar IsTrue16Src = !and(!eq(VT.Size, 16), IsTrue16);
  defvar Is64Or48 = !or(!eq(VT.Size, 64), !eq(VT.Size, 48));
  RegisterOperand ret =
  !cond(IsTrue16Src       : !if(IsFake16, VGPRSrc_32_Lo128, VGPRSrc_16_Lo128),
        Is64Or48          : RegisterOperand<VReg_64>,
        !eq(VT.Size, 96)  : RegisterOperand<VReg_96>,
        !eq(VT.Size, 128) : RegisterOperand<VReg_128>,
        !eq(VT.Size, 192) : RegisterOperand<VReg_192>,
        !eq(VT.Size, 512) : RegisterOperand<VReg_512>,
        1                 : RegisterOperand<VGPR_32>);
}
1772
// Returns the SDWA source operand for the given VT.  `retFlt` and `retInt`
// hold the floating-point and integer candidates (16-bit variant when
// VT.Size is 16, 32-bit variant otherwise); `ret` picks between them based on
// VT.isFP.
class getSDWASrcForVT <ValueType VT> {
  defvar Is16Bit = !eq(VT.Size, 16);
  RegisterOperand retFlt = !if(Is16Bit, SDWASrc_f16, SDWASrc_f32);
  RegisterOperand retInt = !if(Is16Bit, SDWASrc_i16, SDWASrc_i32);
  RegisterOperand ret = !if(VT.isFP, retFlt, retInt);
}
1778
// Returns the register operand to use for sources of VOP3 instructions for
// the given VT.  Specific types are matched first; any remaining type is
// classified by VT.Size, with VSrc_b32 as the final fallback.
class getVOP3SrcForVT<ValueType VT, bit IsTrue16 = 0> {
  RegisterOperand ret =
  !cond(// Scalar types.
        !eq(VT, i1)        : SSrc_i1,
        !eq(VT, i16)       : !if(IsTrue16, VSrcT_b16, VSrc_b16),
        !eq(VT, f16)       : !if(IsTrue16, VSrcT_f16, VSrc_f16),
        !eq(VT, bf16)      : !if(IsTrue16, VSrcT_bf16, VSrc_bf16),
        !eq(VT, f32)       : VSrc_f32,
        !eq(VT, f64)       : VSrc_f64,
        // Vector types with a dedicated operand.
        !eq(VT, v2i16)     : VSrc_v2b16,
        !eq(VT, v2f16)     : VSrc_v2f16,
        !eq(VT, v2bf16)    : VSrc_v2bf16,
        !eq(VT, v2i32)     : VSrc_v2b32,
        !eq(VT, v2f32)     : VSrc_v2f32,
        !or(!eq(VT, v4f16), !eq(VT, v4bf16)) : AVSrc_64,
        // Everything else, by size.
        !eq(VT.Size, 64)   : VSrc_b64,
        !eq(VT.Size, 96)   : VRegSrc_96,
        !eq(VT.Size, 128)  : VRegSrc_128,
        !eq(VT.Size, 192)  : VRegSrc_192,
        !eq(VT.Size, 256)  : VRegSrc_256,
        !eq(VT.Size, 512)  : VRegSrc_512,
        !eq(VT.Size, 1024) : VRegSrc_1024,
        1                  : VSrc_b32);
}
1805
// Returns the vreg register operand to use for sources of VOP3 instructions
// for the given VT.  48-bit types share the 64-bit register class.  A 16-bit
// type uses VGPR_16 only when IsTrue16 is set and IsFake16 is clear; every
// other unlisted case uses VGPR_32.
class getVOP3VRegSrcForVT<ValueType VT, bit IsTrue16 = 0, bit IsFake16 = 0> {
   defvar IsReal16 = !and(!eq(VT.Size, 16), IsTrue16, !not(IsFake16));
   defvar Is64Or48 = !or(!eq(VT.Size, 64), !eq(VT.Size, 48));
   RegisterOperand ret =
   !cond(IsReal16          : RegisterOperand<VGPR_16>,
         Is64Or48          : RegisterOperand<VReg_64>,
         !eq(VT.Size, 96)  : RegisterOperand<VReg_96>,
         !eq(VT.Size, 128) : RegisterOperand<VReg_128>,
         1                 : RegisterOperand<VGPR_32>);
}
1820
// Src2 of VOP3 DPP instructions cannot be a literal, so the VCSrc_* operands
// are used.  For the 16-bit scalar types IsFake16 selects between the plain
// VCSrc_* operand and its VCSrcT_* counterpart.  Unlisted types use VCSrc_b32.
class getVOP3DPPSrcForVT<ValueType VT, bit IsFake16 = 1> {
  defvar SrcI16 = !if(IsFake16, VCSrc_b16, VCSrcT_b16);
  defvar SrcF16 = !if(IsFake16, VCSrc_f16, VCSrcT_f16);
  defvar SrcBF16 = !if(IsFake16, VCSrc_bf16, VCSrcT_bf16);
  RegisterOperand ret =
  !cond(!eq(VT, i1)     : SSrc_i1,
        !eq(VT, i16)    : SrcI16,
        !eq(VT, f16)    : SrcF16,
        !eq(VT, bf16)   : SrcBF16,
        !eq(VT, f32)    : VCSrc_f32,
        !eq(VT, v2i16)  : VCSrc_v2b16,
        !eq(VT, v2f16)  : VCSrc_v2f16,
        !eq(VT, v2bf16) : VCSrc_v2bf16,
        1               : VCSrc_b32);
}
1834
// Float or packed int: `ret` is set when SrcVT is one of the scalar
// floating-point types or one of the 2/4/8/16-element vector types listed
// below.
class isModifierType<ValueType SrcVT> {
  defvar V = SrcVT.Value;

  defvar IsScalarFP = !or(!eq(V, f16.Value),
                          !eq(V, bf16.Value),
                          !eq(V, f32.Value),
                          !eq(V, f64.Value));
  defvar IsVec2 = !or(!eq(V, v2f16.Value),
                      !eq(V, v2i16.Value),
                      !eq(V, v2bf16.Value),
                      !eq(V, v2f32.Value),
                      !eq(V, v2i32.Value));
  defvar IsVec4 = !or(!eq(V, v4f16.Value),
                      !eq(V, v4i16.Value),
                      !eq(V, v4bf16.Value),
                      !eq(V, v4f32.Value),
                      !eq(V, v4i32.Value));
  defvar IsVec8 = !or(!eq(V, v8f16.Value),
                      !eq(V, v8i16.Value),
                      !eq(V, v8bf16.Value),
                      !eq(V, v8f32.Value),
                      !eq(V, v8i32.Value));
  defvar IsVec16 = !or(!eq(V, v16f16.Value),
                       !eq(V, v16i16.Value),
                       !eq(V, v16bf16.Value));

  bit ret = !or(IsScalarFP, IsVec2, IsVec4, IsVec8, IsVec16);
}
1860
// Return type of input modifiers operand for specified input operand.
// True16: If the destination is a 16-bit value, the src0 modifier must hold
// dst's opsel bit. Use a dummy value for DstVT if getting the mod for a src
// operand besides 0.
// 64-bit src types are not implemented for True16 dst.
//
// Selection order: 64-bit sources, then 16-bit sources, then all remaining
// sources, where the T16Dst variants are used only if IsTrue16 is set and
// DstVT is 16 bits wide.
class getSrc0Mod <ValueType VT, ValueType DstVT, bit IsTrue16 = 0, bit IsFake16 = 1> {
  defvar Mods64 = !if(VT.isFP, FP64InputMods, Int64InputMods);
  defvar Mods16 =
      !if(IsTrue16,
          !if(VT.isFP, FPT16InputMods<IsFake16>, IntT16InputMods<IsFake16>),
          !if(VT.isFP, FP16InputMods, IntOpSelMods));
  defvar Mods32 = !if(VT.isFP, FP32InputMods, Int32InputMods);
  defvar Mods32T16Dst = !if(VT.isFP, FP32T16DstInputMods, Int32T16DstInputMods);

  defvar DstIsT16 = !and(IsTrue16, !eq(DstVT.Size, 16));

  Operand ret = !cond(!eq(VT.Size, 64) : Mods64,
                      !eq(VT.Size, 16) : Mods16,
                      DstIsT16         : Mods32T16Dst,
                      1                : Mods32);
}
1880
// Input modifiers operand for a source other than src0: getSrc0Mod with a
// dummy (non-16-bit) destination type.
class getSrcMod<ValueType VT, bit IsTrue16 = 0, bit IsFake16 = 1>
    : getSrc0Mod<VT, f128/*Dummy Arg*/, IsTrue16, IsFake16>;
1882
// Returns the input modifiers operand type for a DPP source operand of the
// given VT: the FP variant for floating-point types, the integer variant
// otherwise.
class getSrcModDPP <ValueType VT> {
  Operand ret = !if(VT.isFP, FPVRegInputMods, IntVRegInputMods);
}
1887
// Like getSrcModDPP, but f16/bf16 and i16 sources get the T16 Lo128 input
// modifiers operand, parameterized by IsFake16.
class getSrcModDPP_t16 <ValueType VT, bit IsFake16 = 1> {
  defvar Is16BitFP = !or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value));
  Operand ret =
      !cond(!and(VT.isFP, Is16BitFP) : FPT16_Lo128VRegInputMods<IsFake16>,
            VT.isFP                  : FPVRegInputMods,
            !eq(VT.Value, i16.Value) : IntT16_Lo128VRegInputMods<IsFake16>,
            1                        : IntVRegInputMods);
}
1896
// Return type of input modifiers operand for specified input operand for DPP.
// True16: If the destination is a 16-bit value, the src0 modifier must hold
// dst's opsel bit. Use a dummy value for DstVT if getting the mod for a src
// operand besides 0.
// 64-bit src types are not implemented for True16 dst.
//
// f16/bf16 and i16 sources always use the T16 VReg operand.  Other sources use
// the T16Dst variant only when IsFake16 is clear and DstVT is 16 bits wide.
class getSrc0ModVOP3DPP <ValueType VT, ValueType DstVT, bit IsFake16 = 1> {
  defvar Is16BitFP = !or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value));
  defvar DstIsT16 = !and(!not(IsFake16), !eq(DstVT.Size, 16));
  Operand ret =
      !cond(!and(VT.isFP, Is16BitFP) : FPT16VRegInputMods<IsFake16>,
            VT.isFP                  : !if(DstIsT16, FPVRegT16DstInputMods,
                                                     FPVRegInputMods),
            !eq(VT.Value, i16.Value) : IntT16VRegInputMods<IsFake16>,
            1                        : !if(DstIsT16, IntVRegT16DstInputMods,
                                                     IntVRegInputMods));
}
1917
// Input modifiers operand for a VOP3 DPP source that uses a VCSrc operand.
// GFX11 only supports VGPR src1, but the restriction is done in AsmParser
// and GCNDPPCombine.
class getSrcModVOP3DPP<ValueType VT, bit IsFake16 = 1> {
  defvar Is16BitFP = !or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value));
  Operand ret =
      !cond(!and(VT.isFP, Is16BitFP) : FPT16VCSrcInputMods<IsFake16>,
            VT.isFP                  : FP32VCSrcInputMods,
            !eq(VT.Value, i16.Value) : IntT16VCSrcInputMods<IsFake16>,
            1                        : Int32VCSrcInputMods);
}
1929
// Returns the input modifiers operand type for an SDWA source operand of the
// given VT.  f16 and bf16 share FP16SDWAInputMods; any type other than
// f16/bf16/f32/i16 uses Int32SDWAInputMods.
class getSrcModSDWA <ValueType VT> {
  defvar Is16BitFP = !or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value));
  Operand ret = !cond(Is16BitFP                : FP16SDWAInputMods,
                      !eq(VT.Value, f32.Value) : FP32SDWAInputMods,
                      !eq(VT.Value, i16.Value) : Int16SDWAInputMods,
                      1                        : Int32SDWAInputMods);
}
1938
// Returns the input arguments for VOP[12C] instructions: $src0 for one source,
// $src0 and $src1 for two, and an empty list for any other NumSrcArgs.
class getIns32 <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs> {
  dag ret = !cond(!eq(NumSrcArgs, 1) : (ins Src0RC:$src0),               // VOP1
                  !eq(NumSrcArgs, 2) : (ins Src0RC:$src0, Src1RC:$src1), // VOP2
                  1                  : (ins));
}
1945
// Returns the input arguments for VOP3 instructions.
//
// The list is selected by NumSrcArgs (0, 1, 2; any other value is treated as
// 3) and then by the flags:
//   * HasModifiers puts a $srcN_modifiers operand in front of each source.
//     With three sources, $src2_modifiers is added only if HasSrc2Mods is set.
//   * HasOMod appends both $clamp and $omod, regardless of HasClamp.  It is
//     consulted for one source, and for two or three sources only when
//     HasModifiers is set.
//   * Otherwise HasClamp appends $clamp alone.
class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
                RegisterOperand Src2RC, int NumSrcArgs,
                bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
                Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {

  dag ret = !cond(
    // VOP1 without input operands (V_NOP, V_CLREXCP)
    !eq(NumSrcArgs, 0) : (ins),

    !eq(NumSrcArgs, 1) :
      !if(HasModifiers,
        // VOP1 with modifiers
        !cond(HasOMod  : (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                              Clamp0:$clamp, omod0:$omod),
              HasClamp : (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                              Clamp0:$clamp),
              1        : (ins Src0Mod:$src0_modifiers, Src0RC:$src0)),
        // VOP1 without modifiers
        !cond(HasOMod  : (ins Src0RC:$src0, Clamp0:$clamp, omod0:$omod),
              HasClamp : (ins Src0RC:$src0, Clamp0:$clamp),
              1        : (ins Src0RC:$src0))),

    !eq(NumSrcArgs, 2) :
      !if(HasModifiers,
        // VOP2 with modifiers
        !cond(HasOMod  : (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                              Src1Mod:$src1_modifiers, Src1RC:$src1,
                              Clamp0:$clamp, omod0:$omod),
              HasClamp : (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                              Src1Mod:$src1_modifiers, Src1RC:$src1,
                              Clamp0:$clamp),
              1        : (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                              Src1Mod:$src1_modifiers, Src1RC:$src1)),
        // VOP2 without modifiers (HasOMod is not consulted)
        !if(HasClamp,
          (ins Src0RC:$src0, Src1RC:$src1, Clamp0:$clamp),
          (ins Src0RC:$src0, Src1RC:$src1))),

    // NumSrcArgs == 3
    1 :
      !cond(
        // VOP3 with modifiers on all three sources
        !and(HasModifiers, HasSrc2Mods) :
          !cond(HasOMod  : (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                                Src1Mod:$src1_modifiers, Src1RC:$src1,
                                Src2Mod:$src2_modifiers, Src2RC:$src2,
                                Clamp0:$clamp, omod0:$omod),
                HasClamp : (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                                Src1Mod:$src1_modifiers, Src1RC:$src1,
                                Src2Mod:$src2_modifiers, Src2RC:$src2,
                                Clamp0:$clamp),
                1        : (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                                Src1Mod:$src1_modifiers, Src1RC:$src1,
                                Src2Mod:$src2_modifiers, Src2RC:$src2)),
        // VOP3 with modifiers except src2
        HasModifiers :
          !cond(HasOMod  : (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                                Src1Mod:$src1_modifiers, Src1RC:$src1,
                                Src2RC:$src2, Clamp0:$clamp, omod0:$omod),
                HasClamp : (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                                Src1Mod:$src1_modifiers, Src1RC:$src1,
                                Src2RC:$src2, Clamp0:$clamp),
                1        : (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                                Src1Mod:$src1_modifiers, Src1RC:$src1,
                                Src2RC:$src2)),
        // VOP3 without modifiers (HasOMod is not consulted)
        1 :
          !if(HasClamp,
            (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2, Clamp0:$clamp),
            (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2))));
}
2027
// Builds the common VOP3 input list: the getIns64 operands, followed by
// $op_sel when HasOpSel is set.
class getInsVOP3Base<RegisterOperand Src0RC, RegisterOperand Src1RC,
                RegisterOperand Src2RC, int NumSrcArgs,
                bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
                Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOpSel> {
  // getIns64 handles clamp and omod. implicit mutex between vop3p and omod
  dag base = getIns64 <Src0RC, Src1RC, Src2RC, NumSrcArgs,
                       HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
                       Src0Mod, Src1Mod, Src2Mod>.ret;
  dag opsel = (ins op_sel0:$op_sel);
  dag ret = !if(HasOpSel, !con(base, opsel), base);
}
2039
// Builds the VOP3P input list: the getInsVOP3Base operands (modifiers on all
// sources, no omod), then $op_sel_hi when HasOpSel is set, then
// $neg_lo/$neg_hi when HasNeg is set.
class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
                   RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp, bit HasOpSel,
                   bit HasNeg,
                   Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
  dag base = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs, HasClamp,
                            /*HasModifiers=*/1, /*HasSrc2Mods=*/1,
                            /*HasOMod=*/0,
                            Src0Mod, Src1Mod, Src2Mod, HasOpSel>.ret;

  dag vop3pOpsel = (ins op_sel_hi0:$op_sel_hi);
  dag vop3p_neg = !if(HasNeg, (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi), (ins));

  // $op_sel_hi (optional) always precedes the neg operands.
  dag vop3pFields = !if(HasOpSel, !con(vop3pOpsel, vop3p_neg), vop3p_neg);
  dag ret = !con(base, vop3pFields);
}
2054
// VOP3 input list with op_sel: getInsVOP3Base with modifiers on all sources
// (including src2) and HasOpSel forced on.
class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC,
                       RegisterOperand Src2RC, int NumSrcArgs,
                       bit HasClamp, bit HasOMod,
                       Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
  dag ret = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs, HasClamp,
                           /*HasModifiers=*/1, /*HasSrc2Mods=*/1, HasOMod,
                           Src0Mod, Src1Mod, Src2Mod, /*HasOpSel=*/1>.ret;
}
2064
// Builds the operand list shared by the DPP input classes.
//
// With no sources (V_NOP) the list is empty, even if HasOld is set.
// Otherwise it is: $old (if HasOld), then one entry per source.  Each source
// is preceded by its $srcN_modifiers operand when HasModifiers is set.
// NumSrcArgs of 1 gives src0 only, 2 gives src0 and src1, and any other
// non-zero value gives all three sources (VOP3).
class getInsDPPBase <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC,
                     RegisterOperand Src2RC, int NumSrcArgs, bit HasModifiers,
                     Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld> {
  defvar Old = !if(HasOld, (ins OldRC:$old), (ins));

  defvar Src0 = !if(HasModifiers,
                    (ins Src0Mod:$src0_modifiers, Src0RC:$src0),
                    (ins Src0RC:$src0));
  defvar Src1 = !if(HasModifiers,
                    (ins Src1Mod:$src1_modifiers, Src1RC:$src1),
                    (ins Src1RC:$src1));
  defvar Src2 = !if(HasModifiers,
                    (ins Src2Mod:$src2_modifiers, Src2RC:$src2),
                    (ins Src2RC:$src2));

  // Only consulted when NumSrcArgs != 0.
  defvar HasSrc1 = !ne(NumSrcArgs, 1);
  defvar HasSrc2 = !and(HasSrc1, !ne(NumSrcArgs, 2));

  dag ret = !if(!eq(NumSrcArgs, 0),
                // VOP1 without input operands (V_NOP)
                (ins),
                !con(Old,
                     Src0,
                     !if(HasSrc1, Src1, (ins)),
                     !if(HasSrc2, Src2, (ins))));
}
2106
// DPP input list: the getInsDPPBase operands followed by the DPP control
// operands ($dpp_ctrl, $row_mask, $bank_mask, $bound_ctrl).
class getInsDPP <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC,
                 RegisterOperand Src2RC, int NumSrcArgs, bit HasModifiers,
                 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
  defvar Base = getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
                              HasModifiers, Src0Mod, Src1Mod, Src2Mod,
                              HasOld>.ret;
  defvar Ctrl = (ins dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
                     DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
  dag ret = !con(Base, Ctrl);
}
2115
// DPP16 input list: the getInsDPP operands followed by $fi.
class getInsDPP16 <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC,
                   RegisterOperand Src2RC, int NumSrcArgs, bit HasModifiers,
                   Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
  defvar DPP = getInsDPP<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
                         HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret;
  dag ret = !con(DPP, (ins Dpp16FI:$fi));
}
2123
// DPP8 input list: the getInsDPPBase operands followed by $dpp8 and $fi.
class getInsDPP8 <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC,
                  RegisterOperand Src2RC, int NumSrcArgs, bit HasModifiers,
                  Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
  defvar Base = getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
                              HasModifiers, Src0Mod, Src1Mod, Src2Mod,
                              HasOld>.ret;
  dag ret = !con(Base, (ins dpp8:$dpp8, Dpp8FI:$fi));
}
2131
// Prepends $old to an already-built VOP3 operand list.  $old is added only
// when HasOld is set and the instruction has at least one source.
class getInsVOP3DPPBase<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld> {
  dag old = (ins OldRC:$old);
  dag base = VOP3Base;
  dag ret = !if(!and(HasOld, !ne(NumSrcArgs, 0)),
                !con(old, base),
                base);
}
2140
// VOP3 DPP input list: the getInsVOP3DPPBase operands followed by the DPP
// control operands ($dpp_ctrl, $row_mask, $bank_mask, $bound_ctrl).
class getInsVOP3DPP<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> {
  defvar Base = getInsVOP3DPPBase<VOP3Base, OldRC, NumSrcArgs, HasOld>.ret;
  defvar Ctrl = (ins dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
                     DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
  dag ret = !con(Base, Ctrl);
}
2146
// VOP3 DPP16 input list: the getInsVOP3DPP operands followed by $fi.
class getInsVOP3DPP16<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> {
  defvar DPP = getInsVOP3DPP<VOP3Base, OldRC, NumSrcArgs, HasOld>.ret;
  dag ret = !con(DPP, (ins Dpp16FI:$fi));
}
2151
// VOP3 DPP8 input list: the getInsVOP3DPPBase operands followed by $dpp8 and
// $fi.
class getInsVOP3DPP8<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> {
  defvar Base = getInsVOP3DPPBase<VOP3Base, OldRC, NumSrcArgs, HasOld>.ret;
  dag ret = !con(Base, (ins dpp8:$dpp8, Dpp8FI:$fi));
}
2156
// Ins for SDWA.
//
//   NumSrcArgs == 1 (VOP1): src0, $clamp [, $omod], $dst_sel, $dst_unused,
//                           $src0_sel
//   NumSrcArgs == 2, 1-bit DstVT (VOPC): src0, src1, $clamp, $src0_sel,
//                           $src1_sel (never $omod or the dst selectors)
//   NumSrcArgs == 2 otherwise (VOP2): src0, src1, $clamp [, $omod], $dst_sel,
//                           $dst_unused, $src0_sel, $src1_sel
//   any other NumSrcArgs:   empty list (e.g. V_NOP)
//
// Each source is always preceded by its $srcN_modifiers operand.  $omod is
// present only when HasSDWAOMod is set.
class getInsSDWA <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs,
                  bit HasSDWAOMod, Operand Src0Mod, Operand Src1Mod,
                  ValueType DstVT> {
  defvar Src0 = (ins Src0Mod:$src0_modifiers, Src0RC:$src0);
  defvar Src1 = (ins Src1Mod:$src1_modifiers, Src1RC:$src1);
  defvar ClampOMod = !if(HasSDWAOMod,
                         (ins Clamp:$clamp, omod:$omod),
                         (ins Clamp:$clamp));
  defvar DstSel = (ins dst_sel:$dst_sel, dst_unused:$dst_unused);
  defvar IsVOPC = !eq(DstVT.Size, 1);

  dag ret = !cond(
    // VOP1_SDWA
    !eq(NumSrcArgs, 1) :
      !con(Src0, ClampOMod, DstSel, (ins src0_sel:$src0_sel)),
    // VOPC_SDWA
    !and(!eq(NumSrcArgs, 2), IsVOPC) :
      !con(Src0, Src1,
           (ins Clamp:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel)),
    // VOP2_SDWA
    !eq(NumSrcArgs, 2) :
      !con(Src0, Src1, ClampOMod, DstSel,
           (ins src0_sel:$src0_sel, src1_sel:$src1_sel)),
    // No input operands (V_NOP) or an unsupported source count.
    1 : (ins));
}
2200
// Outs for DPP.  $vdst is emitted only when the instruction has a destination
// and DstVT is not 1 bit wide.  A 1-bit destination (VOPC) has no explicit
// dst; the original note says a "vcc"-token is used as dst in SDWA VOPC
// instructions.  Without a destination (V_NOP) the list is empty as well.
class getOutsDPP <bit HasDst, ValueType DstVT, RegisterOperand DstRCDPP> {
  dag ret = !if(!and(HasDst, !ne(DstVT.Size, 1)),
                (outs DstRCDPP:$vdst),
                (outs));
}
2209
// Outs for SDWA.  The destination is named $sdst when DstVT is 1 bit wide and
// $vdst otherwise; there is no output without a destination (V_NOP).
class getOutsSDWA <bit HasDst, ValueType DstVT, RegisterOperand DstRCSDWA> {
  dag ret = !cond(!not(HasDst)        : (outs), // V_NOP
                  !eq(DstVT.Size, 1)  : (outs DstRCSDWA:$sdst),
                  1                   : (outs DstRCSDWA:$vdst));
}
2218
// Returns the assembly string for the inputs and outputs of a VOP[12C]
// instruction: the destination (if any) followed by ", $srcN" for each of the
// 1 to 3 sources.  Any other NumSrcArgs contributes no source text.
class getAsm32 <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
  string dst = !if(!eq(DstVT.Size, 1), "$sdst", "$vdst"); // use $sdst for VOPC
  string src0 = ", $src0";
  string src1 = ", $src1";
  string src2 = ", $src2";
  string ret = !if(HasDst, dst, "") #
               !cond(!eq(NumSrcArgs, 1) : src0,
                     !eq(NumSrcArgs, 2) : src0#src1,
                     !eq(NumSrcArgs, 3) : src0#src1#src2,
                     1                  : "");
}
2231
// Returns the assembly string for one part of a VOPD instruction.  XorY is
// appended to every operand name.  $src0 is printed when there is at least
// one source and $vsrc1 when there are at least two.
class getAsmVOPDPart <int NumSrcArgs, string XorY> {
  string dst = "$vdst" # XorY;
  string src0 = ", $src0" # XorY;
  string src1 = ", $vsrc1" # XorY;
  string ret = dst # !cond(!ge(NumSrcArgs, 2) : src0 # src1,
                           !ge(NumSrcArgs, 1) : src0,
                           1                  : "");
}
2240
// Returns the assembly string for the inputs and outputs of a VOP3P
// instruction: destination, sources, then the optional op_sel, neg and clamp
// operands in that order.
class getAsmVOP3P <bit HasDst, int NumSrcArgs, bit HasNeg,
                   bit HasClamp, bit HasOpSel> {
  // The destination gets a trailing comma only when sources follow.
  string dst = !cond(!not(HasDst)       : "",
                     !gt(NumSrcArgs, 0) : "$vdst,",
                     1                  : "$vdst");
  string src0 = !if(!eq(NumSrcArgs, 1), " $src0", " $src0,");
  string src1 = !cond(!eq(NumSrcArgs, 1) : "",
                      !eq(NumSrcArgs, 2) : " $src1",
                      1                  : " $src1,");
  string src2 = !if(!eq(NumSrcArgs, 3), " $src2", "");

  string mods = !if(HasNeg, "$neg_lo$neg_hi", "");
  string clamp = !if(HasClamp, "$clamp", "");
  string opsel = !if(HasOpSel, "$op_sel$op_sel_hi", "");

  // Each modifier is printed as an array of bits for each operand, so
  // all operands are printed as part of src0_modifiers.
  string ret = !strconcat(dst, src0, src1, src2, opsel, mods, clamp);
}
2260
// Returns the assembly string for a VOP3 instruction with op_sel.  Each source
// is printed through its $srcN_modifiers operand when the matching
// SrcNHasMods flag is set, and as a plain $srcN otherwise.
// FIXME-TRUE16: AsmVOP3OpSel will be deprecated after all VOP3 16-bit
// instructions have been converted to the true16 format.
class getAsmVOP3OpSel <int NumSrcArgs,
                       bit HasClamp,
                       bit HasOMod,
                       bit Src0HasMods,
                       bit Src1HasMods,
                       bit Src2HasMods> {
  string dst = "$vdst";

  // Source spellings without modifiers.
  string isrc0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
  string isrc1 = !cond(!eq(NumSrcArgs, 1) : "",
                       !eq(NumSrcArgs, 2) : " $src1",
                       1                  : " $src1,");
  string isrc2 = !if(!eq(NumSrcArgs, 3), " $src2", "");

  // Source spellings with modifiers.
  string fsrc0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
  string fsrc1 = !cond(!eq(NumSrcArgs, 1) : "",
                       !eq(NumSrcArgs, 2) : " $src1_modifiers",
                       1                  : " $src1_modifiers,");
  string fsrc2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");

  string src0 = !if(Src0HasMods, fsrc0, isrc0);
  string src1 = !if(Src1HasMods, fsrc1, isrc1);
  string src2 = !if(Src2HasMods, fsrc2, isrc2);

  string clamp = !if(HasClamp, "$clamp", "");
  string omod = !if(HasOMod, "$omod", "");
  string ret = !strconcat(dst, ", ", src0, src1, src2, "$op_sel", clamp, omod);
}
2291
// Returns the assembly string for a DPP instruction: destination, sources,
// then the DPP control operands.  With HasModifiers the sources are printed
// through their $srcN_modifiers operands; without it the getAsm32 source list
// is reused.  The `dst` and `args` fields are also read by getAsmDPP8.
class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
  string dst = !cond(!not(HasDst)       : "",
                     !eq(DstVT.Size, 1) : "$sdst", // use $sdst for VOPC
                     1                  : "$vdst");
  string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
  string src1 = !cond(!eq(NumSrcArgs, 1) : "",
                      !eq(NumSrcArgs, 2) : " $src1_modifiers",
                      1                  : " $src1_modifiers,");
  string args = !if(HasModifiers,
                    ", "#src0#src1,
                    getAsm32<0, NumSrcArgs, DstVT>.ret);
  string ret = dst#args#" $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
}
2307
// DPP16 assembly string: the getAsmDPP string followed by the "$fi" operand.
class getAsmDPP16 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
  defvar DPPAsm = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
  string ret = !strconcat(DPPAsm, "$fi");
}
2311
// DPP8 assembly string: reuses the dst/args fields computed by getAsmDPP and
// replaces the DPP control operands with "$dpp8$fi".
class getAsmDPP8 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32>
  : getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT> {
  let ret = !strconcat(dst, args, " $dpp8$fi");
}
2316
// Builds the common VOP3 assembly operand string.
// Each source is printed either as a plain "$srcN" operand or as
// "$srcN_modifiers", selected per source by SrcNHasMods. Optional suffix
// operands (op_sel, byte_sel, VOP3P modifiers, clamp, omod) are appended only
// when the corresponding flag is set.
class getAsmVOP3Base <int NumSrcArgs, bit HasDst, bit HasClamp,
                       bit HasOpSel, bit HasOMod, bit IsVOP3P,
                       bit HasNeg, bit Src0HasMods,
                       bit Src1HasMods, bit Src2HasMods, ValueType DstVT = i32,
                       bit HasByteSel = 0> {
  string dst = !cond(!not(HasDst)       : "",
                     !eq(DstVT.Size, 1) : "$sdst", // use $sdst for VOPC
                     true               : "$vdst");

  // Plain operands; a trailing comma is emitted when another source follows.
  string src0nomods = !strconcat("$src0", !if(!eq(NumSrcArgs, 1), "", ","));
  string src1nomods = !cond(!eq(NumSrcArgs, 1) : "",
                            !eq(NumSrcArgs, 2) : " $src1",
                            true               : " $src1,");
  string src2nomods = !if(!eq(NumSrcArgs, 3), " $src2", "");

  // Operands printed together with their input modifiers.
  string src0mods = !strconcat("$src0_modifiers", !if(!eq(NumSrcArgs, 1), "", ","));
  string src1mods = !cond(!eq(NumSrcArgs, 1) : "",
                          !eq(NumSrcArgs, 2) : " $src1_modifiers",
                          true               : " $src1_modifiers,");
  string src2mods = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");

  string src0 = !if(Src0HasMods, src0mods, src0nomods);
  string src1 = !if(Src1HasMods, src1mods, src1nomods);
  string src2 = !if(Src2HasMods, src2mods, src2nomods);

  string opsel = !if(HasOpSel, "$op_sel", "");
  string bytesel = !if(HasByteSel, "$byte_sel", "");
  // op_sel_hi and neg_lo/neg_hi are only printed for VOP3P instructions.
  string 3PMods = !strconcat(!if(!and(IsVOP3P, HasOpSel), "$op_sel_hi", ""),
                             !if(!and(IsVOP3P, HasNeg), "$neg_lo$neg_hi", ""));
  string clamp = !if(HasClamp, "$clamp", "");
  string omod = !if(HasOMod, "$omod", "");

  // With no sources only the destination is printed; otherwise the operand
  // list follows, separated from the destination by ", " when there is one.
  string ret = !strconcat(dst,
                          !if(!eq(NumSrcArgs, 0),
                              "",
                              !strconcat(!if(HasDst, ", ", ""),
                                         src0, src1, src2, opsel, bytesel,
                                         3PMods, clamp, omod)));
}
2355
// Appends the DPP control operands to an already-built VOP3 operand string.
class getAsmVOP3DPP<string base> {
  string ret = !strconcat(base, " $dpp_ctrl$row_mask$bank_mask$bound_ctrl");
}
2359
// VOP3 DPP16 operand string: the getAsmVOP3DPP string followed by "$fi".
class getAsmVOP3DPP16<string base> {
  defvar DPPAsm = getAsmVOP3DPP<base>.ret;
  string ret = !strconcat(DPPAsm, "$fi");
}
2363
// Appends the DPP8 selector and "$fi" operands to a VOP3 operand string.
class getAsmVOP3DPP8<string base> {
  string ret = !strconcat(base, " $dpp8$fi");
}
2367
2368
// Builds the assembly operand string for the SDWA form of an instruction.
//   dst  - empty without a dst, the literal " vcc" token when the dst type is
//          one bit wide (VOPC), otherwise "$vdst".
//   args - the source operands (always with modifiers) followed by "$clamp".
//   sdwa - the dst_sel/dst_unused/srcN_sel selector operands.
class getAsmSDWA <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
  string dst = !cond(!not(HasDst)       : "",
                     !eq(DstVT.Size, 1) : " vcc", // use vcc token as dst for VOPC instructions
                     true               : "$vdst");
  string src0 = "$src0_modifiers";
  string src1 = "$src1_modifiers";
  string args = !cond(!eq(NumSrcArgs, 0) : "",
                      !eq(NumSrcArgs, 1) : !strconcat(", ", src0, "$clamp"),
                      true               : !strconcat(", ", src0, ", ", src1, "$clamp"));
  string sdwa = !cond(!eq(NumSrcArgs, 0) : "",
                      !eq(NumSrcArgs, 1) : " $dst_sel $dst_unused $src0_sel",
                      // No dst_sel and dst_unused for VOPC
                      !eq(DstVT.Size, 1) : " $src0_sel $src1_sel",
                      true               : " $dst_sel $dst_unused $src0_sel $src1_sel");
  string ret = !strconcat(dst, args, sdwa);
}
2396
// Builds the assembly operand string for the SDWA9 form of an instruction.
// Differs from getAsmSDWA in that a one-bit dst is printed as "$sdst", and
// the output modifiers ("$clamp", plus "$omod" when HasOMod is set) are placed
// in front of the selector operands instead of after the sources.
class getAsmSDWA9 <bit HasDst, bit HasOMod, int NumSrcArgs,
                   ValueType DstVT = i32> {
  string dst = !cond(!not(HasDst)       : "",
                     !eq(DstVT.Size, 1) : "$sdst",  // VOPC
                     true               : "$vdst"); // VOP1/2
  string src0 = "$src0_modifiers";
  string src1 = "$src1_modifiers";
  string out_mods = !strconcat("$clamp", !if(HasOMod, "$omod", ""));
  string args = !cond(!eq(NumSrcArgs, 0) : "",
                      !eq(NumSrcArgs, 1) : !strconcat(", ", src0),
                      true               : !strconcat(", ", src0, ", ", src1));
  string sdwa = !cond(!eq(NumSrcArgs, 0) : "",
                      !eq(NumSrcArgs, 1) :
                        !strconcat(out_mods, " $dst_sel $dst_unused $src0_sel"),
                      // No dst_sel, dst_unused and output modifiers for VOPC
                      !eq(DstVT.Size, 1) : " $src0_sel $src1_sel",
                      true               :
                        !strconcat(out_mods, " $dst_sel $dst_unused $src0_sel $src1_sel"));
  string ret = !strconcat(dst, args, sdwa);
}
2424
// ret is 1 when the instruction has fewer or more than three sources and at
// least one of dst, src0 or src1 is 64 bits wide. Three-source instructions
// always yield 0.
class getHas64BitOps <int NumSrcArgs, ValueType DstVT, ValueType Src0VT,
                      ValueType Src1VT> {
  bit ret = !and(!ne(NumSrcArgs, 3),
                 !or(!eq(DstVT.Size, 64),
                     !eq(Src0VT.Size, 64),
                     !eq(Src1VT.Size, 64)));
}
2441
// ret is 1 when the instruction can have an SDWA form: it must not have three
// sources (no SDWA for VOP3) and none of dst, src0 or src1 may be 64 bits
// wide (no SDWA for 64-bit operands).
class getHasSDWA <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                  ValueType Src1VT = i32> {
  bit ret = !and(!ne(NumSrcArgs, 3),    // NumSrcArgs == 3 - No SDWA for VOP3
                 !ne(DstVT.Size, 64),   // 64-bit dst
                 !ne(Src0VT.Size, 64),  // 64-bit src0
                 !ne(Src1VT.Size, 64)); // 64-bit src1
}
2458
// ret is 1 unless the instruction has exactly three sources
// (NumSrcArgs == 3 - No DPP for VOP3).
class getHasDPP <int NumSrcArgs> {
  bit ret = !ne(NumSrcArgs, 3);
}
2464
// ret is 1 when getHasDPP holds and getHas64BitOps does not.
class getHasExt32BitDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                 ValueType Src1VT = i32> {
  defvar HasDPP = getHasDPP<NumSrcArgs>.ret;
  defvar Has64Bit = getHas64BitOps<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
  bit ret = !and(HasDPP, !not(Has64Bit));
}
2470
// ret is 1 when both getHasDPP and getHas64BitOps hold.
class getHasExt64BitDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                 ValueType Src1VT = i32> {
  defvar HasDPP = getHasDPP<NumSrcArgs>.ret;
  defvar Has64Bit = getHas64BitOps<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
  bit ret = !and(HasDPP, Has64Bit);
}
2476
// Function that checks if instruction supports at least one of the extended
// encodings, i.e. DPP or SDWA.
class getHasExt <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                 ValueType Src1VT = i32> {
  defvar HasDPP = getHasDPP<NumSrcArgs>.ret;
  defvar HasSDWA = getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
  bit ret = !or(HasDPP, HasSDWA);
}
2483
// Return an AGPR+VGPR operand class for the given VGPR register class.
// The operand is selected purely by the register class size in bits; only the
// sizes listed below are handled.
class getLdStRegisterOperand<RegisterClass RC> {
  defvar Bits = RC.Size;
  RegisterOperand ret = !cond(
    !eq(Bits, 32)   : AVLdSt_32,
    !eq(Bits, 64)   : AVLdSt_64,
    !eq(Bits, 96)   : AVLdSt_96,
    !eq(Bits, 128)  : AVLdSt_128,
    !eq(Bits, 160)  : AVLdSt_160,
    !eq(Bits, 1024) : AVLdSt_1024);
}
2494
// ret is 1 when none of dst, src0, src1 or src2 is 64 bits wide
// (no DPP for 64-bit operands).
class getHasVOP3DPP <ValueType DstVT = i32, ValueType Src0VT = i32,
                 ValueType Src1VT = i32, ValueType Src2VT = i32> {
  bit ret = !and(!ne(DstVT.Size, 64),   // 64-bit dst
                 !ne(Src0VT.Size, 64),  // 64-bit src0
                 !ne(Src1VT.Size, 64),  // 64-bit src1
                 !ne(Src2VT.Size, 64)); // 64-bit src2
}
2511
2512
// Named values for the VOPProfile NeedPatGen field.
def PatGenMode {
  int NoPattern = 0; // Default used by VOPProfile and VOP_PAT_GEN.
  int Pattern = 1;
}
2517
// Describes the operand layout of a VALU instruction for every encoding it may
// be emitted in (32-bit, VOP3/64-bit, VOP3P, DPP, VOP3 DPP, SDWA, VOPD).
//
// _ArgVT is the list of value types [dst, src0, src1, src2]; `untyped` marks an
// absent operand. _EnableClamp forces HasClamp on even when src0 is not a
// modifier type. All remaining fields are derived from those two inputs and
// may be overridden with `let` by derived profiles.
class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {

  field list<ValueType> ArgVT = _ArgVT;
  field bit EnableClamp = _EnableClamp;
  field bit IsTrue16 = 0;
  field bit IsRealTrue16 = 0;

  // Value types of the destination and the up-to-three sources.
  field ValueType DstVT = ArgVT[0];
  field ValueType Src0VT = ArgVT[1];
  field ValueType Src1VT = ArgVT[2];
  field ValueType Src2VT = ArgVT[3];

  // Register operands per encoding.
  field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret;
  field RegisterOperand DstRCDPP = DstRC;
  field RegisterOperand DstRC64 = DstRC;
  field RegisterOperand DstRCVOP3DPP = DstRC64;
  field RegisterOperand DstRCSDWA = getSDWADstForVT<DstVT>.ret;
  field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT, IsTrue16>.ret;
  field RegisterOperand Src1RC32 = getVregSrcForVT<Src1VT>.ret;
  field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
  field RegisterOperand Src1RC64 = getVOP3SrcForVT<Src1VT>.ret;
  field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
  field RegisterOperand Src0DPP = getVregSrcForVT<Src0VT>.ret;
  field RegisterOperand Src1DPP = getVregSrcForVT<Src1VT>.ret;
  field RegisterOperand Src2DPP = getVregSrcForVT<Src2VT>.ret;
  field RegisterOperand Src0VOP3DPP = VGPRSrc_32;
  field RegisterOperand Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT>.ret;
  field RegisterOperand Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT>.ret;
  field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret;
  // The SDWA src1 operand is derived from the src1 type, matching
  // Src1ModSDWA below and every other Src1* field in this class.
  field RegisterOperand Src1SDWA = getSDWASrcForVT<Src1VT>.ret;

  // Input-modifier operands per encoding.
  field Operand Src0Mod = getSrc0Mod<Src0VT, DstVT>.ret;
  field Operand Src1Mod = getSrcMod<Src1VT>.ret;
  field Operand Src2Mod = getSrcMod<Src2VT>.ret;
  field Operand Src0ModDPP = getSrcModDPP<Src0VT>.ret;
  field Operand Src1ModDPP = getSrcModDPP<Src1VT>.ret;
  field Operand Src2ModDPP = getSrcModDPP<Src2VT>.ret;
  field Operand Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT>.ret;
  field Operand Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT>.ret;
  field Operand Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT>.ret;
  field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret;
  field Operand Src1ModSDWA = getSrcModSDWA<Src1VT>.ret;


  // Instruction-family flags; all default to 0 and are set by derived profiles.
  field bit IsMAI = 0;
  field bit IsVOP3P = 0;
  field bit IsDOT = 0;
  field bit IsSingle = 0;
  field bit IsWMMA = 0;
  field bit IsSWMMAC = 0;

  field bit IsFP8SrcByteSel = 0;
  field bit IsFP8DstByteSel = 0;
  field bit HasFP8DstByteSel = 0;
  field bit HasFP4DstByteSel = 0;
  field bit IsFP8ByteSel = !or(IsFP8SrcByteSel, IsFP8DstByteSel);

  // An operand is present when its value type is not `untyped`.
  field bit HasDst = !ne(DstVT.Value, untyped.Value);
  field bit HasDst32 = HasDst;
  field bit EmitDst = HasDst; // force dst encoding, see v_movreld_b32 special case
  field bit EmitDstSel = EmitDst;
  field int NumSrcArgs = getNumSrcArgs<Src0VT, Src1VT, Src2VT>.ret;
  field bit HasSrc0 = !ne(Src0VT.Value, untyped.Value);
  field bit HasSrc1 = !ne(Src1VT.Value, untyped.Value);
  field bit HasSrc2 = !ne(Src2VT.Value, untyped.Value);

  field bit HasSrc0FloatMods = Src0VT.isFP;
  field bit HasSrc1FloatMods = Src1VT.isFP;
  field bit HasSrc2FloatMods = Src2VT.isFP;

  field bit HasSrc0IntMods = isIntType<Src0VT>.ret;
  field bit HasSrc1IntMods = isIntType<Src1VT>.ret;
  field bit HasSrc2IntMods = isIntType<Src2VT>.ret;

  field bit HasClamp = !or(isModifierType<Src0VT>.ret, EnableClamp);
  field bit HasSDWAClamp = EmitDst;
  field bit HasFPClamp = !and(DstVT.isFP, HasClamp);
  field bit HasIntClamp = !if(DstVT.isFP, 0, HasClamp);
  field bit HasClampLo = HasClamp;
  field bit HasClampHi = !and(DstVT.isVector, HasClamp);
  field bit HasHigh = 0;

  field bit IsPacked = Src0VT.isVector;
  field bit HasOpSel = IsPacked;
  field bit HasOMod = !if(IsVOP3P, 0, DstVT.isFP);
  field bit HasSDWAOMod = DstVT.isFP;

  field bit HasModifiers = !or(isModifierType<Src0VT>.ret,
                               isModifierType<Src1VT>.ret,
                               isModifierType<Src2VT>.ret,
                               HasOMod);
  field bit HasNeg = HasModifiers;

  field bit HasSrc0Mods = HasModifiers;
  field bit HasSrc1Mods = !if(HasModifiers, !or(HasSrc1FloatMods, HasSrc1IntMods), 0);
  field bit HasSrc2Mods = !if(HasModifiers, !or(HasSrc2FloatMods, HasSrc2IntMods), 0);

  // Which extended encodings (DPP / SDWA) this profile supports.
  field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
  field bit HasExtVOP3DPP = getHasVOP3DPP<DstVT, Src0VT, Src1VT, Src2VT>.ret;
  field bit HasExtDPP = !or(getHasDPP<NumSrcArgs>.ret, HasExtVOP3DPP);
  field bit HasExt32BitDPP = getHasExt32BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
  field bit HasExt64BitDPP = getHasExt64BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
  field bit HasExtSDWA = getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
  field bit HasExtSDWA9 = HasExtSDWA;
  field int NeedPatGen = PatGenMode.NoPattern;

  field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods);
  field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods);
  field Operand Src2PackedMod = !if(HasSrc2FloatMods, PackedF16InputMods, PackedI16InputMods);

  field dag Outs = !if(HasDst,(outs DstRC:$vdst),(outs));

  // VOP3b instructions are a special case with a second explicit
  // output. This is manually overridden for them.
  field dag Outs32 = Outs;
  field dag Outs64 = !if(HasDst,(outs DstRC64:$vdst),(outs));
  field dag OutsDPP = getOutsDPP<HasDst, DstVT, DstRCDPP>.ret;
  field dag OutsDPP8 = OutsDPP;
  field dag OutsVOP3DPP = getOutsDPP<HasDst, DstVT, DstRCVOP3DPP>.ret;
  field dag OutsVOP3DPP8 = OutsVOP3DPP;
  field dag OutsSDWA = getOutsSDWA<HasDst, DstVT, DstRCSDWA>.ret;

  field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
  field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
                             HasClamp, HasModifiers, HasSrc2Mods,
                             HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
  field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
                                   NumSrcArgs, HasClamp, HasOpSel, HasNeg,
                                   Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
  field dag InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64,
                                NumSrcArgs, HasClamp, HasOMod,
                                Src0Mod, Src1Mod, Src2Mod>.ret;
  field dag InsDPP = !if(HasExtDPP,
                         getInsDPP<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs,
                                   HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret,
                         (ins));
  field dag InsDPP16 = getInsDPP16<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs,
                                   HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret;
  field dag InsDPP8 = getInsDPP8<DstRCDPP, Src0DPP, Src1DPP, Src2DPP,
                                 NumSrcArgs, HasModifiers,
                                 Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret;
  defvar InsVOP3DPPBase = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP,
                  Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
                  Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel>.ret;
  defvar InsVOP3PDPPBase = getInsVOP3P<Src0VOP3DPP, Src1VOP3DPP,
                  Src2VOP3DPP, NumSrcArgs, HasClamp, HasOpSel, HasNeg,
                  Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP>.ret;

  field dag InsVOP3Base = !if(IsVOP3P, InsVOP3PDPPBase, InsVOP3DPPBase);

  field dag InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret;
  field dag InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret;
  field dag InsVOP3DPP8 = getInsVOP3DPP8<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret;
  field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
                                 HasSDWAOMod, Src0ModSDWA, Src1ModSDWA,
                                 DstVT>.ret;
  field dag InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X);
  // It is a slight misnomer to use the deferred f32 operand type for non-float
  // operands, but this operand type will only be used if the other dual
  // component is FMAAK or FMAMK
  field dag InsVOPDXDeferred = (ins !if(!eq(Src0VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0X, VGPR_32:$vsrc1X);
  field dag InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y);
  // NOTE(review): the X variant above keys the $src0X operand type on
  // Src0VT.Size, whereas this one keys $src0Y on Src1VT.Size. Left unchanged;
  // confirm whether the asymmetry is intentional.
  field dag InsVOPDYDeferred = (ins !if(!eq(Src1VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0Y, VGPR_32:$vsrc1Y);


  // Assembly operand strings per encoding.
  field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
  field string AsmDPP = !if(HasExtDPP,
                            getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret, "");
  field string AsmDPP16 = getAsmDPP16<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
  // DPP8 encoding has no fields for modifiers, and it is enforced by setting
  // the asm operand name via this HasModifiers flag
  field string AsmDPP8 = getAsmDPP8<HasDst, NumSrcArgs, 0 /*HasModifiers*/, DstVT>.ret;
  field string AsmVOP3Base = getAsmVOP3Base<NumSrcArgs, HasDst, HasClamp,
   HasOpSel, HasOMod, IsVOP3P, HasNeg, HasModifiers, HasModifiers,
   HasModifiers, DstVT, IsFP8ByteSel>.ret;
  field string Asm64 = AsmVOP3Base;
  field string AsmVOP3P = getAsmVOP3P<HasDst, NumSrcArgs, HasNeg, HasClamp, HasOpSel>.ret;
  field string AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs,
                                              HasClamp,
                                              HasOMod,
                                              HasSrc0FloatMods,
                                              HasSrc1FloatMods,
                                              HasSrc2FloatMods>.ret;
  field string AsmVOP3DPP = getAsmVOP3DPP<AsmVOP3Base>.ret;
  field string AsmVOP3DPP16 = getAsmVOP3DPP16<AsmVOP3Base>.ret;
  field string AsmVOP3DPP8 = getAsmVOP3DPP8<AsmVOP3Base>.ret;
  field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret;
  field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret;
  field string AsmVOPDX = getAsmVOPDPart<NumSrcArgs, "X">.ret;
  field string AsmVOPDY = getAsmVOPDPart<NumSrcArgs, "Y">.ret;
  field string TieRegDPP = "$old";
  field bit IsSMFMAC = false;
  field bit HasAbid = !and(IsMAI, HasSrc1);
}
2710
// Profile with the same argument types as `p` but with every extended
// encoding (DPP, VOP3 DPP, SDWA, SDWA9) switched off. Only p.ArgVT is
// forwarded to VOPProfile.
class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> {
  let HasExt = 0;

  // DPP variants.
  let HasExtDPP = 0;
  let HasExtVOP3DPP = 0;
  let HasExt32BitDPP = 0;
  let HasExt64BitDPP = 0;

  // SDWA variants.
  let HasExtSDWA = 0;
  let HasExtSDWA9 = 0;
}
2720
// Profile with the same argument types as `p` whose NeedPatGen field is set
// to `mode` (one of the PatGenMode values). Only p.ArgVT is forwarded to
// VOPProfile.
class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.NoPattern>
  : VOPProfile <p.ArgVT> {
  let NeedPatGen = mode;
}
2724
// True16 variant of a profile: same argument types as P, with the register
// and modifier operands re-derived with the IsTrue16 flag set and IsFake16
// cleared.
//
// VOPC_Profile_t16, VOPC_NoSdst_Profile_t16, VOPC_Class_Profile_t16,
// VOPC_Class_NoSdst_Profile_t16, and  VOP_MAC_F16_t16 do not inherit from this
// class, so copy changes to this class in those profiles
class VOPProfile_True16<VOPProfile P> : VOPProfile<P.ArgVT> {
  let IsTrue16 = 1;
  let IsRealTrue16 = 1;

  let HasOpSel = 1;
  let HasModifiers = 1; // All instructions at least have OpSel.

  // 32-bit encoding. Most DstVT are 16-bit, but not all.
  let DstRC = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 0 /*IsVOP3Encoding*/>.ret;
  let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
  let Src1RC32 = getVregSrcForVT<Src1VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;

  // VOP3 (64-bit) encoding.
  let DstRC64 = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 1 /*IsVOP3Encoding*/>.ret;
  let Src0RC64 = getVOP3SrcForVT<Src0VT, 1 /*IsTrue16*/>.ret;
  let Src1RC64 = getVOP3SrcForVT<Src1VT, 1 /*IsTrue16*/>.ret;
  let Src2RC64 = getVOP3SrcForVT<Src2VT, 1 /*IsTrue16*/>.ret;
  let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 0/*IsFake16*/>.ret;
  let Src1Mod = getSrcMod<Src1VT, 1 /*IsTrue16*/, 0/*IsFake16*/>.ret;
  let Src2Mod = getSrcMod<Src2VT, 1 /*IsTrue16*/, 0/*IsFake16*/>.ret;

  // DPP encoding.
  let Src0DPP = getVregSrcForVT<Src0VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
  let Src1DPP = getVregSrcForVT<Src1VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
  let Src2DPP = getVregSrcForVT<Src2VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
  let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0 /*IsFake16*/>.ret;
  let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0 /*IsFake16*/>.ret;
  let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0 /*IsFake16*/>.ret;

  // VOP3 DPP encoding.
  let Src0VOP3DPP = !if (!eq(Src0VT.Size, 16), VGPRSrc_16, VGPRSrc_32);
  let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 0 /*IsFake16*/>.ret;
  let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 0 /*IsFake16*/>.ret;
  let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 0/*IsFake16*/>.ret;
  let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 0 /*IsFake16*/>.ret;
  let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 0 /*IsFake16*/>.ret;
}
2760
// Fake16 variant of a profile: same argument types as P, with IsTrue16 set and
// the register and modifier operands re-derived with the IsFake16 flag set.
// Fields not overridden here keep their VOPProfile definitions.
class VOPProfile_Fake16<VOPProfile P> : VOPProfile<P.ArgVT> {
  let IsTrue16 = 1;

  // Destinations. Most DstVT are 16-bit, but not all
  let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
  let DstRC64 = getVALUDstForVT<DstVT>.ret;

  // 32-bit encoding.
  let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;

  // VOP3 source modifiers.
  let Src0Mod = getSrc0Mod<Src0VT, DstVT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
  let Src1Mod = getSrcMod<Src1VT, 1 /*IsTrue16*/, 1/*IsFake16*/>.ret;
  let Src2Mod = getSrcMod<Src2VT, 1 /*IsTrue16*/, 1/*IsFake16*/>.ret;

  // DPP encoding.
  let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
  let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
  let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
  let Src0ModDPP = getSrcModDPP_t16<Src0VT, 1/*IsFake16*/>.ret;
  let Src1ModDPP = getSrcModDPP_t16<Src1VT, 1/*IsFake16*/>.ret;
  let Src2ModDPP = getSrcModDPP_t16<Src2VT, 1/*IsFake16*/>.ret;

  // VOP3 DPP encoding.
  let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT, 1 /*IsFake16*/>.ret;
  let Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT, 1 /*IsFake16*/>.ret;
  let Src0ModVOP3DPP = getSrc0ModVOP3DPP<Src0VT, DstVT, 1/*IsFake16*/>.ret;
  let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 1/*IsFake16*/>.ret;
  let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 1/*IsFake16*/>.ret;
}
2782
// 16-bit profiles. The list is [dst, src0, src1, src2]; `untyped` marks an
// absent operand.

// One source.
def VOP_F16_F16           : VOPProfile<[f16, f16, untyped, untyped]>;
def VOP_F16_I16           : VOPProfile<[f16, i16, untyped, untyped]>;
def VOP_I16_F16           : VOPProfile<[i16, f16, untyped, untyped]>;
def VOP_I16_I16           : VOPProfile<[i16, i16, untyped, untyped]>;

// Two sources.
def VOP_F16_F16_F16       : VOPProfile<[f16, f16, f16, untyped]>;
def VOP_F16_F16_I16       : VOPProfile<[f16, f16, i16, untyped]>;
def VOP_F16_F16_I32       : VOPProfile<[f16, f16, i32, untyped]>;
def VOP_I16_I16_I16       : VOPProfile<[i16, i16, i16, untyped]>;
def VOP_I16_I16_I16_ARITH : VOPProfile<[i16, i16, i16, untyped], /*EnableClamp=*/1>;
2793
// Three-source 16-bit profiles. VOPProfile reads only ArgVT[0..3]
// ([dst, src0, src1, src2]), so the lists carry exactly four entries like
// every other profile.
def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16]>;
def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16]>;

def VOP_I32_I16_I16_I32 : VOPProfile <[i32, i16, i16, i32]>;
def VOP_I32_I16 : VOPProfile <[i32, i16, untyped, untyped]>;
def VOP_I16_I32 : VOPProfile <[i16, i32, untyped, untyped]>;
2800
// Packed 16-bit profiles, two sources.
def VOP_V2F16_V2F16_V2F16       : VOPProfile<[v2f16, v2f16, v2f16, untyped]>;
def VOP_V2I16_V2I16_V2I16       : VOPProfile<[v2i16, v2i16, v2i16, untyped]>;
def VOP_B32_F16_F16             : VOPProfile<[i32, f16, f16, untyped]>;

def VOP_V2F16_V2F16_V2F16_V2F16 : VOPProfile<[v2f16, v2f16, v2f16, v2f16]>;
def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile<[v2i16, v2i16, v2i16, v2i16]>;
def VOP_V2I16_F32_F32           : VOPProfile<[v2i16, f32, f32, untyped]>;
def VOP_V2I16_I32_I32           : VOPProfile<[v2i16, i32, i32, untyped]>;

def VOP_F16_V2F16_V2F16_F16     : VOPProfile<[f16, v2f16, v2f16, f16]>;
def VOP_BF16_V2BF16_V2BF16_BF16 : VOPProfile<[bf16, v2bf16, v2bf16, bf16]>;
def VOP_F32_V2BF16_V2BF16_F32   : VOPProfile<[f32, v2bf16, v2bf16, f32]>;

def VOP_F32_V2F16_V2F16_V2F16   : VOPProfile<[f32, v2f16, v2f16, v2f16]>;

// Profile with no destination and no sources.
def VOP_NONE                    : VOPProfile<[untyped, untyped, untyped, untyped]>;
2817
// One-source profiles.
def VOP_F32_F32  : VOPProfile<[f32, f32, untyped, untyped]>;
def VOP_F32_F64  : VOPProfile<[f32, f64, untyped, untyped]>;
def VOP_F32_I32  : VOPProfile<[f32, i32, untyped, untyped]>;
def VOP_F64_F32  : VOPProfile<[f64, f32, untyped, untyped]>;
def VOP_F64_F64  : VOPProfile<[f64, f64, untyped, untyped]>;
def VOP_F64_I32  : VOPProfile<[f64, i32, untyped, untyped]>;
def VOP_I32_F32  : VOPProfile<[i32, f32, untyped, untyped]>;
def VOP_I32_F64  : VOPProfile<[i32, f64, untyped, untyped]>;
def VOP_I32_I32  : VOPProfile<[i32, i32, untyped, untyped]>;
def VOP_F16_F32  : VOPProfile<[f16, f32, untyped, untyped]>;
def VOP_F32_F16  : VOPProfile<[f32, f16, untyped, untyped]>;
def VOP_I64_I64  : VOPProfile<[i64, i64, untyped, untyped]>;
def VOP_F32_BF16 : VOPProfile<[f32, bf16, untyped, untyped]>;
2831
// Mixed two- and three-source profiles.
def VOP_F32_F32_F16             : VOPProfile<[f32, f32, f16, untyped]>;
def VOP_F32_F32_F32             : VOPProfile<[f32, f32, f32, untyped]>;
def VOP_F32_F32_I32             : VOPProfile<[f32, f32, i32, untyped]>;
def VOP_F64_F64_F64             : VOPProfile<[f64, f64, f64, untyped]>;
def VOP_F64_F64_I32             : VOPProfile<[f64, f64, i32, untyped]>;
def VOP_I32_F32_F32             : VOPProfile<[i32, f32, f32, untyped]>;
def VOP_I32_F32_I32             : VOPProfile<[i32, f32, i32, untyped]>;
def VOP_I32_I32_I32             : VOPProfile<[i32, i32, i32, untyped]>;
def VOP_I32_I32_I32_ARITH       : VOPProfile<[i32, i32, i32, untyped], /*EnableClamp=*/1>;
def VOP_V2F16_F32_F32           : VOPProfile<[v2f16, f32, f32, untyped]>;
def VOP_F32_F16_F16_F16         : VOPProfile<[f32, f16, f16, f16]>;
def VOP_V2BF16_F32_F32          : VOPProfile<[v2bf16, f32, f32, untyped]>;
def VOP_V32F32_V6I32_F32        : VOPProfile<[v32f32, v6i32, f32, untyped]>;
def VOP_V32F16_V6I32_F32        : VOPProfile<[v32f16, v6i32, f32, untyped]>;
def VOP_V32BF16_V6I32_F32       : VOPProfile<[v32bf16, v6i32, f32, untyped]>;
def VOP_V6I32_V32F16_F32        : VOPProfile<[v6i32, v32f16, f32, untyped]>;
def VOP_V6I32_V32BF16_F32       : VOPProfile<[v6i32, v32bf16, f32, untyped]>;
def VOP_V6I32_V16F32_V16F32_F32 : VOPProfile<[v6i32, v16f32, v16f32, f32]>;
def VOP_V2F16_I32_F32           : VOPProfile<[v2f16, i32, f32, untyped]>;
def VOP_V2I16_F32_F32_F32       : VOPProfile<[v2i16, f32, f32, f32]>;
def VOP_V2I16_V2F16_F32         : VOPProfile<[v2i16, v2f16, f32, untyped]>;
def VOP_V2I16_V2BF16_F32        : VOPProfile<[v2i16, v2bf16, f32, untyped]>;
def VOP_I32_F32_F32_F32         : VOPProfile<[i32, f32, f32, f32]>;
def VOP_I32_V2F16_F32_F32       : VOPProfile<[i32, v2f16, f32, f32]>;
def VOP_I32_V2BF16_F32_F32      : VOPProfile<[i32, v2bf16, f32, f32]>;
def VOP_BF16_F32_I32            : VOPProfile<[bf16, f32, i32, untyped]>;
def VOP_F16_F32_I32             : VOPProfile<[f16, f32, i32, untyped]>;
def VOP_I32_BF16_I32_F32        : VOPProfile<[i32, bf16, i32, f32]>;
def VOP_I32_F16_I32_F32         : VOPProfile<[i32, f16, i32, f32]>;
def VOP_I32_F32_I32_F32         : VOPProfile<[i32, f32, i32, f32]>;
2862
// Three-source profiles producing a v6i32 result.
def VOP_V6I32_V32BF16_I32_F32 : VOPProfile<[v6i32, v32bf16, i32, f32]>;
def VOP_V6I32_V32F16_I32_F32  : VOPProfile<[v6i32, v32f16, i32, f32]>;
def VOP_V6I32_V32F32_I32_F32  : VOPProfile<[v6i32, v32f32, i32, f32]>;

// Two-source 64-bit integer profiles.
def VOP_I64_I64_I32           : VOPProfile<[i64, i64, i32, untyped]>;
def VOP_I64_I32_I64           : VOPProfile<[i64, i32, i64, untyped]>;
def VOP_I64_I64_I64           : VOPProfile<[i64, i64, i64, untyped]>;

// Three-source scalar-typed profiles.
def VOP_F16_F32_F16_F32       : VOPProfile<[f16, f32, f16, f32]>;
def VOP_F32_F32_F16_F16       : VOPProfile<[f32, f32, f16, f16]>;
def VOP_F32_F32_F32_F32       : VOPProfile<[f32, f32, f32, f32]>;
def VOP_F64_F64_F64_F64       : VOPProfile<[f64, f64, f64, f64]>;
def VOP_I32_I32_I32_I32       : VOPProfile<[i32, i32, i32, i32]>;
def VOP_I32_I32_I32_I16       : VOPProfile<[i32, i32, i32, i16]>;
def VOP_I64_I32_I32_I64       : VOPProfile<[i64, i32, i32, i64]>;
def VOP_I32_F32_I32_I32       : VOPProfile<[i32, f32, i32, i32]>;
def VOP_I64_I64_I32_I64       : VOPProfile<[i64, i64, i32, i64]>;
def VOP_V4I32_I64_I32_V4I32   : VOPProfile<[v4i32, i64, i32, v4i32]>;
def VOP_I16_I32_I32_I32       : VOPProfile<[i16, i32, i32, i32]>;
2882
// Packed sources with a scalar result and a scalar src2.
def VOP_F32_V2F16_V2F16_F32 : VOPProfile<[f32, v2f16, v2f16, f32]>;
def VOP_I32_V2I16_V2I16_I32 : VOPProfile<[i32, v2i16, v2i16, i32]>;

// Profiles whose result and src2 share the same vector type.
def VOP_V4F32_F32_F32_V4F32 : VOPProfile<[v4f32, f32, f32, v4f32]>;
def VOP_V16F32_F32_F32_V16F32 : VOPProfile<[v16f32, f32, f32, v16f32]>;
def VOP_V32F32_F32_F32_V32F32 : VOPProfile<[v32f32, f32, f32, v32f32]>;
def VOP_V4F32_V4F16_V4F16_V4F32 : VOPProfile<[v4f32, v4f16, v4f16, v4f32]>;
def VOP_V16F32_V4F16_V4F16_V16F32 : VOPProfile<[v16f32, v4f16, v4f16, v16f32]>;
def VOP_V32F32_V4F16_V4F16_V32F32 : VOPProfile<[v32f32, v4f16, v4f16, v32f32]>;
def VOP_V4F32_V2I16_V2I16_V4F32 : VOPProfile<[v4f32, v2i16, v2i16, v4f32]>;
def VOP_V16F32_V2I16_V2I16_V16F32 : VOPProfile<[v16f32, v2i16, v2i16, v16f32]>;
def VOP_V32F32_V2I16_V2I16_V32F32 : VOPProfile<[v32f32, v2i16, v2i16, v32f32]>;
def VOP_V4I32_I32_I32_V4I32 : VOPProfile<[v4i32, i32, i32, v4i32]>;
def VOP_V16I32_I32_I32_V16I32 : VOPProfile<[v16i32, i32, i32, v16i32]>;
def VOP_V32I32_I32_I32_V32I32 : VOPProfile<[v32i32, i32, i32, v32i32]>;

def VOP_V4F64_F64_F64_V4F64 : VOPProfile<[v4f64, f64, f64, v4f64]>;
def VOP_V1F64_F64_F64_V1F64 : VOPProfile<[v1f64, f64, f64, v1f64]>;

def VOP_V2F32_V2F32_V2F32_V2F32 : VOPProfile<[v2f32, v2f32, v2f32, v2f32]>;
def VOP_V2F32_V2F32_V2F32 : VOPProfile<[v2f32, v2f32, v2f32, untyped]>;
def VOP_V2I32_V2I32_V2I32 : VOPProfile<[v2i32, v2i32, v2i32, untyped]>;
def VOP_V4F32_V4I16_V4I16_V4F32 : VOPProfile<[v4f32, v4i16, v4i16, v4f32]>;
def VOP_V16F32_V4I16_V4I16_V16F32 : VOPProfile<[v16f32, v4i16, v4i16, v16f32]>;
def VOP_V32F32_V4I16_V4I16_V32F32 : VOPProfile<[v32f32, v4i16, v4i16, v32f32]>;

def VOP_V4I32_I64_I64_V4I32 : VOPProfile<[v4i32, i64, i64, v4i32]>;
def VOP_V16I32_I64_I64_V16I32 : VOPProfile<[v16i32, i64, i64, v16i32]>;
def VOP_V4F32_V2F32_V2F32_V4F32 : VOPProfile<[v4f32, v2f32, v2f32, v4f32]>;
def VOP_V16F32_V2F32_V2F32_V16F32 : VOPProfile<[v16f32, v2f32, v2f32, v16f32]>;
def VOP_V4F32_I64_I64_V4F32 : VOPProfile<[v4f32, i64, i64, v4f32]>;
def VOP_V16F32_I64_I64_V16F32 : VOPProfile<[v16f32, i64, i64, v16f32]>;
2915
// Profiles whose fourth type is a scalar i32. In every entry the third type
// has twice as many elements as the second.
def VOP_V4F32_V4F16_V8F16_I32     : VOPProfile<[v4f32, v4f16, v8f16, i32]>;
def VOP_V4F32_V8F16_V16F16_I32    : VOPProfile<[v4f32, v8f16, v16f16, i32]>;
def VOP_V4F32_V8BF16_V16BF16_I32  : VOPProfile<[v4f32, v8bf16, v16bf16, i32]>;
def VOP_V16F32_V4F16_V8F16_I32    : VOPProfile<[v16f32, v4f16, v8f16, i32]>;
def VOP_V16F32_V8F16_V16F16_I32   : VOPProfile<[v16f32, v8f16, v16f16, i32]>;
def VOP_V16F32_V8BF16_V16BF16_I32 : VOPProfile<[v16f32, v8bf16, v16bf16, i32]>;
def VOP_V4F32_V4I16_V8I16_I32     : VOPProfile<[v4f32, v4i16, v8i16, i32]>;
def VOP_V16F32_V4I16_V8I16_I32    : VOPProfile<[v16f32, v4i16, v8i16, i32]>;
def VOP_V4I32_V2I32_V4I32_I32     : VOPProfile<[v4i32, v2i32, v4i32, i32]>;
def VOP_V16I32_V2I32_V4I32_I32    : VOPProfile<[v16i32, v2i32, v4i32, i32]>;
def VOP_V4F32_V2I32_V4I32_I32     : VOPProfile<[v4f32, v2i32, v4i32, i32]>;
def VOP_V16F32_V2I32_V4I32_I32    : VOPProfile<[v16f32, v2i32, v4i32, i32]>;
def VOP_V4I32_V4I32_V8I32_I32     : VOPProfile<[v4i32, v4i32, v8i32, i32]>;
def VOP_V16I32_V4I32_V8I32_I32    : VOPProfile<[v16i32, v4i32, v8i32, i32]>;
def VOP_V4F32_V4I32_V8I32_I32     : VOPProfile<[v4f32, v4i32, v8i32, i32]>;
def VOP_V16F32_V4I32_V8I32_I32    : VOPProfile<[v16f32, v4i32, v8i32, i32]>;
2932
// Profiles with two identical 8-element middle types (f16, bf16 or i32) and a
// matching f32 vector as the first and last type.
def VOP_V4F32_V8F16_V8F16_V4F32     : VOPProfile<[v4f32, v8f16, v8f16, v4f32]>;
def VOP_V16F32_V8F16_V8F16_V16F32   : VOPProfile<[v16f32, v8f16, v8f16, v16f32]>;
def VOP_V16F32_V8BF16_V8BF16_V16F32 : VOPProfile<[v16f32, v8bf16, v8bf16, v16f32]>;
def VOP_V4F32_V8BF16_V8BF16_V4F32   : VOPProfile<[v4f32, v8bf16, v8bf16, v4f32]>;
def VOP_V4F32_V8I32_V8I32_V4F32     : VOPProfile<[v4f32, v8i32, v8i32, v4f32]>;
2938
// v4f32 profiles whose two middle types mix 8- and 6-element i32 vectors.
def VOP_V4F32_V8I32_V6I32_V4F32 : VOPProfile<[v4f32, v8i32, v6i32, v4f32]>;
def VOP_V4F32_V6I32_V8I32_V4F32 : VOPProfile<[v4f32, v6i32, v8i32, v4f32]>;
def VOP_V4F32_V6I32_V6I32_V4F32 : VOPProfile<[v4f32, v6i32, v6i32, v4f32]>;

// v4f32 profiles in which at least one middle type is a 4-element i32 vector.
def VOP_V4F32_V8I32_V4I32_V4F32 : VOPProfile<[v4f32, v8i32, v4i32, v4f32]>;
def VOP_V4F32_V4I32_V8I32_V4F32 : VOPProfile<[v4f32, v4i32, v8i32, v4f32]>;
def VOP_V4F32_V6I32_V4I32_V4F32 : VOPProfile<[v4f32, v6i32, v4i32, v4f32]>;
def VOP_V4F32_V4I32_V6I32_V4F32 : VOPProfile<[v4f32, v4i32, v6i32, v4f32]>;
def VOP_V4F32_V4I32_V4I32_V4F32 : VOPProfile<[v4f32, v4i32, v4i32, v4f32]>;
2948
// v16f32 profiles whose two middle types are 8- or 6-element i32 vectors.
def VOP_V16F32_V8I32_V8I32_V16F32 : VOPProfile<[v16f32, v8i32, v8i32, v16f32]>;
def VOP_V16F32_V8I32_V6I32_V16F32 : VOPProfile<[v16f32, v8i32, v6i32, v16f32]>;
def VOP_V16F32_V6I32_V8I32_V16F32 : VOPProfile<[v16f32, v6i32, v8i32, v16f32]>;
def VOP_V16F32_V6I32_V6I32_V16F32 : VOPProfile<[v16f32, v6i32, v6i32, v16f32]>;

// v16f32 profiles in which at least one middle type is a 4-element i32 vector.
def VOP_V16F32_V8I32_V4I32_V16F32 : VOPProfile<[v16f32, v8i32, v4i32, v16f32]>;
def VOP_V16F32_V4I32_V8I32_V16F32 : VOPProfile<[v16f32, v4i32, v8i32, v16f32]>;
def VOP_V16F32_V6I32_V4I32_V16F32 : VOPProfile<[v16f32, v6i32, v4i32, v16f32]>;
def VOP_V16F32_V4I32_V6I32_V16F32 : VOPProfile<[v16f32, v4i32, v6i32, v16f32]>;
def VOP_V16F32_V4I32_V4I32_V16F32 : VOPProfile<[v16f32, v4i32, v4i32, v16f32]>;
2959
// Integer profiles with two v4i32 middle types.
def VOP_V4I32_V4I32_V4I32_V4I32   : VOPProfile<[v4i32, v4i32, v4i32, v4i32]>;
def VOP_V16I32_V4I32_V4I32_V16I32 : VOPProfile<[v16i32, v4i32, v4i32, v16i32]>;
2962
2963
// Mixin that tags a record for the getCommuteOrig / getCommuteRev mappings,
// which filter on this class, use RevOp as the row key and IsOrig as the
// column.
//   revOp  - stored in RevOp; records with equal RevOp land in the same row.
//   isOrig - stored in IsOrig; distinguishes the two columns (1 vs. 0).
class Commutable_REV <string revOp, bit isOrig> {
  string RevOp  = revOp;
  bit    IsOrig = isOrig;
}
2968
2969//===----------------------------------------------------------------------===//
2970// Interpolation opcodes
2971//===----------------------------------------------------------------------===//
2972
// Register operand over register class `rc` that names "printVINTRPDst" as its
// print method.
class VINTRPDstOperand <RegisterClass rc>
    : RegisterOperand<rc, "printVINTRPDst">;
2974
// Pseudo form of a VINTRP instruction: it passes an empty asm string to
// VINTRPCommon, is registered under SIEncodingFamily.NONE, and is flagged as
// both pseudo and codegen-only.
class VINTRP_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
  VINTRPCommon<outs, ins, "", pattern>,
  SIMCInstr<opName, SIEncodingFamily.NONE> {
  let isCodeGenOnly = 1;
  let isPseudo = 1;
}
2981
// Real VINTRP instruction using the VINTRPe encoding class. It carries no
// pattern; the encoding family is supplied by the instantiation site, which
// is also expected to set the assembler predicate and decoder namespace.
// FIXME-GFX10: WIP.
class VINTRP_Real_si <bits <2> op, string opName, dag outs, dag ins,
                      string asm, int encodingFamily> :
  VINTRPCommon<outs, ins, asm, []>,
  VINTRPe<op>,
  SIMCInstr<opName, encodingFamily>;
2989
// Real VINTRP instruction using the VINTRPe_vi encoding class. Unlike
// VINTRP_Real_si it fixes its own encoding family (VI), assembler predicate
// and decoder namespace.
class VINTRP_Real_vi <bits <2> op, string opName, dag outs, dag ins,
                      string asm> :
  VINTRPCommon<outs, ins, asm, []>,
  VINTRPe_vi<op>,
  SIMCInstr<opName, SIEncodingFamily.VI> {
  let DecoderNamespace = "GFX8";
  let AssemblerPredicate = isGFX8GFX9;
}
2998
// Expands one VINTRP opcode into a pseudo plus three real encodings:
//   <name>        - VINTRP_Pseudo, receives `pattern`
//   <name>_si     - VINTRPe encoding, SIEncodingFamily.SI
//   <name>_vi     - VINTRPe_vi encoding, SIEncodingFamily.VI
//   <name>_gfx10  - VINTRPe encoding, SIEncodingFamily.GFX10
// FIXME-GFX10: WIP.
multiclass VINTRP_m <bits <2> op, dag outs, dag ins, string asm,
                     list<dag> pattern = []> {
  def "" : VINTRP_Pseudo<NAME, outs, ins, pattern>;

  let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in
  def _si : VINTRP_Real_si<op, NAME, outs, ins, asm, SIEncodingFamily.SI>;

  // Predicate and decoder namespace are set inside VINTRP_Real_vi.
  def _vi : VINTRP_Real_vi<op, NAME, outs, ins, asm>;

  let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in
  def _gfx10 : VINTRP_Real_si<op, NAME, outs, ins, asm, SIEncodingFamily.GFX10>;
}
3014
3015//===----------------------------------------------------------------------===//
3016// Vector instruction mappings
3017//===----------------------------------------------------------------------===//
3018
// e32 -> e64: for a "VOP" record with Size = 4 and VOP3 = 0, yields the record
// with the same OpName whose Size = 8 and VOP3 = 1.
def getVOPe64 : InstrMapping {
  let FilterClass = "VOP";            // only records derived from VOP
  let RowFields   = ["OpName"];       // equal OpName => same row
  let ColFields   = ["Size", "VOP3"]; // fields that tell the columns apart
  let KeyCol      = ["4", "0"];       // lookup key: the e32 form
  let ValueCols   = [["8", "1"]];     // result: the e64 form
}
3027
// e64 -> e32: the inverse of getVOPe64. For a "VOP" record with Size = 8 and
// VOP3 = 1, yields the record with the same OpName whose Size = 4 and VOP3 = 0.
def getVOPe32 : InstrMapping {
  let FilterClass = "VOP";            // only records derived from VOP
  let RowFields   = ["OpName"];       // equal OpName => same row
  let ColFields   = ["Size", "VOP3"]; // fields that tell the columns apart
  let KeyCol      = ["8", "1"];       // lookup key: the e64 form
  let ValueCols   = [["4", "0"]];     // result: the e32 form
}
3036
// Ordinary -> SDWA: for a "VOP" record whose AsmVariantName is "Default",
// yields the record with the same OpName whose AsmVariantName is "SDWA".
def getSDWAOp : InstrMapping {
  let FilterClass = "VOP";
  let RowFields   = ["OpName"];         // equal OpName => same row
  let ColFields   = ["AsmVariantName"]; // column selector
  let KeyCol      = ["Default"];        // lookup key: ordinary form
  let ValueCols   = [["SDWA"]];         // result: SDWA form
}
3045
// SDWA -> ordinary: the inverse of getSDWAOp. For a "VOP" record whose
// AsmVariantName is "SDWA", yields the same-OpName record tagged "Default".
def getBasicFromSDWAOp : InstrMapping {
  let FilterClass = "VOP";
  let RowFields   = ["OpName"];         // equal OpName => same row
  let ColFields   = ["AsmVariantName"]; // column selector
  let KeyCol      = ["SDWA"];           // lookup key: SDWA form
  let ValueCols   = [["Default"]];      // result: ordinary form
}
3054
// Ordinary -> DPP: for a "VOP" record whose AsmVariantName is "Default",
// yields the record with the same OpName whose AsmVariantName is "DPP".
def getDPPOp32 : InstrMapping {
  let FilterClass = "VOP";
  let RowFields   = ["OpName"];         // equal OpName => same row
  let ColFields   = ["AsmVariantName"]; // column selector
  let KeyCol      = ["Default"];        // lookup key: ordinary form
  let ValueCols   = [["DPP"]];          // result: DPP form
}
3063
// VOP3 -> VOP3 DPP: for a "VOP" record whose AsmVariantName is "VOP3", yields
// the record with the same OpName whose AsmVariantName is "VOP3_DPP".
def getDPPOp64 : InstrMapping {
  let FilterClass = "VOP";
  let RowFields   = ["OpName"];         // equal OpName => same row
  let ColFields   = ["AsmVariantName"]; // column selector
  let KeyCol      = ["VOP3"];           // lookup key: VOP3 form
  let ValueCols   = [["VOP3_DPP"]];     // result: VOP3 DPP form
}
3071
// Maps a commuted opcode to its original version: for a Commutable_REV record
// with IsOrig = 0, yields the record with the same RevOp whose IsOrig = 1.
def getCommuteOrig : InstrMapping {
  let FilterClass = "Commutable_REV";
  let RowFields   = ["RevOp"];  // equal RevOp => same row
  let ColFields   = ["IsOrig"]; // column selector
  let KeyCol      = ["0"];      // lookup key: commuted form
  let ValueCols   = [["1"]];    // result: original form
}
3080
// Maps an original opcode to its commuted version: for a Commutable_REV record
// with IsOrig = 1, yields the record with the same RevOp whose IsOrig = 0.
def getCommuteRev : InstrMapping {
  let FilterClass = "Commutable_REV";
  let RowFields   = ["RevOp"];  // equal RevOp => same row
  let ColFields   = ["IsOrig"]; // column selector
  let KeyCol      = ["1"];      // lookup key: original form
  let ValueCols   = [["0"]];    // result: commuted form
}
3089
// Maps a SIMCInstr record whose Subtarget is SIEncodingFamily.NONE to the
// records sharing its PseudoInstr, with one result column per encoding family.
def getMCOpcodeGen : InstrMapping {
  let FilterClass = "SIMCInstr";
  let RowFields = ["PseudoInstr"];
  let ColFields = ["Subtarget"];
  let KeyCol = [!cast<string>(SIEncodingFamily.NONE)];
  // Each family below becomes one single-element value column, in list order.
  // These columns must be kept in sync with the SIEncodingFamily enumeration.
  let ValueCols = !foreach(family,
                           [SIEncodingFamily.SI,
                            SIEncodingFamily.VI,
                            SIEncodingFamily.SDWA,
                            SIEncodingFamily.SDWA9,
                            // GFX80 encoding is added to work around a
                            // multiple matching issue for buffer instructions
                            // with unpacked d16 data. This does not actually
                            // change the encoding, and thus may be removed
                            // later.
                            SIEncodingFamily.GFX80,
                            SIEncodingFamily.GFX9,
                            SIEncodingFamily.GFX10,
                            SIEncodingFamily.SDWA10,
                            SIEncodingFamily.GFX90A,
                            SIEncodingFamily.GFX940,
                            SIEncodingFamily.GFX11,
                            SIEncodingFamily.GFX12],
                           [!cast<string>(family)]);
}
3113
// Get the equivalent SOPK instruction: for a SOPKInstTable record with
// IsSOPK = 0, yields the record with the same BaseCmpOp whose IsSOPK = 1.
def getSOPKOp : InstrMapping {
  let FilterClass = "SOPKInstTable";
  let RowFields   = ["BaseCmpOp"]; // equal BaseCmpOp => same row
  let ColFields   = ["IsSOPK"];    // column selector
  let KeyCol      = ["0"];         // lookup key: non-SOPK form
  let ValueCols   = [["1"]];       // result: SOPK form
}
3122
// For a MUBUFAddr64Table record with IsAddr64 = 0, yields the record with the
// same OpName whose IsAddr64 = 1.
def getAddr64Inst : InstrMapping {
  let FilterClass = "MUBUFAddr64Table";
  let RowFields   = ["OpName"];   // equal OpName => same row
  let ColFields   = ["IsAddr64"]; // column selector
  let KeyCol      = ["0"];        // lookup key: IsAddr64 clear
  let ValueCols   = [["1"]];      // result: IsAddr64 set
}
3130
// Key and value column are both IsAddr64 = 1, so a MUBUFAddr64Table record
// with IsAddr64 set maps to the IsAddr64 = 1 record of its own OpName row.
// NOTE(review): this looks like a membership test for addr64 opcodes; confirm
// against the callers.
def getIfAddr64Inst : InstrMapping {
  let FilterClass = "MUBUFAddr64Table";
  let RowFields   = ["OpName"];   // equal OpName => same row
  let ColFields   = ["IsAddr64"]; // column selector
  let KeyCol      = ["1"];        // lookup key: IsAddr64 set
  let ValueCols   = [["1"]];      // result: the same column
}
3138
// Maps a GLOBAL instruction to its SADDR form: for a GlobalSaddrTable record
// with IsSaddr = 0, yields the record with the same SaddrOp whose IsSaddr = 1.
def getGlobalSaddrOp : InstrMapping {
  let FilterClass = "GlobalSaddrTable";
  let RowFields   = ["SaddrOp"]; // equal SaddrOp => same row
  let ColFields   = ["IsSaddr"]; // column selector
  let KeyCol      = ["0"];       // lookup key: non-SADDR form
  let ValueCols   = [["1"]];     // result: SADDR form
}
3147
// Maps a GLOBAL SADDR instruction to its VADDR form: the inverse of
// getGlobalSaddrOp. For a GlobalSaddrTable record with IsSaddr = 1, yields the
// record with the same SaddrOp whose IsSaddr = 0.
def getGlobalVaddrOp : InstrMapping {
  let FilterClass = "GlobalSaddrTable";
  let RowFields   = ["SaddrOp"]; // equal SaddrOp => same row
  let ColFields   = ["IsSaddr"]; // column selector
  let KeyCol      = ["1"];       // lookup key: SADDR form
  let ValueCols   = [["0"]];     // result: VADDR form
}
3156
// Maps a v_cmpx opcode with sdst to the opcode without sdst: for a
// VCMPXNoSDstTable record with HasSDst = 1, yields the record with the same
// NoSDstOp whose HasSDst = 0.
def getVCMPXNoSDstOp : InstrMapping {
  let FilterClass = "VCMPXNoSDstTable";
  let RowFields   = ["NoSDstOp"]; // equal NoSDstOp => same row
  let ColFields   = ["HasSDst"];  // column selector
  let KeyCol      = ["1"];        // lookup key: form with sdst
  let ValueCols   = [["0"]];      // result: form without sdst
}
3165
// Maps a SOPP to a SOPP with S_NOP: for a SOPPRelaxTable record with
// IsRelaxed = 0, yields the record with the same KeyName whose IsRelaxed = 1.
def getSOPPWithRelaxation : InstrMapping {
  let FilterClass = "SOPPRelaxTable";
  let RowFields   = ["KeyName"];   // equal KeyName => same row
  let ColFields   = ["IsRelaxed"]; // column selector
  let KeyCol      = ["0"];         // lookup key: unrelaxed form
  let ValueCols   = [["1"]];       // result: relaxed form
}
3174
// Maps flat scratch opcodes by addressing mode. This one goes from Mode "SS"
// to Mode "ST" among FlatScratchInst records that share the same SVOp.
def getFlatScratchInstSTfromSS : InstrMapping {
  let FilterClass = "FlatScratchInst";
  let RowFields   = ["SVOp"]; // equal SVOp => same row
  let ColFields   = ["Mode"]; // column selector
  let KeyCol      = ["SS"];   // lookup key
  let ValueCols   = [["ST"]]; // result
}
3183
// Flat scratch addressing-mode mapping from Mode "SV" to Mode "SS" among
// FlatScratchInst records that share the same SVOp.
def getFlatScratchInstSSfromSV : InstrMapping {
  let FilterClass = "FlatScratchInst";
  let RowFields   = ["SVOp"]; // equal SVOp => same row
  let ColFields   = ["Mode"]; // column selector
  let KeyCol      = ["SV"];   // lookup key
  let ValueCols   = [["SS"]]; // result
}
3191
// Flat scratch addressing-mode mapping from Mode "SVS" to Mode "SV" among
// FlatScratchInst records that share the same SVOp.
def getFlatScratchInstSVfromSVS : InstrMapping {
  let FilterClass = "FlatScratchInst";
  let RowFields   = ["SVOp"]; // equal SVOp => same row
  let ColFields   = ["Mode"]; // column selector
  let KeyCol      = ["SVS"];  // lookup key
  let ValueCols   = [["SV"]]; // result
}
3199
// Flat scratch addressing-mode mapping from Mode "SS" to Mode "SV" among
// FlatScratchInst records that share the same SVOp.
def getFlatScratchInstSVfromSS : InstrMapping {
  let FilterClass = "FlatScratchInst";
  let RowFields   = ["SVOp"]; // equal SVOp => same row
  let ColFields   = ["Mode"]; // column selector
  let KeyCol      = ["SS"];   // lookup key
  let ValueCols   = [["SV"]]; // result
}
3207
// For an MFMATable record with IsMac = 1, yields the record with the same
// FMAOp whose IsMac = 0.
// NOTE(review): going by the name, the IsMac = 0 record is the early-clobber
// variant; confirm against the MFMATable definitions.
def getMFMAEarlyClobberOp : InstrMapping {
  let FilterClass = "MFMATable";
  let RowFields   = ["FMAOp"]; // equal FMAOp => same row
  let ColFields   = ["IsMac"]; // column selector
  let KeyCol      = ["1"];     // lookup key: IsMac set
  let ValueCols   = [["0"]];   // result: IsMac clear
}
3215
// Map from an mfma using VGPRs to one using AGPRs: for an MFMATable record
// with MFMAKind = "VGPR", yields the record with the same AGPROp whose
// MFMAKind = "AGPR".
def getMFMASrcCVDstAGPROp : InstrMapping {
  let FilterClass = "MFMATable";
  let RowFields   = ["AGPROp"];   // equal AGPROp => same row
  let ColFields   = ["MFMAKind"]; // column selector
  let KeyCol      = ["VGPR"];     // lookup key: VGPR form
  let ValueCols   = [["AGPR"]];   // result: AGPR form
}
3224
// Maps a v_cmp instruction to its v_cmpx equivalent: for a VCMPVCMPXTable
// record with IsVCMPX = 0, yields the record with the same VCMPOp whose
// IsVCMPX = 1.
def getVCMPXOpFromVCMP : InstrMapping {
  let FilterClass = "VCMPVCMPXTable";
  let RowFields   = ["VCMPOp"];  // equal VCMPOp => same row
  let ColFields   = ["IsVCMPX"]; // column selector
  let KeyCol      = ["0"];       // lookup key: v_cmp form
  let ValueCols   = [["1"]];     // result: v_cmpx form
}
3233
// Map encoded mfma(_scale)?_f8f6f4 instructions depending on the number of
// registers required for the used format. Rows come from
// MFMA_F8F6F4_WithSizeTable records; the generated lookup
// getMFMA_F8F6F4_InstWithNumRegs is keyed on
// (NumRegsSrcA, NumRegsSrcB, F8F8Opcode).
def getMFMA_F8F6F4_WithSize : GenericTable {
  let FilterClass = "MFMA_F8F6F4_WithSizeTable";
  let CppTypeName = "MFMA_F8F6F4_Info";
  let Fields = ["Opcode", "F8F8Opcode", "NumRegsSrcA", "NumRegsSrcB"];
  let PrimaryKey = ["NumRegsSrcA", "NumRegsSrcB", "F8F8Opcode"];
  let PrimaryKeyName = "getMFMA_F8F6F4_InstWithNumRegs";
}
3243
// Second table over the same MFMA_F8F6F4_WithSizeTable records, keyed on
// Opcode alone; the generated lookup is named isMFMA_F8F6F4.
def isMFMA_F8F6F4Table : GenericTable {
  let FilterClass = "MFMA_F8F6F4_WithSizeTable";
  let CppTypeName = "MFMA_F8F6F4_Info";
  // Same field list as getMFMA_F8F6F4_WithSize, with which this table shares
  // CppTypeName. A narrower list, [ "Opcode" ], was previously left commented
  // out here.
  let Fields = ["Opcode", "F8F8Opcode", "NumRegsSrcA", "NumRegsSrcB"];
  let PrimaryKey = ["Opcode"];
  let PrimaryKeyName = "isMFMA_F8F6F4";
}
3252
// Table over VOP3_Pseudo records exposing the HasFP8DstByteSel and
// HasFP4DstByteSel fields per Opcode. Rows use the C++ type
// FP4FP8DstByteSelInfo; the generated lookup is getFP4FP8DstByteSelHelper.
def FP4FP8DstByteSelTable : GenericTable {
  let FilterClass = "VOP3_Pseudo";
  let CppTypeName = "FP4FP8DstByteSelInfo";
  let Fields = ["Opcode", "HasFP8DstByteSel", "HasFP4DstByteSel"];
  let PrimaryKey = ["Opcode"];
  let PrimaryKeyName = "getFP4FP8DstByteSelHelper";
}
3261
// Table over VOPD_Component records with columns BaseVOP, VOPDOp and
// CanBeVOPDX. Rows use the C++ type VOPDComponentInfo; the primary lookup,
// getVOPDComponentHelper, is keyed on BaseVOP.
def VOPDComponentTable : GenericTable {
  let FilterClass = "VOPD_Component";
  let CppTypeName = "VOPDComponentInfo";
  let Fields = ["BaseVOP", "VOPDOp", "CanBeVOPDX"];
  let PrimaryKeyName = "getVOPDComponentHelper";
  let PrimaryKey = ["BaseVOP"];
}
3269
// Additional lookup into VOPDComponentTable, keyed on VOPDOp instead of the
// table's primary key.
def getVOPDBaseFromComponent : SearchIndex {
  let Key = ["VOPDOp"];
  let Table = VOPDComponentTable;
}
3274
// Table over VOPD_Base records with columns Opcode, OpX, OpY and SubTgt. Rows
// use the C++ type VOPDInfo; the primary lookup, getVOPDOpcodeHelper, is keyed
// on Opcode.
def VOPDPairs : GenericTable {
  let FilterClass = "VOPD_Base";
  let CppTypeName = "VOPDInfo";
  let Fields = ["Opcode", "OpX", "OpY", "SubTgt"];
  let PrimaryKeyName = "getVOPDOpcodeHelper";
  let PrimaryKey = ["Opcode"];
}
3282
// Additional lookup into VOPDPairs, keyed on the (OpX, OpY, SubTgt) triple
// instead of the table's primary key.
def getVOPDInfoFromComponentOpcodes : SearchIndex {
  let Key = ["OpX", "OpY", "SubTgt"];
  let Table = VOPDPairs;
}
3287
3288include "SIInstructions.td"
3289
3290include "DSInstructions.td"
3291include "MIMGInstructions.td"
3292