xref: /llvm-project/llvm/lib/Target/AMDGPU/FLATInstructions.td (revision 6b2232606d01a029f640b61b4f985d9dea79d4b6)
1//===-- FLATInstructions.td - FLAT Instruction Definitions ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
// Addressing-mode ComplexPatterns for the FLAT family. Each entry names the
// C++ selector callback and the number of operands it produces; all of them
// use complexity -10 and are defined with WantsRoot set.
let WantsRoot = true in {
  // Two-operand "<segment>Offset" forms.
  def FlatOffset    : ComplexPattern<iPTR, 2, "SelectFlatOffset",    [], [], -10>;
  def GlobalOffset  : ComplexPattern<iPTR, 2, "SelectGlobalOffset",  [], [], -10>;
  def ScratchOffset : ComplexPattern<iPTR, 2, "SelectScratchOffset", [], [], -10>;

  // Forms involving a scalar address operand.
  def GlobalSAddr   : ComplexPattern<iPTR, 3, "SelectGlobalSAddr",   [], [], -10>;
  def ScratchSAddr  : ComplexPattern<iPTR, 2, "SelectScratchSAddr",  [], [], -10>;
  def ScratchSVAddr : ComplexPattern<iPTR, 3, "SelectScratchSVAddr", [], [], -10>;
}
18
19//===----------------------------------------------------------------------===//
20// FLAT classes
21//===----------------------------------------------------------------------===//
22
// Common base of every FLAT-family pseudo instruction (flat, global and
// scratch segments). Derived pseudos override the has_* / enabled_* knobs
// declared here; the real encoding classes read them back through their
// pseudo parameter.
//
//   opName  - mnemonic, kept in Mnemonic.
//   outs    - output operand dag.
//   ins     - input operand dag.
//   asmOps  - operand part of the assembly string, kept in AsmOperands.
//   pattern - optional selection patterns.
class FLAT_Pseudo<string opName, dag outs, dag ins,
                  string asmOps, list<dag> pattern=[]> :
  InstSI<outs, ins, "", pattern>,
  SIMCInstr<NAME, SIEncodingFamily.NONE> {

  // The pseudo carries an empty asm string; it is code-gen only.
  let isCodeGenOnly = 1;
  let isPseudo = 1;

  let FLAT = 1;

  let SchedRW = [WriteVMEM];
  let hasSideEffects = 0;
  let UseNamedOperandTable = 1;

  string Mnemonic = opName;
  string AsmOperands = asmOps;

  // Segment selectors. With both left at zero the instruction falls back to
  // the HasFlatAddressSpace predicate below.
  bits<1> is_flat_global = 0;
  bits<1> is_flat_scratch = 0;

  bits<1> has_vdst = 1;

  // "Has saddr" and "saddr enabled" are tracked separately because saddr is
  // only valid for scratch and global instructions. Before gfx9 these bits
  // were reserved, so the original flat segment instructions should not
  // necessarily get the "disabled" value written into them either.
  bits<1> has_saddr = 0;
  bits<1> enabled_saddr = 0;
  bits<7> saddr_value = 0;
  bits<1> has_vaddr = 1;

  bits<1> has_data = 1;
  bits<1> has_glc  = 1;
  bits<1> glcValue = 0;
  bits<1> has_dlc  = 1;
  bits<1> dlcValue = 0;
  bits<1> has_sccb  = 1;
  bits<1> sccbValue = 0;
  bits<1> has_sve  = 0; // Scratch VGPR Enable
  bits<1> lds = 0;
  bits<1> sve = 0;
  bits<1> has_offset = 1;

  // Global wins over scratch; anything else needs the flat address space.
  let SubtargetPredicate = !if(is_flat_global, HasFlatGlobalInsts,
    !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace));

  // Global accesses read only EXEC; every other form also reads FLAT_SCR.
  // TODO: M0 if it could possibly access LDS (before gfx9? only)?
  let Uses = !if(is_flat_global, [EXEC], [EXEC, FLAT_SCR]);

  // Internally a FLAT instruction is executed as both an LDS and a Buffer
  // instruction, so it increments both VM_CNT and LGKM_CNT and is not
  // considered done until both have been decremented. LGKM_CNT is cleared
  // for the global and scratch segments.
  let VM_CNT = 1;
  let LGKM_CNT = !not(!or(is_flat_global, is_flat_scratch));

  let FlatGlobal = is_flat_global;
  let FlatScratch = is_flat_scratch;
}
83
// Encodable 64-bit FLAT instruction derived from the pseudo `ps`.
//
//   op     - 7-bit opcode, placed in Inst{24-18}.
//   ps     - pseudo providing the operand lists, asm operands and the
//            has_* / enabled_* encoding knobs.
//   opName - mnemonic; defaults to the one recorded on the pseudo.
class FLAT_Real <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
  InstSI <ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands, []>,
  Enc64 {

  let isCodeGenOnly = 0;
  let isPseudo = 0;

  let FLAT = 1;

  // Mirror the relevant properties of the pseudo.
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let OtherPredicates      = ps.OtherPredicates;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let TSFlags              = ps.TSFlags;
  let SchedRW              = ps.SchedRW;
  let Uses                 = ps.Uses;
  let Defs                 = ps.Defs;
  let mayLoad              = ps.mayLoad;
  let mayStore             = ps.mayStore;
  let isConvergent         = ps.isConvergent;
  let IsAtomicRet          = ps.IsAtomicRet;
  let IsAtomicNoRet        = ps.IsAtomicNoRet;
  let VM_CNT               = ps.VM_CNT;
  let LGKM_CNT             = ps.LGKM_CNT;
  let VALU                 = ps.VALU;

  // Operand encoding fields.
  bits<8> vaddr;
  bits<10> vdata;
  bits<7> saddr;
  bits<10> vdst;

  bits<5> cpol;

  // Only valid on gfx9
  bits<1> lds = ps.lds; // LDS DMA for global and scratch

  // Segment, 00=flat, 01=scratch, 10=global, 11=reserved
  bits<2> seg = {ps.is_flat_global, ps.is_flat_scratch};

  // Signed offset. Highest bit ignored for flat and treated as 12-bit
  // unsigned for flat accesses.
  bits<13> offset;

  // GFX90A+ only: instruction uses AccVGPR for data. Taken from bit 9 of
  // vdst when there is a destination, else from bit 9 of vdata when there is
  // a data operand, else zero.
  bits<1> acc = !if(ps.has_vdst, vdst{9}, !if(ps.has_data, vdata{9}, 0));

  // We don't use tfe right now, and it was removed in gfx9.
  bits<1> tfe = 0;

  // Inst{15-0}: only valid on GFX9+.
  let Inst{12-0}  = offset;
  let Inst{13}    = !if(ps.has_sve, ps.sve, lds);
  let Inst{15-14} = seg;

  let Inst{16}    = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue);
  let Inst{17}    = cpol{CPolBit.SLC};
  let Inst{24-18} = op;
  let Inst{31-26} = 0x37; // Encoding.
  let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
  let Inst{47-40} = !if(ps.has_data, vdata{7-0}, ?);

  // saddr when the pseudo has one (0x7f when it is present but disabled),
  // zero for pseudos without an saddr field.
  let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7f), 0);

  let Inst{55}    = acc; // nv on GFX9+, TFE before. AccVGPR for data on GFX90A.
  let Inst{63-56} = !if(ps.has_vdst, vdst{7-0}, ?);
}
151
// Encodable 96-bit FLAT instruction derived from the pseudo `ps`.
//
//   op     - 8-bit opcode, placed in Inst{21-14}.
//   ps     - pseudo providing the operand lists, asm operands and the
//            has_* / enabled_* encoding knobs.
//   opName - mnemonic; defaults to the one recorded on the pseudo.
class VFLAT_Real <bits<8> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
  InstSI <ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands, []>,
  Enc96 {

  let FLAT = 1;

  // Mirror the relevant properties of the pseudo.
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let WaveSizePredicate    = ps.WaveSizePredicate;
  let OtherPredicates      = ps.OtherPredicates;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let TSFlags              = ps.TSFlags;
  let SchedRW              = ps.SchedRW;
  let Uses                 = ps.Uses;
  let Defs                 = ps.Defs;
  let mayLoad              = ps.mayLoad;
  let mayStore             = ps.mayStore;
  let isConvergent         = ps.isConvergent;
  let IsAtomicRet          = ps.IsAtomicRet;
  let IsAtomicNoRet        = ps.IsAtomicNoRet;
  let VM_CNT               = ps.VM_CNT;
  let LGKM_CNT             = ps.LGKM_CNT;
  let VALU                 = ps.VALU;

  // Operand encoding fields.
  bits<7> saddr;
  bits<8> vdst;
  bits<6> cpol;
  bits<8> vdata; // vsrc
  bits<8> vaddr;
  bits<24> offset;

  // Field placement, lowest bit first. A disabled saddr is encoded as the
  // gfx11+ SGPR_NULL index.
  let Inst{6-0}   = !if(ps.enabled_saddr, saddr, SGPR_NULL_gfx11plus.Index);
  let Inst{21-14} = op;
  let Inst{31-26} = 0x3b;
  let Inst{39-32} = !if(ps.has_vdst, vdst, ?);
  let Inst{49}    = ps.sve;
  let Inst{51-50} = cpol{4-3}; // scope
  let Inst{52}    = !if(ps.IsAtomicRet, 1, cpol{0}); // th{0}
  let Inst{54-53} = cpol{2-1}; // th{2-1}
  let Inst{62-55} = !if(ps.has_data, vdata{7-0}, ?);
  let Inst{71-64} = !if(ps.has_vaddr, vaddr, ?);
  let Inst{95-72} = !if(ps.has_offset, offset, ?);
}
196
// Mixin that records which opcode family an instruction belongs to (SaddrOp)
// and whether it is the saddr form of that opcode (IsSaddr).
class GlobalSaddrTable <bit is_saddr, string Name = ""> {
  bit    IsSaddr = is_saddr;
  string SaddrOp = Name;
}
201
// TODO: Is exec allowed for saddr? The disabled value 0x7f is the
// same encoding value as exec_hi, so it isn't possible to use that if
// saddr is 32-bit (which isn't handled here yet).

// Load pseudo writing $vdst.
//
//   opName        - mnemonic.
//   regClass      - register class of the loaded value.
//   HasTiedOutput - add a $vdst_in input tied to $vdst.
//   HasSaddr      - the asm string has an saddr slot ("$saddr" or "off").
//   EnableSaddr   - address is 64-bit $saddr plus 32-bit $vaddr instead of a
//                   64-bit $vaddr.
//   vdata_op      - operand type used for $vdst / $vdst_in.
class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
  bit HasTiedOutput = 0,
  bit HasSaddr = 0, bit EnableSaddr = 0,
  RegisterOperand vdata_op = getLdStRegisterOperand<regClass>.ret> : FLAT_Pseudo<
  opName,
  (outs vdata_op:$vdst),
  !con(
    // Address operands.
    !if(EnableSaddr,
        (ins SReg_64_XEXEC_XNULL:$saddr, VGPR_32:$vaddr),
        (ins VReg_64:$vaddr)),
    (ins flat_offset:$offset),
    // FIXME: Operands with default values do not work with following
    // non-optional operands.
    !if(HasTiedOutput,
        (ins CPol:$cpol, vdata_op:$vdst_in),
        (ins CPol_0:$cpol))),
  " $vdst, $vaddr" # !if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")
                   # "$offset$cpol"> {
  let mayLoad = 1;
  let has_data = 0;

  let has_saddr = HasSaddr;
  let enabled_saddr = EnableSaddr;

  // The tied input only exists to carry the old destination value; it is
  // left out of the encoding.
  let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}
228
// Store pseudo; has no destination register.
//
//   opName      - mnemonic.
//   vdataClass  - register class of the stored value.
//   HasSaddr    - the asm string has an saddr slot ("$saddr" or "off").
//   EnableSaddr - address is 32-bit $vaddr plus 64-bit $saddr instead of a
//                 64-bit $vaddr.
class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
  bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs),
  !con(
    !if(EnableSaddr,
        (ins VGPR_32:$vaddr,
             getLdStRegisterOperand<vdataClass>.ret:$vdata,
             SReg_64_XEXEC_XNULL:$saddr),
        (ins VReg_64:$vaddr,
             getLdStRegisterOperand<vdataClass>.ret:$vdata)),
    (ins flat_offset:$offset, CPol_0:$cpol)),
  " $vaddr, $vdata" # !if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")
                    # "$offset$cpol"> {
  let mayStore = 1;
  let mayLoad  = 0;
  let has_vdst = 0;

  let has_saddr = HasSaddr;
  let enabled_saddr = EnableSaddr;
}
245
// Emits the two global-segment forms of a load: the unsuffixed vaddr-only
// form and the _SADDR form. HasTiedInput is forwarded as the load's
// HasTiedOutput argument.
multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> {
  let is_flat_global = 1 in {
    def "" :
      FLAT_Load_Pseudo<opName, regClass, HasTiedInput, /*HasSaddr=*/1>,
      GlobalSaddrTable<0, opName>;

    def _SADDR :
      FLAT_Load_Pseudo<opName, regClass, HasTiedInput, /*HasSaddr=*/1,
                       /*EnableSaddr=*/1>,
      GlobalSaddrTable<1, opName>;
  }
}
254
// Global load with no vaddr operand: the address comes from the optional
// $saddr and the offset only.
//
//   opName        - mnemonic.
//   regClass      - register class of $vdst (and $vdst_in).
//   HasTiedOutput - add a $vdst_in input tied to $vdst.
//   EnableSaddr   - take a 64-bit $saddr; otherwise "off" is printed.
class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass,
  bit HasTiedOutput = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs regClass:$vdst),
  !con(
    !if(EnableSaddr, (ins SReg_64:$saddr), (ins)),
    (ins flat_offset:$offset, CPol_0:$cpol),
    !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
  " $vdst, " # !if(EnableSaddr, "$saddr", "off") # "$offset$cpol"> {
  let is_flat_global = 1;
  let mayLoad = 1;

  let has_data = 0;
  let has_vaddr = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;

  // The tied input is left out of the encoding.
  let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}
273
// Emits the "off" (unsuffixed) and _SADDR forms of an addtid global load.
multiclass FLAT_Global_Load_AddTid_Pseudo<string opName, RegisterClass regClass,
  bit HasTiedOutput = 0> {
  def "" :
    FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput>,
    GlobalSaddrTable<0, opName>;

  def _SADDR :
    FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput,
                                   /*EnableSaddr=*/1>,
    GlobalSaddrTable<1, opName>;
}
281
// Emits the two global-segment forms of a store: the unsuffixed vaddr-only
// form and the _SADDR form.
multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_global = 1 in {
    def "" :
      FLAT_Store_Pseudo<opName, regClass, /*HasSaddr=*/1>,
      GlobalSaddrTable<0, opName>;

    def _SADDR :
      FLAT_Store_Pseudo<opName, regClass, /*HasSaddr=*/1, /*EnableSaddr=*/1>,
      GlobalSaddrTable<1, opName>;
  }
}
290
// Global load with the lds bit set. It has neither a destination register
// nor a data operand, is marked as both loading and storing, and reads M0 in
// addition to EXEC.
//
//   opName      - mnemonic.
//   EnableSaddr - address is 64-bit $saddr plus 32-bit $vaddr instead of a
//                 64-bit $vaddr.
class FLAT_Global_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs ),
  !con(
    !if(EnableSaddr,
        (ins SReg_64:$saddr, VGPR_32:$vaddr),
        (ins VReg_64:$vaddr)),
    (ins flat_offset:$offset, CPol_0:$cpol)),
  " $vaddr" # !if(EnableSaddr, ", $saddr", ", off") # "$offset$cpol"> {
  let is_flat_global = 1;
  let lds = 1;

  let mayLoad = 1;
  let mayStore = 1;
  let VALU = 1;
  let LGKM_CNT = 1;
  let Uses = [M0, EXEC];
  let SchedRW = [WriteVMEM, WriteLDS];

  let has_vdst = 0;
  let has_data = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
}
311
// Emits the vaddr-only (unsuffixed) and _SADDR forms of a global LDS load.
multiclass FLAT_Global_Load_LDS_Pseudo<string opName> {
  def "" :
    FLAT_Global_Load_LDS_Pseudo<opName>,
    GlobalSaddrTable<0, opName>;

  def _SADDR :
    FLAT_Global_Load_LDS_Pseudo<opName, /*EnableSaddr=*/1>,
    GlobalSaddrTable<1, opName>;
}
318
// Global store with no vaddr operand: the address comes from the optional
// $saddr and the offset only.
//
//   opName      - mnemonic.
//   vdataClass  - register class of the stored $vdata.
//   EnableSaddr - take a 64-bit $saddr; otherwise "off" is printed.
class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass,
  bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs),
  !con(
    !if(EnableSaddr,
        (ins vdataClass:$vdata, SReg_64:$saddr),
        (ins vdataClass:$vdata)),
    (ins flat_offset:$offset, CPol:$cpol)),
  " $vdata, " # !if(EnableSaddr, "$saddr", "off") # "$offset$cpol"> {
  let is_flat_global = 1;

  let mayStore = 1;
  let mayLoad  = 0;

  let has_vdst = 0;
  let has_vaddr = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
}
334
// Emits the "off" (unsuffixed) and _SADDR forms of an addtid global store.
multiclass FLAT_Global_Store_AddTid_Pseudo<string opName, RegisterClass regClass> {
  def "" :
    FLAT_Global_Store_AddTid_Pseudo<opName, regClass>,
    GlobalSaddrTable<0, opName>;

  def _SADDR :
    FLAT_Global_Store_AddTid_Pseudo<opName, regClass, /*EnableSaddr=*/1>,
    GlobalSaddrTable<1, opName>;
}
341
// Global-segment instruction whose only operand is the cache policy. It has
// side effects but is marked as neither loading nor storing, and every
// address/data related encoding knob is switched off.
//
//   opName - mnemonic.
//   node   - optional selection node; defaults to null_frag.
class FLAT_Global_Invalidate_Writeback<string opName, SDPatternOperator node = null_frag> :
  FLAT_Pseudo<opName, (outs), (ins CPol:$cpol), "$cpol", [(node)]> {

  let AsmMatchConverter = "";

  let is_flat_global = 1;
  let hasSideEffects = 1;
  let mayLoad = 0;
  let mayStore = 0;

  // No destination, data, address or offset operands.
  let has_vdst = 0;
  let has_data = 0;
  let has_vaddr = 0;
  let has_offset = 0;
  let has_saddr = 0;
  let enabled_saddr = 0;
  let saddr_value = 0;

  // Cache-control bits are all fixed at zero.
  let has_glc = 0;
  let glcValue = 0;
  let has_dlc = 0;
  let dlcValue = 0;
  let has_sccb = 0;
  let sccbValue = 0;

  let has_sve = 0;
  let sve = 0;
  let lds = 0;
}
369
// Mixin that records the opcode family of a scratch instruction (SVOp) and
// the addressing mode of this particular variant (Mode), e.g. "SV", "SS",
// "SVS" or "ST".
class FlatScratchInst <string sv_op, string mode> {
  string SVOp = sv_op;
  string Mode = mode;
}
374
// Scratch-segment load writing $vdst.
//
//   opName        - mnemonic.
//   regClass      - register class of the loaded value.
//   HasTiedOutput - add a $vdst_in input tied to $vdst.
//   EnableSaddr   - take a 32-bit $saddr.
//   EnableSVE     - take both $vaddr and $saddr.
//   EnableVaddr   - take a 32-bit $vaddr; by default set when EnableSVE is
//                   set or EnableSaddr is clear.
class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
  bit HasTiedOutput = 0,
  bit EnableSaddr = 0,
  bit EnableSVE = 0,
  bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr))>
  : FLAT_Pseudo<
  opName,
  (outs getLdStRegisterOperand<regClass>.ret:$vdst),
  !con(
    // Address operands; the first matching condition wins.
    !cond(
      EnableSVE   : (ins VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset),
      EnableSaddr : (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset),
      EnableVaddr : (ins VGPR_32:$vaddr, flat_offset:$offset),
      true        : (ins flat_offset:$offset)),
    !if(HasTiedOutput,
        (ins CPol:$cpol, getLdStRegisterOperand<regClass>.ret:$vdst_in),
        (ins CPol_0:$cpol))),
  " $vdst, " # !if(EnableVaddr, "$vaddr, ", "off, ")
             # !if(EnableSaddr, "$saddr", "off")
             # "$offset$cpol"> {
  let is_flat_scratch = 1;
  let mayLoad = 1;

  let has_data = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = EnableVaddr;
  let has_sve = EnableSVE;
  let sve = EnableVaddr;

  // The tied input is left out of the encoding.
  let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}
406
// Scratch-segment store; has no destination register.
//
//   opName      - mnemonic.
//   vdataClass  - register class of the stored value.
//   EnableSaddr - take a 32-bit $saddr.
//   EnableSVE   - take both $vaddr and $saddr.
//   EnableVaddr - take a 32-bit $vaddr; by default set when EnableSVE is set
//                 or EnableSaddr is clear.
//   vdata_op    - operand type used for $vdata.
class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit EnableSaddr = 0,
  bit EnableSVE = 0,
  bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr)),
  RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret> : FLAT_Pseudo<
  opName,
  (outs),
  // Operand list; the first matching condition wins.
  !cond(
    EnableSVE   : (ins vdata_op:$vdata, VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol),
    EnableSaddr : (ins vdata_op:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol),
    EnableVaddr : (ins vdata_op:$vdata, VGPR_32:$vaddr, flat_offset:$offset, CPol_0:$cpol),
    true        : (ins vdata_op:$vdata, flat_offset:$offset, CPol_0:$cpol)),
  " " # !if(EnableVaddr, "$vaddr", "off")
      # ", $vdata, "
      # !if(EnableSaddr, "$saddr", "off")
      # "$offset$cpol"> {
  let is_flat_scratch = 1;

  let mayStore = 1;
  let mayLoad  = 0;

  let has_vdst = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = EnableVaddr;
  let has_sve = EnableSVE;
  let sve = EnableVaddr;
}
431
// Emits the four addressing-mode variants of a scratch load:
//   (unsuffixed) "SV"  - vaddr only
//   _SADDR       "SS"  - saddr only
//   _SVS         "SVS" - vaddr and saddr, needs HasFlatScratchSVSMode
//   _ST          "ST"  - neither, needs HasFlatScratchSTMode
multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedOutput = 0> {
  def "" :
    FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput>,
    FlatScratchInst<opName, "SV">;

  def _SADDR :
    FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput,
                             /*EnableSaddr=*/1>,
    FlatScratchInst<opName, "SS">;

  let SubtargetPredicate = HasFlatScratchSVSMode in
  def _SVS :
    FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput,
                             /*EnableSaddr=*/1, /*EnableSVE=*/1>,
    FlatScratchInst<opName, "SVS">;

  let SubtargetPredicate = HasFlatScratchSTMode in
  def _ST :
    FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput,
                             /*EnableSaddr=*/0, /*EnableSVE=*/0,
                             /*EnableVaddr=*/0>,
    FlatScratchInst<opName, "ST">;
}
446
// Emits the four addressing-mode variants of a scratch store:
//   (unsuffixed) "SV"  - vaddr only
//   _SADDR       "SS"  - saddr only
//   _SVS         "SVS" - vaddr and saddr, needs HasFlatScratchSVSMode
//   _ST          "ST"  - neither, needs HasFlatScratchSTMode
multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> {
  def "" :
    FLAT_Scratch_Store_Pseudo<opName, regClass>,
    FlatScratchInst<opName, "SV">;

  def _SADDR :
    FLAT_Scratch_Store_Pseudo<opName, regClass, /*EnableSaddr=*/1>,
    FlatScratchInst<opName, "SS">;

  let SubtargetPredicate = HasFlatScratchSVSMode in
  def _SVS :
    FLAT_Scratch_Store_Pseudo<opName, regClass, /*EnableSaddr=*/1,
                              /*EnableSVE=*/1>,
    FlatScratchInst<opName, "SVS">;

  let SubtargetPredicate = HasFlatScratchSTMode in
  def _ST :
    FLAT_Scratch_Store_Pseudo<opName, regClass, /*EnableSaddr=*/0,
                              /*EnableSVE=*/0, /*EnableVaddr=*/0>,
    FlatScratchInst<opName, "ST">;
}
461
// Scratch load with the lds bit set. It has neither a destination register
// nor a data operand, is marked as both loading and storing, and reads M0 in
// addition to EXEC.
//
//   opName      - mnemonic.
//   EnableSaddr - take a 32-bit $saddr.
//   EnableSVE   - take both $vaddr and $saddr.
//   EnableVaddr - take a 32-bit $vaddr; by default set when EnableSVE is set
//                 or EnableSaddr is clear.
class FLAT_Scratch_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0,
  bit EnableSVE = 0,
  bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr))> : FLAT_Pseudo<
  opName,
  (outs ),
  // Operand list; the first matching condition wins.
  !cond(
    EnableSVE   : (ins VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol:$cpol),
    EnableSaddr : (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol:$cpol),
    EnableVaddr : (ins VGPR_32:$vaddr, flat_offset:$offset, CPol:$cpol),
    true        : (ins flat_offset:$offset, CPol:$cpol)),
  " " # !if(EnableVaddr, "$vaddr, ", "off, ")
      # !if(EnableSaddr, "$saddr", "off")
      # "$offset$cpol"> {

  let is_flat_scratch = 1;
  let lds = 1;

  let mayLoad = 1;
  let mayStore = 1;
  let VALU = 1;
  let LGKM_CNT = 1;
  let Uses = [M0, EXEC];
  let SchedRW = [WriteVMEM, WriteLDS];

  let has_vdst = 0;
  let has_data = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = EnableVaddr;
  let has_sve = EnableSVE;
  let sve = EnableVaddr;
}
492
// Emits the SV (unsuffixed), SS (_SADDR), SVS (_SVS) and ST (_ST) variants of
// a scratch LDS load. No extra subtarget predicate is attached to any of
// them here.
multiclass FLAT_Scratch_Load_LDS_Pseudo<string opName> {
  def "" :
    FLAT_Scratch_Load_LDS_Pseudo<opName>,
    FlatScratchInst<opName, "SV">;

  def _SADDR :
    FLAT_Scratch_Load_LDS_Pseudo<opName, /*EnableSaddr=*/1>,
    FlatScratchInst<opName, "SS">;

  def _SVS :
    FLAT_Scratch_Load_LDS_Pseudo<opName, /*EnableSaddr=*/1, /*EnableSVE=*/1>,
    FlatScratchInst<opName, "SVS">;

  def _ST :
    FLAT_Scratch_Load_LDS_Pseudo<opName, /*EnableSaddr=*/0, /*EnableSVE=*/0,
                                 /*EnableVaddr=*/0>,
    FlatScratchInst<opName, "ST">;
}
503
// Base of the atomic pseudos that do not return the old value: both loads
// and stores, has no destination register, and the GLC bit is fixed at
// glcValue (0) instead of being taken from cpol.
class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins,
                               string asm, list<dag> pattern = []> :
  FLAT_Pseudo<opName, outs, ins, asm, pattern> {
  let IsAtomicNoRet = 1;

  let mayLoad = 1;
  let mayStore = 1;

  let has_vdst = 0;
  let has_glc  = 0;
  let glcValue = 0;
  let has_sccb  = 1;
  let sccbValue = 0;
}
516
// Returning flavour of an atomic pseudo. Starts from the no-return base and
// flips the pieces that differ: it has a destination register, the fixed GLC
// value is 1, and the atomic classification is "ret" rather than "no ret".
class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins,
                            string asm, list<dag> pattern = []>
  : FLAT_AtomicNoRet_Pseudo<opName, outs, ins, asm, pattern> {
  let IsAtomicRet = 1;
  let IsAtomicNoRet = 0;

  let has_vdst = 1;
  let glcValue = 1;
  let sccbValue = 0;
}
526
// Emits the non-returning flat-segment atomic for `opName`.
//
//   vdst_rc  - result register class (only used to derive data_rc here).
//   vt       - result value type (only used to derive data_vt here).
//   data_vt  - value type of the data operand; decides FPAtomic.
//   data_rc  - register class of the data operand.
//   data_op  - operand type of $vdata.
multiclass FLAT_Atomic_Pseudo_NO_RTN<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> {
  def "" :
    FLAT_AtomicNoRet_Pseudo<
      opName,
      (outs),
      (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_0:$cpol),
      " $vaddr, $vdata$offset$cpol">,
    GlobalSaddrTable<0, opName> {
    let AddedComplexity = -1; // Prefer global atomics if available
    let FPAtomic = data_vt.isFP;
  }
}
543
// Emits the returning (_RTN) flat-segment atomic for `opName`.
//
//   vdst_rc  - register class of the returned value.
//   vt       - result value type (only used to derive data_vt here).
//   data_vt  - value type of the data operand; decides FPAtomic.
//   data_rc  - register class of the data operand.
//   data_op  - operand type of $vdata.
multiclass FLAT_Atomic_Pseudo_RTN<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> {
  def _RTN :
    FLAT_AtomicRet_Pseudo<
      opName,
      (outs getLdStRegisterOperand<vdst_rc>.ret:$vdst),
      (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol),
      " $vdst, $vaddr, $vdata$offset$cpol">,
    GlobalSaddrTable<0, opName#"_rtn"> {
    let AddedComplexity = -1; // Prefer global atomics if available
    let FPAtomic = data_vt.isFP;
  }
}
560
// Emits both the non-returning and the returning (_RTN) flat-segment atomic
// for `opName`, forwarding every argument unchanged.
multiclass FLAT_Atomic_Pseudo<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> {
  defm "" : FLAT_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt,
                                      data_vt, data_rc, data_op>;
  defm "" : FLAT_Atomic_Pseudo_RTN<opName, vdst_rc, vt,
                                   data_vt, data_rc, data_op>;
}
571
// Emits the non-returning global-segment atomic for `opName` in its
// vaddr-only (unsuffixed, prints "off") and _SADDR forms.
//
//   vdst_rc  - result register class (only used to derive data_rc here).
//   vt       - result value type (only used to derive data_vt here).
//   data_vt  - value type of the data operand; decides FPAtomic.
//   data_rc  - register class of the data operand.
//   data_op  - operand type of $vdata.
multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> {

  let is_flat_global = 1 in {
    def "" :
      FLAT_AtomicNoRet_Pseudo<
        opName,
        (outs),
        (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_0:$cpol),
        " $vaddr, $vdata, off$offset$cpol">,
      GlobalSaddrTable<0, opName> {
      let FPAtomic = data_vt.isFP;
      let has_saddr = 1;
    }

    def _SADDR :
      FLAT_AtomicNoRet_Pseudo<
        opName,
        (outs),
        (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64_XEXEC_XNULL:$saddr,
             flat_offset:$offset, CPol_0:$cpol),
        " $vaddr, $vdata, $saddr$offset$cpol">,
      GlobalSaddrTable<1, opName> {
      let FPAtomic = data_vt.isFP;
      let has_saddr = 1;
      let enabled_saddr = 1;
    }
  }
}
601
// Emits the returning global-segment atomic for `opName` in its vaddr-only
// (_RTN, prints "off") and saddr (_SADDR_RTN) forms.
//
//   vdst_rc  - register class of the returned value.
//   vt       - result value type (only used to derive data_vt here).
//   data_vt  - value type of the data operand; decides FPAtomic.
//   data_rc  - register class of the data operand.
//   data_op  - operand type of $vdata.
//   vdst_op  - operand type of $vdst.
multiclass FLAT_Global_Atomic_Pseudo_RTN<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret,
  RegisterOperand vdst_op = getLdStRegisterOperand<vdst_rc>.ret> {

  let is_flat_global = 1 in {
    def _RTN :
      FLAT_AtomicRet_Pseudo<
        opName,
        (outs vdst_op:$vdst),
        (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol),
        " $vdst, $vaddr, $vdata, off$offset$cpol">,
      GlobalSaddrTable<0, opName#"_rtn"> {
      let FPAtomic = data_vt.isFP;
      let has_saddr = 1;
    }

    def _SADDR_RTN :
      FLAT_AtomicRet_Pseudo<
        opName,
        (outs vdst_op:$vdst),
        (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64_XEXEC_XNULL:$saddr,
             flat_offset:$offset, CPol_GLC1:$cpol),
        " $vdst, $vaddr, $vdata, $saddr$offset$cpol">,
      GlobalSaddrTable<1, opName#"_rtn"> {
      let FPAtomic = data_vt.isFP;
      let has_saddr = 1;
      let enabled_saddr = 1;
    }
  }
}
632
// Emits all four global-segment atomics for `opName`: non-returning and
// returning, each in vaddr-only and saddr form. The operand types of the
// underlying multiclasses are left at their defaults.
multiclass FLAT_Global_Atomic_Pseudo<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc> {
  defm "" : FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt,
                                             data_vt, data_rc>;
  defm "" : FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt,
                                          data_vt, data_rc>;
}
642
643//===----------------------------------------------------------------------===//
644// Flat Instructions
645//===----------------------------------------------------------------------===//
646
// Flat-segment loads.
def FLAT_LOAD_UBYTE    : FLAT_Load_Pseudo<"flat_load_ubyte",   VGPR_32>;
def FLAT_LOAD_SBYTE    : FLAT_Load_Pseudo<"flat_load_sbyte",   VGPR_32>;
def FLAT_LOAD_USHORT   : FLAT_Load_Pseudo<"flat_load_ushort",  VGPR_32>;
def FLAT_LOAD_SSHORT   : FLAT_Load_Pseudo<"flat_load_sshort",  VGPR_32>;
def FLAT_LOAD_DWORD    : FLAT_Load_Pseudo<"flat_load_dword",   VGPR_32>;
def FLAT_LOAD_DWORDX2  : FLAT_Load_Pseudo<"flat_load_dwordx2", VReg_64>;
def FLAT_LOAD_DWORDX4  : FLAT_Load_Pseudo<"flat_load_dwordx4", VReg_128>;
def FLAT_LOAD_DWORDX3  : FLAT_Load_Pseudo<"flat_load_dwordx3", VReg_96>;

// Flat-segment stores.
def FLAT_STORE_BYTE    : FLAT_Store_Pseudo<"flat_store_byte",    VGPR_32>;
def FLAT_STORE_SHORT   : FLAT_Store_Pseudo<"flat_store_short",   VGPR_32>;
def FLAT_STORE_DWORD   : FLAT_Store_Pseudo<"flat_store_dword",   VGPR_32>;
def FLAT_STORE_DWORDX2 : FLAT_Store_Pseudo<"flat_store_dwordx2", VReg_64>;
def FLAT_STORE_DWORDX4 : FLAT_Store_Pseudo<"flat_store_dwordx4", VReg_128>;
def FLAT_STORE_DWORDX3 : FLAT_Store_Pseudo<"flat_store_dwordx3", VReg_96>;
662
// D16 flat loads and stores, all gated on HasD16LoadStore.
let SubtargetPredicate = HasD16LoadStore in {

  // D16 loads take a tied output operand (HasTiedOutput = 1) and are
  // flagged TiedSourceNotRead.
  let TiedSourceNotRead = 1 in {
    def FLAT_LOAD_UBYTE_D16     : FLAT_Load_Pseudo<"flat_load_ubyte_d16",    VGPR_32, 1>;
    def FLAT_LOAD_UBYTE_D16_HI  : FLAT_Load_Pseudo<"flat_load_ubyte_d16_hi", VGPR_32, 1>;
    def FLAT_LOAD_SBYTE_D16     : FLAT_Load_Pseudo<"flat_load_sbyte_d16",    VGPR_32, 1>;
    def FLAT_LOAD_SBYTE_D16_HI  : FLAT_Load_Pseudo<"flat_load_sbyte_d16_hi", VGPR_32, 1>;
    def FLAT_LOAD_SHORT_D16     : FLAT_Load_Pseudo<"flat_load_short_d16",    VGPR_32, 1>;
    def FLAT_LOAD_SHORT_D16_HI  : FLAT_Load_Pseudo<"flat_load_short_d16_hi", VGPR_32, 1>;
  } // End TiedSourceNotRead = 1

  def FLAT_STORE_BYTE_D16_HI  : FLAT_Store_Pseudo<"flat_store_byte_d16_hi",  VGPR_32>;
  def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo<"flat_store_short_d16_hi", VGPR_32>;

} // End SubtargetPredicate = HasD16LoadStore
676
// Flat-segment integer atomics. Each defm yields the non-returning and the
// _RTN form.

// Compare-and-swap: the data operand is a vector twice as wide as the result.
defm FLAT_ATOMIC_CMPSWAP    : FLAT_Atomic_Pseudo<"flat_atomic_cmpswap",    VGPR_32, i32, v2i32, VReg_64>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo<"flat_atomic_cmpswap_x2", VReg_64, i64, v2i64, VReg_128>;

defm FLAT_ATOMIC_SWAP       : FLAT_Atomic_Pseudo<"flat_atomic_swap",    VGPR_32, i32>;
defm FLAT_ATOMIC_SWAP_X2    : FLAT_Atomic_Pseudo<"flat_atomic_swap_x2", VReg_64, i64>;

// 32-bit read-modify-write operations.
defm FLAT_ATOMIC_ADD        : FLAT_Atomic_Pseudo<"flat_atomic_add",  VGPR_32, i32>;
defm FLAT_ATOMIC_SUB        : FLAT_Atomic_Pseudo<"flat_atomic_sub",  VGPR_32, i32>;
defm FLAT_ATOMIC_SMIN       : FLAT_Atomic_Pseudo<"flat_atomic_smin", VGPR_32, i32>;
defm FLAT_ATOMIC_UMIN       : FLAT_Atomic_Pseudo<"flat_atomic_umin", VGPR_32, i32>;
defm FLAT_ATOMIC_SMAX       : FLAT_Atomic_Pseudo<"flat_atomic_smax", VGPR_32, i32>;
defm FLAT_ATOMIC_UMAX       : FLAT_Atomic_Pseudo<"flat_atomic_umax", VGPR_32, i32>;
defm FLAT_ATOMIC_AND        : FLAT_Atomic_Pseudo<"flat_atomic_and",  VGPR_32, i32>;
defm FLAT_ATOMIC_OR         : FLAT_Atomic_Pseudo<"flat_atomic_or",   VGPR_32, i32>;
defm FLAT_ATOMIC_XOR        : FLAT_Atomic_Pseudo<"flat_atomic_xor",  VGPR_32, i32>;
defm FLAT_ATOMIC_INC        : FLAT_Atomic_Pseudo<"flat_atomic_inc",  VGPR_32, i32>;
defm FLAT_ATOMIC_DEC        : FLAT_Atomic_Pseudo<"flat_atomic_dec",  VGPR_32, i32>;

// 64-bit read-modify-write operations.
defm FLAT_ATOMIC_ADD_X2     : FLAT_Atomic_Pseudo<"flat_atomic_add_x2",  VReg_64, i64>;
defm FLAT_ATOMIC_SUB_X2     : FLAT_Atomic_Pseudo<"flat_atomic_sub_x2",  VReg_64, i64>;
defm FLAT_ATOMIC_SMIN_X2    : FLAT_Atomic_Pseudo<"flat_atomic_smin_x2", VReg_64, i64>;
defm FLAT_ATOMIC_UMIN_X2    : FLAT_Atomic_Pseudo<"flat_atomic_umin_x2", VReg_64, i64>;
defm FLAT_ATOMIC_SMAX_X2    : FLAT_Atomic_Pseudo<"flat_atomic_smax_x2", VReg_64, i64>;
defm FLAT_ATOMIC_UMAX_X2    : FLAT_Atomic_Pseudo<"flat_atomic_umax_x2", VReg_64, i64>;
defm FLAT_ATOMIC_AND_X2     : FLAT_Atomic_Pseudo<"flat_atomic_and_x2",  VReg_64, i64>;
defm FLAT_ATOMIC_OR_X2      : FLAT_Atomic_Pseudo<"flat_atomic_or_x2",   VReg_64, i64>;
defm FLAT_ATOMIC_XOR_X2     : FLAT_Atomic_Pseudo<"flat_atomic_xor_x2",  VReg_64, i64>;
defm FLAT_ATOMIC_INC_X2     : FLAT_Atomic_Pseudo<"flat_atomic_inc_x2",  VReg_64, i64>;
defm FLAT_ATOMIC_DEC_X2     : FLAT_Atomic_Pseudo<"flat_atomic_dec_x2",  VReg_64, i64>;
754
// GFX7-, GFX10-only flat instructions.
let SubtargetPredicate = isGFX7GFX10 in {
  defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo<"flat_atomic_fcmpswap_x2", VReg_64, f64, v2f64, VReg_128>;
} // End SubtargetPredicate = isGFX7GFX10


// The names may be flat_atomic_fmin_x2 on some subtargets, but we
// choose this as the canonical name.
let SubtargetPredicate = HasAtomicFMinFMaxF64FlatInsts in {
  defm FLAT_ATOMIC_MIN_F64 : FLAT_Atomic_Pseudo<"flat_atomic_min_f64", VReg_64, f64>;
  defm FLAT_ATOMIC_MAX_F64 : FLAT_Atomic_Pseudo<"flat_atomic_max_f64", VReg_64, f64>;
} // End SubtargetPredicate = HasAtomicFMinFMaxF64FlatInsts

// Global-segment counterparts of the f64 min/max atomics.
let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in {
  defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_min_f64", VReg_64, f64>;
  defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64>;
} // End SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts
776
// f64 add atomics, flat and global.
let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in {
  defm FLAT_ATOMIC_ADD_F64   : FLAT_Atomic_Pseudo<"flat_atomic_add_f64", VReg_64, f64>;
  defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_add_f64", VReg_64, f64>;
} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst

// Packed 16-bit add atomics. The bf16 forms are declared with the integer
// type v2i16, so FPAtomic is set on them explicitly.
let SubtargetPredicate = HasAtomicFlatPkAdd16Insts in {
  defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_f16",  VGPR_32, v2f16>;

  let FPAtomic = 1 in
  defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_bf16", VGPR_32, v2i16>;
} // End SubtargetPredicate = HasAtomicFlatPkAdd16Insts

let SubtargetPredicate = HasAtomicGlobalPkAddBF16Inst, FPAtomic = 1 in
defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Atomic_Pseudo<"global_atomic_pk_add_bf16", VGPR_32, v2i16>;
790
// GFX7-, GFX10-, GFX11-only flat instructions.
let SubtargetPredicate = isGFX7GFX10GFX11 in {
  defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo<"flat_atomic_fcmpswap", VGPR_32, f32, v2f32, VReg_64>;
  defm FLAT_ATOMIC_FMIN     : FLAT_Atomic_Pseudo<"flat_atomic_fmin", VGPR_32, f32>;
  defm FLAT_ATOMIC_FMAX     : FLAT_Atomic_Pseudo<"flat_atomic_fmax", VGPR_32, f32>;
} // End SubtargetPredicate = isGFX7GFX10GFX11

// GFX940-, GFX11-only flat instructions.
let SubtargetPredicate = HasFlatAtomicFaddF32Inst in {
  defm FLAT_ATOMIC_ADD_F32 : FLAT_Atomic_Pseudo<"flat_atomic_add_f32",     VGPR_32, f32>;
} // End SubtargetPredicate = HasFlatAtomicFaddF32Inst

// Flat atomics gated on isGFX12Plus.
let SubtargetPredicate = isGFX12Plus in {
  defm FLAT_ATOMIC_CSUB_U32     : FLAT_Atomic_Pseudo<"flat_atomic_csub_u32", VGPR_32, i32>;
  defm FLAT_ATOMIC_COND_SUB_U32 : FLAT_Atomic_Pseudo<"flat_atomic_cond_sub_u32", VGPR_32, i32>;
} // End SubtargetPredicate = isGFX12Plus
814
// Global load pseudos. Sub-dword and single-dword loads use VGPR_32; the
// multi-dword loads use a register class of matching width.
defm GLOBAL_LOAD_UBYTE :
  FLAT_Global_Load_Pseudo<"global_load_ubyte", VGPR_32>;
defm GLOBAL_LOAD_SBYTE :
  FLAT_Global_Load_Pseudo<"global_load_sbyte", VGPR_32>;
defm GLOBAL_LOAD_USHORT :
  FLAT_Global_Load_Pseudo<"global_load_ushort", VGPR_32>;
defm GLOBAL_LOAD_SSHORT :
  FLAT_Global_Load_Pseudo<"global_load_sshort", VGPR_32>;
defm GLOBAL_LOAD_DWORD :
  FLAT_Global_Load_Pseudo<"global_load_dword", VGPR_32>;
defm GLOBAL_LOAD_DWORDX2 :
  FLAT_Global_Load_Pseudo<"global_load_dwordx2", VReg_64>;
defm GLOBAL_LOAD_DWORDX3 :
  FLAT_Global_Load_Pseudo<"global_load_dwordx3", VReg_96>;
defm GLOBAL_LOAD_DWORDX4 :
  FLAT_Global_Load_Pseudo<"global_load_dwordx4", VReg_128>;
823
// D16 global loads. All are marked TiedSourceNotRead and pass a trailing 1 to
// FLAT_Global_Load_Pseudo.
// NOTE(review): the trailing 1 presumably requests the tied-input form of the
// pseudo; its parameter is declared outside this section -- confirm.
let TiedSourceNotRead = 1 in {
  defm GLOBAL_LOAD_UBYTE_D16 :
    FLAT_Global_Load_Pseudo<"global_load_ubyte_d16", VGPR_32, 1>;
  defm GLOBAL_LOAD_UBYTE_D16_HI :
    FLAT_Global_Load_Pseudo<"global_load_ubyte_d16_hi", VGPR_32, 1>;
  defm GLOBAL_LOAD_SBYTE_D16 :
    FLAT_Global_Load_Pseudo<"global_load_sbyte_d16", VGPR_32, 1>;
  defm GLOBAL_LOAD_SBYTE_D16_HI :
    FLAT_Global_Load_Pseudo<"global_load_sbyte_d16_hi", VGPR_32, 1>;
  defm GLOBAL_LOAD_SHORT_D16 :
    FLAT_Global_Load_Pseudo<"global_load_short_d16", VGPR_32, 1>;
  defm GLOBAL_LOAD_SHORT_D16_HI :
    FLAT_Global_Load_Pseudo<"global_load_short_d16_hi", VGPR_32, 1>;
} // End TiedSourceNotRead = 1
832
// ADDTID dword load; requires the GFX10_B encoding.
let OtherPredicates = [HasGFX10_BEncoding] in {
  defm GLOBAL_LOAD_DWORD_ADDTID :
    FLAT_Global_Load_AddTid_Pseudo<"global_load_dword_addtid", VGPR_32>;
} // End OtherPredicates = [HasGFX10_BEncoding]
835
// Global store pseudos, from byte up to four dwords.
defm GLOBAL_STORE_BYTE :
  FLAT_Global_Store_Pseudo<"global_store_byte", VGPR_32>;
defm GLOBAL_STORE_SHORT :
  FLAT_Global_Store_Pseudo<"global_store_short", VGPR_32>;
defm GLOBAL_STORE_DWORD :
  FLAT_Global_Store_Pseudo<"global_store_dword", VGPR_32>;
defm GLOBAL_STORE_DWORDX2 :
  FLAT_Global_Store_Pseudo<"global_store_dwordx2", VReg_64>;
defm GLOBAL_STORE_DWORDX3 :
  FLAT_Global_Store_Pseudo<"global_store_dwordx3", VReg_96>;
defm GLOBAL_STORE_DWORDX4 :
  FLAT_Global_Store_Pseudo<"global_store_dwordx4", VReg_128>;

// ADDTID dword store; requires the GFX10_B encoding.
let OtherPredicates = [HasGFX10_BEncoding] in {
  defm GLOBAL_STORE_DWORD_ADDTID :
    FLAT_Global_Store_AddTid_Pseudo<"global_store_dword_addtid", VGPR_32>;
}
844
// D16_HI global stores (byte and short).
defm GLOBAL_STORE_BYTE_D16_HI :
  FLAT_Global_Store_Pseudo<"global_store_byte_d16_hi", VGPR_32>;
defm GLOBAL_STORE_SHORT_D16_HI :
  FLAT_Global_Store_Pseudo<"global_store_short_d16_hi", VGPR_32>;
847
// Global compare-and-swap. The data operand is a two-element vector of the
// result type, held in a register class twice as wide as the result.
defm GLOBAL_ATOMIC_CMPSWAP :
  FLAT_Global_Atomic_Pseudo<"global_atomic_cmpswap",
                            VGPR_32, i32, v2i32, VReg_64>;

defm GLOBAL_ATOMIC_CMPSWAP_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_cmpswap_x2",
                            VReg_64, i64, v2i64, VReg_128>;
853
// Global atomics: swap (32- and 64-bit) followed by the 32-bit integer
// read-modify-write operations.
defm GLOBAL_ATOMIC_SWAP :
  FLAT_Global_Atomic_Pseudo<"global_atomic_swap", VGPR_32, i32>;

defm GLOBAL_ATOMIC_SWAP_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_swap_x2", VReg_64, i64>;

defm GLOBAL_ATOMIC_ADD :
  FLAT_Global_Atomic_Pseudo<"global_atomic_add", VGPR_32, i32>;

defm GLOBAL_ATOMIC_SUB :
  FLAT_Global_Atomic_Pseudo<"global_atomic_sub", VGPR_32, i32>;

defm GLOBAL_ATOMIC_SMIN :
  FLAT_Global_Atomic_Pseudo<"global_atomic_smin", VGPR_32, i32>;

defm GLOBAL_ATOMIC_UMIN :
  FLAT_Global_Atomic_Pseudo<"global_atomic_umin", VGPR_32, i32>;

defm GLOBAL_ATOMIC_SMAX :
  FLAT_Global_Atomic_Pseudo<"global_atomic_smax", VGPR_32, i32>;

defm GLOBAL_ATOMIC_UMAX :
  FLAT_Global_Atomic_Pseudo<"global_atomic_umax", VGPR_32, i32>;

defm GLOBAL_ATOMIC_AND :
  FLAT_Global_Atomic_Pseudo<"global_atomic_and", VGPR_32, i32>;

defm GLOBAL_ATOMIC_OR :
  FLAT_Global_Atomic_Pseudo<"global_atomic_or", VGPR_32, i32>;

defm GLOBAL_ATOMIC_XOR :
  FLAT_Global_Atomic_Pseudo<"global_atomic_xor", VGPR_32, i32>;

defm GLOBAL_ATOMIC_INC :
  FLAT_Global_Atomic_Pseudo<"global_atomic_inc", VGPR_32, i32>;

defm GLOBAL_ATOMIC_DEC :
  FLAT_Global_Atomic_Pseudo<"global_atomic_dec", VGPR_32, i32>;
892
// 64-bit (_X2) global integer read-modify-write atomics.
defm GLOBAL_ATOMIC_ADD_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_add_x2", VReg_64, i64>;

defm GLOBAL_ATOMIC_SUB_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_sub_x2", VReg_64, i64>;

defm GLOBAL_ATOMIC_SMIN_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_smin_x2", VReg_64, i64>;

defm GLOBAL_ATOMIC_UMIN_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_umin_x2", VReg_64, i64>;

defm GLOBAL_ATOMIC_SMAX_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_smax_x2", VReg_64, i64>;

defm GLOBAL_ATOMIC_UMAX_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_umax_x2", VReg_64, i64>;

defm GLOBAL_ATOMIC_AND_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_and_x2", VReg_64, i64>;

defm GLOBAL_ATOMIC_OR_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_or_x2", VReg_64, i64>;

defm GLOBAL_ATOMIC_XOR_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_xor_x2", VReg_64, i64>;

defm GLOBAL_ATOMIC_INC_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_inc_x2", VReg_64, i64>;

defm GLOBAL_ATOMIC_DEC_X2 :
  FLAT_Global_Atomic_Pseudo<"global_atomic_dec_x2", VReg_64, i64>;
925
// global_atomic_csub is only defined for the GFX10_B encoding.
let SubtargetPredicate = HasGFX10_BEncoding in {
  defm GLOBAL_ATOMIC_CSUB :
    FLAT_Global_Atomic_Pseudo<"global_atomic_csub", VGPR_32, i32>;
} // End SubtargetPredicate = HasGFX10_BEncoding
930
// Global load-to-LDS pseudos, byte through dword.
defm GLOBAL_LOAD_LDS_UBYTE :
  FLAT_Global_Load_LDS_Pseudo<"global_load_lds_ubyte">;
defm GLOBAL_LOAD_LDS_SBYTE :
  FLAT_Global_Load_LDS_Pseudo<"global_load_lds_sbyte">;
defm GLOBAL_LOAD_LDS_USHORT :
  FLAT_Global_Load_LDS_Pseudo<"global_load_lds_ushort">;
defm GLOBAL_LOAD_LDS_SSHORT :
  FLAT_Global_Load_LDS_Pseudo<"global_load_lds_sshort">;
defm GLOBAL_LOAD_LDS_DWORD :
  FLAT_Global_Load_LDS_Pseudo<"global_load_lds_dword">;
936
// Wider (3- and 4-dword) global load-to-LDS pseudos, gated on HasGFX950Insts.
let SubtargetPredicate = HasGFX950Insts in {
  defm GLOBAL_LOAD_LDS_DWORDX3 :
    FLAT_Global_Load_LDS_Pseudo<"global_load_lds_dwordx3">;
  defm GLOBAL_LOAD_LDS_DWORDX4 :
    FLAT_Global_Load_LDS_Pseudo<"global_load_lds_dwordx4">;
} // End SubtargetPredicate = HasGFX950Insts
941
// GFX12+ global additions: two atomics plus the invalidate/writeback
// instructions.
let SubtargetPredicate = isGFX12Plus in {
  defm GLOBAL_ATOMIC_COND_SUB_U32 :
    FLAT_Global_Atomic_Pseudo<"global_atomic_cond_sub_u32", VGPR_32, i32>;
  defm GLOBAL_ATOMIC_ORDERED_ADD_B64 :
    FLAT_Global_Atomic_Pseudo<"global_atomic_ordered_add_b64", VReg_64, i64>;

  def GLOBAL_INV   : FLAT_Global_Invalidate_Writeback<"global_inv">;
  def GLOBAL_WB    : FLAT_Global_Invalidate_Writeback<"global_wb">;
  def GLOBAL_WBINV : FLAT_Global_Invalidate_Writeback<"global_wbinv">;
} // End SubtargetPredicate = isGFX12Plus
950
// Scratch load pseudos, mirroring the global load set above.
defm SCRATCH_LOAD_UBYTE :
  FLAT_Scratch_Load_Pseudo<"scratch_load_ubyte", VGPR_32>;
defm SCRATCH_LOAD_SBYTE :
  FLAT_Scratch_Load_Pseudo<"scratch_load_sbyte", VGPR_32>;
defm SCRATCH_LOAD_USHORT :
  FLAT_Scratch_Load_Pseudo<"scratch_load_ushort", VGPR_32>;
defm SCRATCH_LOAD_SSHORT :
  FLAT_Scratch_Load_Pseudo<"scratch_load_sshort", VGPR_32>;
defm SCRATCH_LOAD_DWORD :
  FLAT_Scratch_Load_Pseudo<"scratch_load_dword", VGPR_32>;
defm SCRATCH_LOAD_DWORDX2 :
  FLAT_Scratch_Load_Pseudo<"scratch_load_dwordx2", VReg_64>;
defm SCRATCH_LOAD_DWORDX3 :
  FLAT_Scratch_Load_Pseudo<"scratch_load_dwordx3", VReg_96>;
defm SCRATCH_LOAD_DWORDX4 :
  FLAT_Scratch_Load_Pseudo<"scratch_load_dwordx4", VReg_128>;
959
// D16 scratch loads. All are marked TiedSourceNotRead and pass a trailing 1 to
// FLAT_Scratch_Load_Pseudo.
// NOTE(review): the trailing 1 presumably requests the tied-input form of the
// pseudo; its parameter is declared outside this section -- confirm.
let TiedSourceNotRead = 1 in {
  defm SCRATCH_LOAD_UBYTE_D16 :
    FLAT_Scratch_Load_Pseudo<"scratch_load_ubyte_d16", VGPR_32, 1>;
  defm SCRATCH_LOAD_UBYTE_D16_HI :
    FLAT_Scratch_Load_Pseudo<"scratch_load_ubyte_d16_hi", VGPR_32, 1>;
  defm SCRATCH_LOAD_SBYTE_D16 :
    FLAT_Scratch_Load_Pseudo<"scratch_load_sbyte_d16", VGPR_32, 1>;
  defm SCRATCH_LOAD_SBYTE_D16_HI :
    FLAT_Scratch_Load_Pseudo<"scratch_load_sbyte_d16_hi", VGPR_32, 1>;
  defm SCRATCH_LOAD_SHORT_D16 :
    FLAT_Scratch_Load_Pseudo<"scratch_load_short_d16", VGPR_32, 1>;
  defm SCRATCH_LOAD_SHORT_D16_HI :
    FLAT_Scratch_Load_Pseudo<"scratch_load_short_d16_hi", VGPR_32, 1>;
} // End TiedSourceNotRead = 1
968
// Scratch store pseudos, from byte up to four dwords.
defm SCRATCH_STORE_BYTE :
  FLAT_Scratch_Store_Pseudo<"scratch_store_byte", VGPR_32>;
defm SCRATCH_STORE_SHORT :
  FLAT_Scratch_Store_Pseudo<"scratch_store_short", VGPR_32>;
defm SCRATCH_STORE_DWORD :
  FLAT_Scratch_Store_Pseudo<"scratch_store_dword", VGPR_32>;
defm SCRATCH_STORE_DWORDX2 :
  FLAT_Scratch_Store_Pseudo<"scratch_store_dwordx2", VReg_64>;
defm SCRATCH_STORE_DWORDX3 :
  FLAT_Scratch_Store_Pseudo<"scratch_store_dwordx3", VReg_96>;
defm SCRATCH_STORE_DWORDX4 :
  FLAT_Scratch_Store_Pseudo<"scratch_store_dwordx4", VReg_128>;

// D16_HI scratch stores (byte and short).
defm SCRATCH_STORE_BYTE_D16_HI :
  FLAT_Scratch_Store_Pseudo<"scratch_store_byte_d16_hi", VGPR_32>;
defm SCRATCH_STORE_SHORT_D16_HI :
  FLAT_Scratch_Store_Pseudo<"scratch_store_short_d16_hi", VGPR_32>;
978
// Scratch load-to-LDS pseudos, byte through dword.
defm SCRATCH_LOAD_LDS_UBYTE :
  FLAT_Scratch_Load_LDS_Pseudo<"scratch_load_lds_ubyte">;
defm SCRATCH_LOAD_LDS_SBYTE :
  FLAT_Scratch_Load_LDS_Pseudo<"scratch_load_lds_sbyte">;
defm SCRATCH_LOAD_LDS_USHORT :
  FLAT_Scratch_Load_LDS_Pseudo<"scratch_load_lds_ushort">;
defm SCRATCH_LOAD_LDS_SSHORT :
  FLAT_Scratch_Load_LDS_Pseudo<"scratch_load_lds_sshort">;
defm SCRATCH_LOAD_LDS_DWORD :
  FLAT_Scratch_Load_LDS_Pseudo<"scratch_load_lds_dword">;
984
// GFX12+ block loads/stores and transpose loads.
let SubtargetPredicate = isGFX12Plus in {
  // Block transfers use a 1024-bit register tuple and additionally list M0 in
  // Uses.
  let Uses = [EXEC, M0] in {
    defm GLOBAL_LOAD_BLOCK :
      FLAT_Global_Load_Pseudo<"global_load_block", VReg_1024>;
    defm GLOBAL_STORE_BLOCK :
      FLAT_Global_Store_Pseudo<"global_store_block", VReg_1024>;
  }

  // The scratch forms also list FLAT_SCR in Uses.
  let Uses = [EXEC, FLAT_SCR, M0] in {
    defm SCRATCH_LOAD_BLOCK :
      FLAT_Scratch_Load_Pseudo<"scratch_load_block", VReg_1024>;
    defm SCRATCH_STORE_BLOCK :
      FLAT_Scratch_Store_Pseudo<"scratch_store_block", VReg_1024>;
  }

  // Transpose loads get separate wave32/wave64 pseudos that share one
  // mnemonic (overridden through Mnemonic); the wave64 variants use a
  // destination register class half as wide as the wave32 ones.
  let WaveSizePredicate = isWave32 in {
    let Mnemonic = "global_load_tr_b128" in {
      defm GLOBAL_LOAD_TR_B128_w32 :
        FLAT_Global_Load_Pseudo<"global_load_tr_b128_w32", VReg_128>;
    }
    let Mnemonic = "global_load_tr_b64" in {
      defm GLOBAL_LOAD_TR_B64_w32 :
        FLAT_Global_Load_Pseudo<"global_load_tr_b64_w32", VReg_64>;
    }
  }

  let WaveSizePredicate = isWave64 in {
    let Mnemonic = "global_load_tr_b128" in {
      defm GLOBAL_LOAD_TR_B128_w64 :
        FLAT_Global_Load_Pseudo<"global_load_tr_b128_w64", VReg_64>;
    }
    let Mnemonic = "global_load_tr_b64" in {
      defm GLOBAL_LOAD_TR_B64_w64 :
        FLAT_Global_Load_Pseudo<"global_load_tr_b64_w64", VGPR_32>;
    }
  }
} // End SubtargetPredicate = isGFX12Plus
1008
// GFX10+ global floating-point atomics: compare-and-swap (f32 and f64) and
// f32 min/max.
let SubtargetPredicate = isGFX10Plus in {
  defm GLOBAL_ATOMIC_FCMPSWAP : FLAT_Global_Atomic_Pseudo<
    "global_atomic_fcmpswap", VGPR_32, f32, v2f32, VReg_64>;

  defm GLOBAL_ATOMIC_FMIN : FLAT_Global_Atomic_Pseudo<
    "global_atomic_fmin", VGPR_32, f32>;

  defm GLOBAL_ATOMIC_FMAX : FLAT_Global_Atomic_Pseudo<
    "global_atomic_fmax", VGPR_32, f32>;

  defm GLOBAL_ATOMIC_FCMPSWAP_X2 : FLAT_Global_Atomic_Pseudo<
    "global_atomic_fcmpswap_x2", VReg_64, f64, v2f64, VReg_128>;
} // End SubtargetPredicate = isGFX10Plus
1019
// global_atomic_add_f32 and global_atomic_pk_add_f16 define their no-return
// and returning forms separately because each form has its own predicate.
let OtherPredicates = [HasAtomicFaddNoRtnInsts] in {
  defm GLOBAL_ATOMIC_ADD_F32 :
    FLAT_Global_Atomic_Pseudo_NO_RTN<"global_atomic_add_f32", VGPR_32, f32>;
}

let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in {
  defm GLOBAL_ATOMIC_PK_ADD_F16 :
    FLAT_Global_Atomic_Pseudo_NO_RTN<"global_atomic_pk_add_f16", VGPR_32, v2f16>;
}

let OtherPredicates = [HasAtomicFaddRtnInsts] in {
  defm GLOBAL_ATOMIC_ADD_F32 :
    FLAT_Global_Atomic_Pseudo_RTN<"global_atomic_add_f32", VGPR_32, f32>;
}

let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in {
  defm GLOBAL_ATOMIC_PK_ADD_F16 :
    FLAT_Global_Atomic_Pseudo_RTN<"global_atomic_pk_add_f16", VGPR_32, v2f16>;
}
1036
1037//===----------------------------------------------------------------------===//
1038// Flat Patterns
1039//===----------------------------------------------------------------------===//
1040
// Patterns for flat loads; FlatOffset matches the address as $vaddr plus an immediate $offset.
// Selects `inst $vaddr, $offset` for a `node` load of type `vt` whose address
// is matched by FlatOffset.
class FlatLoadPat<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<
      (vt (node (FlatOffset i64:$vaddr, i32:$offset))),
      (inst $vaddr, $offset)
    >;
1046
// D16 flat load: `node` takes the FlatOffset address and an incoming value
// $in of type `vt`, which is forwarded as the last operand of `inst`.
// NOTE(review): the literal 0 fills the operand between $offset and $in
// (presumably the cache-policy operand) -- confirm against the pseudo.
class FlatLoadPat_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<
      (node (FlatOffset (i64 VReg_64:$vaddr), i32:$offset), vt:$in),
      (inst $vaddr, $offset, 0, $in)
    >;
1051
// Same shape as FlatLoadPat_D16, but the address is matched by GlobalOffset
// instead of FlatOffset.
class FlatSignedLoadPat_D16<FLAT_Pseudo inst, SDPatternOperator node,
                            ValueType vt>
  : GCNPat<
      (node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset), vt:$in),
      (inst $vaddr, $offset, 0, $in)
    >;
1056
// D16 global load in SADDR form: GlobalSAddr yields an SGPR base, a VGPR
// offset and an immediate offset; $in is forwarded as the last operand.
class GlobalLoadSaddrPat_D16<FLAT_Pseudo inst, SDPatternOperator node,
                             ValueType vt>
  : GCNPat<
      (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset),
                             i32:$offset),
                vt:$in)),
      (inst $saddr, $voffset, $offset, 0, $in)
    >;
1061
// Load pattern whose address is matched by GlobalOffset; emits
// `inst $vaddr, $offset`.
class FlatLoadSignedPat<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<
      (vt (node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset))),
      (inst $vaddr, $offset)
    >;
1066
// Global load in SADDR form: SGPR base, VGPR offset and immediate offset all
// come from GlobalSAddr. The trailing 0 is passed as a literal operand.
class GlobalLoadSaddrPat<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<
      (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset),
                             i32:$offset))),
      (inst $saddr, $voffset, $offset, 0)
    >;
1071
// Global store in SADDR form. Note the output operand order: voffset, data,
// saddr, offset. The data register class is derived from `vt`.
class GlobalStoreSaddrPat<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<
      (node vt:$data,
            (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset),
                         i32:$offset)),
      (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
    >;
1077
// Atomic in SADDR form producing a `vt` result. `data_vt` (defaulting to `vt`)
// is the type of the data operand and selects its register class.
class GlobalAtomicSaddrPat<FLAT_Pseudo inst, SDPatternOperator node,
                           ValueType vt, ValueType data_vt = vt>
  : GCNPat<
      (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset),
                             i32:$offset),
                data_vt:$data)),
      (inst $voffset, getVregSrcForVT<data_vt>.ret:$data, $saddr, $offset)
    >;
1083
// Atomic in SADDR form whose source pattern has no result type wrapper.
class GlobalAtomicNoRtnSaddrPat<FLAT_Pseudo inst, SDPatternOperator node,
                                ValueType vt>
  : GCNPat<
      (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset),
                         i32:$offset),
            vt:$data),
      (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
    >;
1089
// Flat store: data first in the source node, address matched by FlatOffset.
// Emits `inst $vaddr, $data, $offset` with the data register class taken
// from `vt`.
class FlatStorePat<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<
      (node vt:$data, (FlatOffset i64:$vaddr, i32:$offset)),
      (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
    >;
1094
// Store pattern identical in shape to FlatStorePat, but the address is
// matched by GlobalOffset.
class FlatStoreSignedPat<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat<
      (node vt:$data, (GlobalOffset i64:$vaddr, i32:$offset)),
      (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
    >;
1099
// Atomic store with a GlobalOffset address. Unlike the plain store patterns,
// the source node lists the address before the data, because atomic stores
// follow the atomic binop operand convention.
class FlatStoreSignedAtomicPat<FLAT_Pseudo inst, SDPatternOperator node,
                               ValueType vt, ValueType data_vt = vt>
  : GCNPat<
      (node (GlobalOffset i64:$vaddr, i32:$offset), data_vt:$data),
      (inst $vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)
    >;
1107
// Emits one pattern mapping the PatFrags named `node` onto the no-return
// pseudo named `inst`. AddedComplexity is 1 for this pattern.
multiclass FlatAtomicNoRtnPatBase<string inst, string node, ValueType vt,
                                  ValueType data_vt = vt> {
  defvar frag = !cast<PatFrags>(node);
  defvar pseudo = !cast<FLAT_Pseudo>(inst);

  let AddedComplexity = 1 in {
    def : GCNPat<
      (vt (frag (FlatOffset i64:$vaddr, i32:$offset), data_vt:$data)),
      (pseudo VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)
    >;
  }
}
1117
// No-return pattern whose node name is `node` + "_noret_" + addrSpaceSuffix.
multiclass FlatAtomicNoRtnPatWithAddrSpace<string inst, string node,
                                           string addrSpaceSuffix,
                                           ValueType vt>
  : FlatAtomicNoRtnPatBase<inst, node # "_noret_" # addrSpaceSuffix, vt, vt>;
1121
// No-return pattern whose node name is `node` + "_noret", followed by
// "_" + vt unless isIntr is set.
multiclass FlatAtomicNoRtnPat<string inst, string node, ValueType vt,
                              ValueType data_vt = vt, bit isIntr = 0>
  : FlatAtomicNoRtnPatBase<inst,
                           node # "_noret" # !if(isIntr, "", "_"#vt),
                           vt, data_vt>;
1125
1126
// Emits one pattern mapping the operator named `node` onto the returning
// pseudo, whose record name is `inst` + "_RTN".
multiclass FlatAtomicRtnPatBase<string inst, string node, ValueType vt,
                                ValueType data_vt = vt> {
  defvar op = !cast<SDPatternOperator>(node);
  defvar pseudo = !cast<FLAT_Pseudo>(inst#"_RTN");

  def : GCNPat<
    (vt (op (FlatOffset i64:$vaddr, i32:$offset), data_vt:$data)),
    (pseudo VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)
  >;
}
1135
// Returning pattern whose node name is `intr` + "_" + addrSpaceSuffix.
multiclass FlatAtomicRtnPatWithAddrSpace<string inst, string intr,
                                         string addrSpaceSuffix,
                                         ValueType vt>
  : FlatAtomicRtnPatBase<inst, intr # "_" # addrSpaceSuffix, vt, vt>;
1139
// Returning pattern whose node name is `node`, followed by "_" + vt unless
// isIntr is set.
multiclass FlatAtomicRtnPat<string inst, string node, ValueType vt,
                            ValueType data_vt = vt, bit isIntr = 0>
  : FlatAtomicRtnPatBase<inst,
                         node # !if(isIntr, "", "_"#vt),
                         vt, data_vt>;
1143
1144
// Convenience wrapper producing both the returning and the no-return pattern
// for one atomic.
multiclass FlatAtomicPat<string inst, string node, ValueType vt,
                         ValueType data_vt = vt, bit isIntr = 0>
  : FlatAtomicRtnPat<inst, node, vt, data_vt, isIntr>,
    FlatAtomicNoRtnPat<inst, node, vt, data_vt, isIntr>;
1149
// No-return pattern for an intrinsic node: same as FlatAtomicNoRtnPat with
// isIntr = 1, so no "_" + vt suffix is appended to the node name.
multiclass FlatAtomicIntrNoRtnPat<string inst, string node, ValueType vt,
                                  ValueType data_vt = vt>
  : FlatAtomicNoRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>;
1154
// Returning pattern for an intrinsic node: same as FlatAtomicRtnPat with
// isIntr = 1, so no "_" + vt suffix is appended to the node name.
multiclass FlatAtomicIntrRtnPat<string inst, string node, ValueType vt,
                                ValueType data_vt = vt>
  : FlatAtomicRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>;
1159
// Returning and no-return patterns for an intrinsic node (isIntr = 1 on both).
multiclass FlatAtomicIntrPat<string inst, string node, ValueType vt,
                             ValueType data_vt = vt>
  : FlatAtomicRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>,
    FlatAtomicNoRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>;
1164
// Atomic pattern with a GlobalOffset address; emits
// `inst $vaddr, $data, $offset` with the data register class taken from
// `data_vt`.
class FlatSignedAtomicPatBase<FLAT_Pseudo inst, SDPatternOperator node,
                              ValueType vt, ValueType data_vt = vt>
  : GCNPat<
      (vt (node (GlobalOffset i64:$vaddr, i32:$offset), data_vt:$data)),
      (inst VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)
    >;
1170
// Emits the returning and no-return GlobalOffset patterns for one atomic.
// Node names get a "_" + vt suffix unless isIntr is set. The no-return
// pattern is given AddedComplexity one higher than the returning pattern.
multiclass FlatSignedAtomicPat<string inst, string node, ValueType vt,
                               ValueType data_vt = vt, int complexity = 0,
                               bit isIntr = 0> {
  defvar typeSuffix = !if(isIntr, "", "_" # vt);
  defvar rtnOp = !cast<SDPatternOperator>(node # typeSuffix);
  defvar noRtnFrag = !cast<PatFrags>(node # "_noret" # typeSuffix);

  let AddedComplexity = complexity in {
    def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst#"_RTN"), rtnOp,
                                  vt, data_vt>;
  }

  let AddedComplexity = !add(complexity, 1) in {
    def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), noRtnFrag,
                                  vt, data_vt>;
  }
}
1183
// Scratch load with a 32-bit VGPR address matched by ScratchOffset.
class ScratchLoadSignedPat<FLAT_Pseudo inst, SDPatternOperator node,
                           ValueType vt>
  : GCNPat<
      (vt (node (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset))),
      (inst $vaddr, $offset)
    >;
1188
// D16 scratch load with a VGPR address; the incoming value $in is forwarded
// as the last operand after a literal 0.
class ScratchLoadSignedPat_D16<FLAT_Pseudo inst, SDPatternOperator node,
                               ValueType vt>
  : GCNPat<
      (node (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset), vt:$in),
      (inst $vaddr, $offset, 0, $in)
    >;
1193
// Scratch store with a VGPR address. The output puts the data operand first,
// then the address and offset.
class ScratchStoreSignedPat<FLAT_Pseudo inst, SDPatternOperator node,
                            ValueType vt>
  : GCNPat<
      (node vt:$data, (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset)),
      (inst getVregSrcForVT<vt>.ret:$data, $vaddr, $offset)
    >;
1198
// Scratch load with a 32-bit SGPR address matched by ScratchSAddr.
class ScratchLoadSaddrPat<FLAT_Pseudo inst, SDPatternOperator node,
                          ValueType vt>
  : GCNPat<
      (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset))),
      (inst $saddr, $offset)
    >;
1203
// D16 scratch load with an SGPR address; $in is forwarded as the last operand
// after a literal 0.
class ScratchLoadSaddrPat_D16<FLAT_Pseudo inst, SDPatternOperator node,
                              ValueType vt>
  : GCNPat<
      (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset), vt:$in)),
      (inst $saddr, $offset, 0, $in)
    >;
1208
// Scratch store with an SGPR address; data operand first in the output.
class ScratchStoreSaddrPat<FLAT_Pseudo inst, SDPatternOperator node,
                           ValueType vt>
  : GCNPat<
      (node vt:$data, (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset)),
      (inst getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
    >;
1214
// Scratch load using both a VGPR and an SGPR address component, matched by
// ScratchSVAddr. The trailing 0 is passed as a literal operand.
class ScratchLoadSVaddrPat<FLAT_Pseudo inst, SDPatternOperator node,
                           ValueType vt>
  : GCNPat<
      (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr),
                               i32:$offset))),
      (inst $vaddr, $saddr, $offset, 0)
    >;
1219
// Scratch store using both a VGPR and an SGPR address component; data operand
// first in the output.
class ScratchStoreSVaddrPat<FLAT_Pseudo inst, SDPatternOperator node,
                            ValueType vt>
  : GCNPat<
      (node vt:$data,
            (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr),
                           i32:$offset)),
      (inst getVregSrcForVT<vt>.ret:$data, $vaddr, $saddr, $offset)
    >;
1225
// D16 scratch load using both a VGPR and an SGPR address component; $in is
// forwarded as the last operand after a literal 0.
class ScratchLoadSVaddrPat_D16<FLAT_Pseudo inst, SDPatternOperator node,
                               ValueType vt>
  : GCNPat<
      (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr),
                               i32:$offset),
                vt:$in)),
      (inst $vaddr, $saddr, $offset, 0, $in)
    >;
1230
// Global load patterns for one pseudo: the VGPR-address form
// (AddedComplexity 10) and the `_SADDR` form (AddedComplexity 11), so the
// SADDR form has the higher complexity of the two.
multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node,
                              ValueType vt> {
  let AddedComplexity = 10 in
  def : FlatLoadSignedPat<inst, node, vt>;

  let AddedComplexity = 11 in
  def : GlobalLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"),
                           node, vt>;
}
1240
// D16 counterpart of GlobalFLATLoadPats: VGPR-address form at
// AddedComplexity 10, `_SADDR` form at AddedComplexity 11.
multiclass GlobalFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node,
                                  ValueType vt> {
  let AddedComplexity = 10 in
  def : FlatSignedLoadPat_D16<inst, node, vt>;

  let AddedComplexity = 11 in
  def : GlobalLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"),
                               node, vt>;
}
1250
// Global store patterns for one pseudo: VGPR-address form at
// AddedComplexity 10, `_SADDR` form at AddedComplexity 11.
multiclass GlobalFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
                               ValueType vt> {
  let AddedComplexity = 10 in
  def : FlatStoreSignedPat<inst, node, vt>;

  let AddedComplexity = 11 in
  def : GlobalStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"),
                            node, vt>;
}
1261
// No-return global atomic patterns: VGPR-address form at AddedComplexity 11
// and `_SADDR` form at AddedComplexity 13. Both values are one above those
// of the corresponding returning patterns (10 and 12).
multiclass GlobalFLATAtomicPatsNoRtnBase<string inst, string node,
                                         ValueType vt,
                                         ValueType data_vt = vt> {
  let AddedComplexity = 11 in {
    def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst),
                                  !cast<SDPatternOperator>(node),
                                  vt, data_vt>;
  }

  let AddedComplexity = 13 in {
    def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR"),
                               !cast<SDPatternOperator>(node),
                               vt, data_vt>;
  }
}
1270
// Returning global atomic patterns: `_RTN` form at AddedComplexity 10 and
// `_SADDR_RTN` form at AddedComplexity 12. `isPatFrags` chooses whether the
// node name is cast to PatFrags or to SDPatternOperator.
multiclass GlobalFLATAtomicPatsRtnBase<string inst, string node, ValueType vt,
                                       ValueType data_vt = vt,
                                       bit isPatFrags = 0> {
  defvar op = !if(isPatFrags, !cast<PatFrags>(node),
                              !cast<SDPatternOperator>(node));

  let AddedComplexity = 10 in {
    def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst#"_RTN"), op,
                                  vt, data_vt>;
  }

  let AddedComplexity = 12 in {
    def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR_RTN"), op,
                               vt, data_vt>;
  }
}
1281
// No-return global atomic patterns; the node name is `node` + "_noret",
// followed by "_" + vt unless isIntr is set.
multiclass GlobalFLATAtomicPatsNoRtn<string inst, string node, ValueType vt,
                                     ValueType data_vt = vt, bit isIntr = 0>
  : GlobalFLATAtomicPatsNoRtnBase<inst,
                                  node # "_noret" # !if(isIntr, "", "_" # vt),
                                  vt, data_vt>;
1285
// Returning global atomic patterns; the node name is `node`, followed by
// "_" + vt unless isIntr is set.
multiclass GlobalFLATAtomicPatsRtn<string inst, string node, ValueType vt,
                                   ValueType data_vt = vt, bit isIntr = 0>
  : GlobalFLATAtomicPatsRtnBase<inst,
                                node # !if(isIntr, "", "_" # vt),
                                vt, data_vt>;
1289
// Both no-return and returning global atomic patterns for one atomic.
multiclass GlobalFLATAtomicPats<string inst, string node, ValueType vt,
                                ValueType data_vt = vt, bit isIntr = 0>
  : GlobalFLATAtomicPatsNoRtn<inst, node, vt, data_vt, isIntr>,
    GlobalFLATAtomicPatsRtn<inst, node, vt, data_vt, isIntr>;
1294
// No-return global atomic patterns whose node name is
// `intr` + "_noret_" + addrSpaceSuffix.
multiclass GlobalFLATAtomicPatsNoRtnWithAddrSpace<string inst, string intr,
                                                  string addrSpaceSuffix,
                                                  ValueType vt,
                                                  ValueType data_vt = vt>
  : GlobalFLATAtomicPatsNoRtnBase<inst, intr # "_noret_" # addrSpaceSuffix,
                                  vt, data_vt>;
1298
// Returning global atomic patterns whose node name is
// `intr` + "_" + addrSpaceSuffix; the node is looked up as a PatFrags.
multiclass GlobalFLATAtomicPatsRtnWithAddrSpace<string inst, string intr,
                                                string addrSpaceSuffix,
                                                ValueType vt,
                                                ValueType data_vt = vt>
  : GlobalFLATAtomicPatsRtnBase<inst, intr # "_" # addrSpaceSuffix,
                                vt, data_vt, /*isPatFrags*/ 1>;
1302
// Both address-space-suffixed variants (no-return and returning) together.
multiclass GlobalFLATAtomicPatsWithAddrSpace<string inst, string intr,
                                             string addrSpaceSuffix,
                                             ValueType vt,
                                             ValueType data_vt = vt>
  : GlobalFLATAtomicPatsNoRtnWithAddrSpace<inst, intr, addrSpaceSuffix,
                                           vt, data_vt>,
    GlobalFLATAtomicPatsRtnWithAddrSpace<inst, intr, addrSpaceSuffix,
                                         vt, data_vt>;
1307
// Global atomic patterns for an intrinsic node: GlobalFLATAtomicPats with
// isIntr = 1, so no "_" + vt suffix is appended to the node names.
multiclass GlobalFLATAtomicIntrPats<string inst, string node, ValueType vt,
                                    ValueType data_vt = vt>
  : GlobalFLATAtomicPats<inst, node, vt, data_vt, /* isIntr */ 1>;
1312
// Scratch load patterns for one pseudo, in increasing AddedComplexity:
// VGPR address (25), `_SADDR` (26) and `_SVS` (27). The `_SVS` pattern is
// additionally predicated on HasFlatScratchSVSMode.
multiclass ScratchFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node,
                               ValueType vt> {
  let AddedComplexity = 25 in
  def : ScratchLoadSignedPat<inst, node, vt>;

  let AddedComplexity = 26 in
  def : ScratchLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"),
                            node, vt>;

  let SubtargetPredicate = HasFlatScratchSVSMode, AddedComplexity = 27 in
  def : ScratchLoadSVaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"),
                             node, vt>;
}
1327
// Scratch store patterns for one pseudo, in increasing AddedComplexity:
// VGPR address (25), `_SADDR` (26) and `_SVS` (27). The `_SVS` pattern is
// additionally predicated on HasFlatScratchSVSMode.
multiclass ScratchFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
                                ValueType vt> {
  let AddedComplexity = 25 in
  def : ScratchStoreSignedPat<inst, node, vt>;

  let AddedComplexity = 26 in
  def : ScratchStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"),
                             node, vt>;

  let SubtargetPredicate = HasFlatScratchSVSMode, AddedComplexity = 27 in
  def : ScratchStoreSVaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"),
                              node, vt>;
}
1343
// D16 scratch load patterns for one pseudo, in increasing AddedComplexity:
// VGPR address (25), `_SADDR` (26) and `_SVS` (27). The `_SVS` pattern is
// additionally predicated on HasFlatScratchSVSMode.
multiclass ScratchFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node,
                                   ValueType vt> {
  let AddedComplexity = 25 in
  def : ScratchLoadSignedPat_D16<inst, node, vt>;

  let AddedComplexity = 26 in
  def : ScratchLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"),
                                node, vt>;

  let SubtargetPredicate = HasFlatScratchSVSMode, AddedComplexity = 27 in
  def : ScratchLoadSVaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"),
                                 node, vt>;
}
1358
1359let OtherPredicates = [HasFlatAddressSpace] in {
1360
// Flat load selection patterns. The definition order is kept as-is.

// Atomic 8- and 16-bit loads (plain and zero-extending).
def : FlatLoadPat<FLAT_LOAD_UBYTE, atomic_load_8_flat, i32>;
def : FlatLoadPat<FLAT_LOAD_UBYTE, atomic_load_8_flat, i16>;
def : FlatLoadPat<FLAT_LOAD_UBYTE, atomic_load_zext_8_flat, i32>;
def : FlatLoadPat<FLAT_LOAD_UBYTE, atomic_load_zext_8_flat, i16>;
def : FlatLoadPat<FLAT_LOAD_USHORT, atomic_load_16_flat, i32>;
def : FlatLoadPat<FLAT_LOAD_USHORT, atomic_load_16_flat, i16>;
def : FlatLoadPat<FLAT_LOAD_USHORT, atomic_load_zext_16_flat, i32>;
def : FlatLoadPat<FLAT_LOAD_USHORT, atomic_load_zext_16_flat, i16>;

// Extending byte loads to i32, plus sign-extending atomic byte loads.
def : FlatLoadPat<FLAT_LOAD_UBYTE, extloadi8_flat, i32>;
def : FlatLoadPat<FLAT_LOAD_UBYTE, zextloadi8_flat, i32>;
def : FlatLoadPat<FLAT_LOAD_SBYTE, sextloadi8_flat, i32>;
def : FlatLoadPat<FLAT_LOAD_SBYTE, atomic_load_sext_8_flat, i32>;
def : FlatLoadPat<FLAT_LOAD_SBYTE, atomic_load_sext_8_flat, i16>;

// Extending byte loads to i16.
def : FlatLoadPat<FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
def : FlatLoadPat<FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
def : FlatLoadPat<FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;

// 16-bit loads.
def : FlatLoadPat<FLAT_LOAD_USHORT, extloadi16_flat, i32>;
def : FlatLoadPat<FLAT_LOAD_USHORT, zextloadi16_flat, i32>;
def : FlatLoadPat<FLAT_LOAD_USHORT, load_flat, i16>;
def : FlatLoadPat<FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
def : FlatLoadPat<FLAT_LOAD_SSHORT, atomic_load_sext_16_flat, i32>;

// Three-dword vector load.
def : FlatLoadPat<FLAT_LOAD_DWORDX3, load_flat, v3i32>;
1383
// Atomic 32- and 64-bit flat loads.
def : FlatLoadPat<FLAT_LOAD_DWORD, atomic_load_32_flat, i32>;
def : FlatLoadPat<FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;

// Truncating flat stores of an i32 value.
def : FlatStorePat<FLAT_STORE_BYTE, truncstorei8_flat, i32>;
def : FlatStorePat<FLAT_STORE_SHORT, truncstorei16_flat, i32>;
1389
// Plain flat loads/stores, instantiated once per value type of each
// register width.

// Every type listed in Reg32Types.
foreach vt = Reg32Types.types in {
  def : FlatLoadPat<FLAT_LOAD_DWORD, load_flat, vt>;
  def : FlatStorePat<FLAT_STORE_DWORD, store_flat, vt>;
}

// Every type listed for VReg_64.
foreach vt = VReg_64.RegTypes in {
  def : FlatStorePat<FLAT_STORE_DWORDX2, store_flat, vt>;
  def : FlatLoadPat<FLAT_LOAD_DWORDX2, load_flat, vt>;
}

// The three-dword store is only defined for v3i32.
def : FlatStorePat<FLAT_STORE_DWORDX3, store_flat, v3i32>;

// Every type listed for VReg_128.
foreach vt = VReg_128.RegTypes in {
  def : FlatLoadPat<FLAT_LOAD_DWORDX4, load_flat, vt>;
  def : FlatStorePat<FLAT_STORE_DWORDX4, store_flat, vt>;
}
1406
// Atomic flat stores. The 8- and 16-bit forms are provided for both i32 and
// i16 value types.
def : FlatStorePat<FLAT_STORE_DWORD, atomic_store_32_flat, i32>;
def : FlatStorePat<FLAT_STORE_DWORDX2, atomic_store_64_flat, i64>;
def : FlatStorePat<FLAT_STORE_BYTE, atomic_store_8_flat, i32>;
def : FlatStorePat<FLAT_STORE_BYTE, atomic_store_8_flat, i16>;
def : FlatStorePat<FLAT_STORE_SHORT, atomic_store_16_flat, i32>;
def : FlatStorePat<FLAT_STORE_SHORT, atomic_store_16_flat, i16>;
1413
// FLAT_ATOMIC_* patterns, generated for node names ending in both "flat" and
// "global".
foreach as = [ "flat", "global" ] in {
  // 32-bit integer atomics.
  defm : FlatAtomicPat<"FLAT_ATOMIC_ADD", "atomic_load_add_"#as, i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_SUB", "atomic_load_sub_"#as, i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_INC", "atomic_load_uinc_wrap_"#as, i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_DEC", "atomic_load_udec_wrap_"#as, i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_AND", "atomic_load_and_"#as, i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_SMAX", "atomic_load_max_"#as, i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_UMAX", "atomic_load_umax_"#as, i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_SMIN", "atomic_load_min_"#as, i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_UMIN", "atomic_load_umin_"#as, i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_OR", "atomic_load_or_"#as, i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_SWAP", "atomic_swap_"#as, i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_"#as,
                       i32, v2i32>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_XOR", "atomic_load_xor_"#as, i32>;

  // 64-bit integer atomics.
  defm : FlatAtomicPat<"FLAT_ATOMIC_ADD_X2", "atomic_load_add_"#as, i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_SUB_X2", "atomic_load_sub_"#as, i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_INC_X2", "atomic_load_uinc_wrap_"#as, i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_DEC_X2", "atomic_load_udec_wrap_"#as, i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_AND_X2", "atomic_load_and_"#as, i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_SMAX_X2", "atomic_load_max_"#as, i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_UMAX_X2", "atomic_load_umax_"#as, i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_SMIN_X2", "atomic_load_min_"#as, i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_UMIN_X2", "atomic_load_umin_"#as, i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_OR_X2", "atomic_load_or_"#as, i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_SWAP_X2", "atomic_swap_"#as, i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_"#as,
                       i64, v2i64>;
  defm : FlatAtomicPat<"FLAT_ATOMIC_XOR_X2", "atomic_load_xor_"#as, i64>;

  // f32 min/max, gated on HasAtomicFMinFMaxF32FlatInsts.
  let SubtargetPredicate = HasAtomicFMinFMaxF32FlatInsts in {
    defm : FlatAtomicPat<"FLAT_ATOMIC_FMIN", "atomic_load_fmin_"#as, f32>;
    defm : FlatAtomicPat<"FLAT_ATOMIC_FMAX", "atomic_load_fmax_"#as, f32>;
  }

  // f64 min/max, gated on HasAtomicFMinFMaxF64FlatInsts.
  let SubtargetPredicate = HasAtomicFMinFMaxF64FlatInsts in {
    defm : FlatAtomicPat<"FLAT_ATOMIC_MIN_F64", "atomic_load_fmin_"#as, f64>;
    defm : FlatAtomicPat<"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_"#as, f64>;
  }
} // end foreach as
1454
// GFX12+ conditional-subtract intrinsic in the flat address space. The
// returning form is always available under isGFX12Plus; the non-returning form
// additionally requires HasAtomicCSubNoRtnInsts.
let SubtargetPredicate = isGFX12Plus in {
  defm : FlatAtomicRtnPatWithAddrSpace<
    "FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32",
    "flat_addrspace", i32>;

  let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
    defm : FlatAtomicNoRtnPatWithAddrSpace<
      "FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32",
      "flat_addrspace", i32>;
}
1461
// Stores of an i16 value: truncating to a byte, or storing the full short.
def : FlatStorePat <FLAT_STORE_BYTE,  truncstorei8_flat, i16>;
def : FlatStorePat <FLAT_STORE_SHORT, store_flat,        i16>;
1464
// D16_HI stores of an i32 value; only available with HasD16LoadStore.
let OtherPredicates = [HasD16LoadStore] in {
  def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
  def : FlatStorePat <FLAT_STORE_BYTE_D16_HI,  truncstorei8_hi16_flat,  i32>;
}
1469
// D16 flat loads, matched for both v2i16 and v2f16 results and gated on
// D16PreservesUnusedBits.
let OtherPredicates = [D16PreservesUnusedBits] in {
// TODO: Handle atomic loads
// *_D16_HI variants.
foreach vt = [v2i16, v2f16] in
  def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, vt>;
foreach vt = [v2i16, v2f16] in
  def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, vt>;
foreach vt = [v2i16, v2f16] in
  def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, vt>;

// *_D16 (lo) variants.
foreach vt = [v2i16, v2f16] in
  def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, vt>;
foreach vt = [v2i16, v2f16] in
  def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, vt>;
foreach vt = [v2i16, v2f16] in
  def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, vt>;
}
1486
1487} // End OtherPredicates = [HasFlatAddressSpace]
1488
1489let OtherPredicates = [HasFlatGlobalInsts] in {
1490
// Sub-dword global loads (8- and 16-bit), both atomic and non-atomic.
// Each (instruction, operator, type) triple is listed exactly once; a second
// copy of the <GLOBAL_LOAD_USHORT, atomic_load_zext_16_global, i32> entry
// that used to follow the sext_16 entry was an exact duplicate and has been
// dropped.

// Atomic loads, any-extending and zero-extending.
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE,  atomic_load_8_global,       i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE,  atomic_load_8_global,       i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE,  atomic_load_zext_8_global,  i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE,  atomic_load_zext_8_global,  i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_16_global,      i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_16_global,      i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_zext_16_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_zext_16_global, i16>;
// Atomic loads, sign-extending from 8 bits.
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE,  atomic_load_sext_8_global,  i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE,  atomic_load_sext_8_global,  i16>;
// Non-atomic extending loads.
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE,  extloadi8_global,   i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE,  zextloadi8_global,  i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE,  sextloadi8_global,  i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE,  extloadi8_global,   i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE,  zextloadi8_global,  i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE,  sextloadi8_global,  i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, extloadi16_global,  i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
// Atomic load, sign-extending from 16 bits.
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SSHORT, atomic_load_sext_16_global, i32>;
// Plain i16 load.
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, load_global, i16>;
1513
// Whole-dword global loads and stores, instantiated for every type in the
// corresponding type list.
foreach vt = Reg32Types.types in {
  defm : GlobalFLATLoadPats  <GLOBAL_LOAD_DWORD,  load_global,  vt>;
  defm : GlobalFLATStorePats <GLOBAL_STORE_DWORD, store_global, vt>;
}

foreach vt = VReg_64.RegTypes in {
  defm : GlobalFLATLoadPats  <GLOBAL_LOAD_DWORDX2,  load_global,  vt>;
  defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX2, store_global, vt>;
}

// The three-dword load is matched for v3i32 only.
defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX3, load_global, v3i32>;

foreach vt = VReg_128.RegTypes in {
  defm : GlobalFLATLoadPats  <GLOBAL_LOAD_DWORDX4,  load_global,  vt>;
  defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX4, store_global, vt>;
}
1530
// Atomic loads are selected exactly like ordinary loads: instruction
// selection makes no distinction, and the memory legalizer later sets the
// cache bits and inserts the appropriate waits.
defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD,   atomic_load_32_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX2, atomic_load_64_global, i64>;
1536
// Truncating and narrow global stores, plus the three-dword store.
defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE,    truncstorei8_global,  i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE,    truncstorei8_global,  i16>;
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT,   truncstorei16_global, i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT,   store_global,         i16>;
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX3, store_global,         v3i32>;
1542
// D16_HI global stores of an i32 value.
// An inner `let` replaces, rather than extends, the enclosing
// OtherPredicates list, so HasFlatGlobalInsts is repeated here to keep these
// GLOBAL_* patterns guarded by it (the scratch D16 patterns re-list their
// outer predicates the same way).
let OtherPredicates = [HasD16LoadStore, HasFlatGlobalInsts] in {
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_hi16_global, i32>;
}
1547
// D16 global loads, matched for both v2i16 and v2f16 results.
// An inner `let` replaces, rather than extends, the enclosing
// OtherPredicates list, so HasFlatGlobalInsts is repeated here to keep these
// GLOBAL_* patterns guarded by it (the scratch D16 patterns re-list their
// outer predicates the same way).
let OtherPredicates = [D16PreservesUnusedBits, HasFlatGlobalInsts] in {
// TODO: Handle atomic loads
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2f16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2f16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2f16>;

defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2f16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2f16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2f16>;
}
1564
// Atomic global stores select the ordinary GLOBAL_STORE_* instructions.
// The 8-bit and 16-bit forms are matched for both an i32 and an i16 value.
foreach vt = [i32, i16] in
  defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, vt>;
foreach vt = [i32, i16] in
  defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, atomic_store_16_global, vt>;
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORD,   atomic_store_32_global, i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX2, atomic_store_64_global, i64>;
1571
// 32-bit integer global atomics.
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD",     "atomic_load_add_global",       i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB",     "atomic_load_sub_global",       i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC",     "atomic_load_uinc_wrap_global", i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC",     "atomic_load_udec_wrap_global", i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND",     "atomic_load_and_global",       i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX",    "atomic_load_max_global",       i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX",    "atomic_load_umax_global",      i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN",    "atomic_load_min_global",       i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN",    "atomic_load_umin_global",      i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR",      "atomic_load_or_global",        i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP",    "atomic_swap_global",           i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_global", i32, v2i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR",     "atomic_load_xor_global",       i32>;

// CSUB is matched from an intrinsic. The returning form is listed
// unconditionally; the non-returning form requires HasAtomicCSubNoRtnInsts.
defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_csub",
                                i32, i32, /* isIntr */ 1>;

let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_csub",
                                  i32, i32, /* isIntr */ 1>;
1589
// 64-bit integer global atomics (the *_X2 instructions).
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_X2",     "atomic_load_add_global",       i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB_X2",     "atomic_load_sub_global",       i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC_X2",     "atomic_load_uinc_wrap_global", i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC_X2",     "atomic_load_udec_wrap_global", i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND_X2",     "atomic_load_and_global",       i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX_X2",    "atomic_load_max_global",       i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX_X2",    "atomic_load_umax_global",      i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN_X2",    "atomic_load_min_global",       i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN_X2",    "atomic_load_umin_global",      i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR_X2",      "atomic_load_or_global",        i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP_X2",    "atomic_swap_global",           i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_global", i64, v2i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR_X2",     "atomic_load_xor_global",       i64>;
1603
// GFX12+ conditional-subtract intrinsic in the global address space. The
// non-returning form additionally requires HasAtomicCSubNoRtnInsts.
let SubtargetPredicate = isGFX12Plus in {
  defm : GlobalFLATAtomicPatsRtnWithAddrSpace <
    "GLOBAL_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32",
    "global_addrspace", i32>;

  let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
    defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <
      "GLOBAL_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32",
      "global_addrspace", i32>;
}
1610
// GFX12+ only: the ordered-add intrinsic (returning form) and the
// global_load_tr intrinsics. The load_tr patterns pick a different
// instruction and result type depending on the wave size.
let OtherPredicates = [isGFX12Plus] in {
  defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_ORDERED_ADD_B64",
                                  "int_amdgcn_global_atomic_ordered_add_b64",
                                  i64, i64, /* isIntr */ 1>;

  // Wave32: b64 yields v2i32, b128 yields an 8 x 16-bit vector.
  let WaveSizePredicate = isWave32 in {
    defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B64_w32,
                               int_amdgcn_global_load_tr_b64, v2i32>;
    foreach vt = [v8i16, v8f16, v8bf16] in
      defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B128_w32,
                                 int_amdgcn_global_load_tr_b128, vt>;
  }

  // Wave64: b64 yields i32, b128 yields a 4 x 16-bit vector.
  let WaveSizePredicate = isWave64 in {
    defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B64_w64,
                               int_amdgcn_global_load_tr_b64, i32>;
    foreach vt = [v4i16, v4f16, v4bf16] in
      defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B128_w64,
                                 int_amdgcn_global_load_tr_b128, vt>;
  }
}
1625
// f32 atomic fmin/fmax selected to the GLOBAL_ATOMIC_* instructions.
let SubtargetPredicate = HasAtomicFMinFMaxF32GlobalInsts,
    OtherPredicates = [HasFlatGlobalInsts] in {
  defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", "atomic_load_fmin_global", f32>;
  defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", "atomic_load_fmax_global", f32>;
}

// f32 atomic fmin/fmax selected to the FLAT_ATOMIC_* instructions.
// NOTE(review): the `foreach as = [ "flat", "global" ]` loop earlier in this
// file already instantiates these two "atomic_load_f{min,max}_flat" patterns
// under the same SubtargetPredicate; confirm whether this copy is still needed.
let SubtargetPredicate = HasAtomicFMinFMaxF32FlatInsts in {
  defm : FlatAtomicPat <"FLAT_ATOMIC_FMIN", "atomic_load_fmin_flat", f32>;
  defm : FlatAtomicPat <"FLAT_ATOMIC_FMAX", "atomic_load_fmax_flat", f32>;
}
1635
// GFX12-only selection of the fmin_num / fmax_num intrinsics to the f32
// global and flat atomic instructions.
let OtherPredicates = [isGFX12Only] in {
  // FIXME: Remove these intrinsics
  defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin_num", f32>;
  defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax_num", f32>;
  defm : FlatAtomicIntrPat        <"FLAT_ATOMIC_FMIN",   "int_amdgcn_flat_atomic_fmin_num",   f32>;
  defm : FlatAtomicIntrPat        <"FLAT_ATOMIC_FMAX",   "int_amdgcn_flat_atomic_fmax_num",   f32>;
}
1643
// Global atomic fadd. The returning and non-returning forms are gated on
// separate predicates, so each is listed on its own.

// Non-returning f32 add.
let OtherPredicates = [HasAtomicFaddNoRtnInsts] in
defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_ADD_F32", "atomic_load_fadd_global", f32>;

// Non-returning packed f16 add.
let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in
defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_PK_ADD_F16", "atomic_load_fadd_global", v2f16>;

// Returning f32 add.
let OtherPredicates = [HasAtomicFaddRtnInsts] in
defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_ADD_F32", "atomic_load_fadd_global", f32>;

// Packed f16 add through the combined GlobalFLATAtomicPats multiclass.
let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_PK_ADD_F16", "atomic_load_fadd_global", v2f16>;
1659
// f64 atomic fmin/fmax selected to the GLOBAL_ATOMIC_{MIN,MAX}_F64
// instructions.
let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts,
    OtherPredicates = [HasFlatGlobalInsts] in {
  defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", "atomic_load_fmin_global", f64>;
  defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", "atomic_load_fmax_global", f64>;
}
1664
// f64 atomic fadd, for both the global and the flat instruction.
let OtherPredicates = [HasFlatBufferGlobalAtomicFaddF64Inst] in {
  defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F64", "atomic_load_fadd_global", f64>;
  defm : FlatAtomicPat        <"FLAT_ATOMIC_ADD_F64",   "atomic_load_fadd_flat",   f64>;
}

// f32 atomic fadd on the flat instruction.
let OtherPredicates = [HasFlatAtomicFaddF32Inst] in {
  defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_F32", "atomic_load_fadd_flat", f32>;
}

// Packed 16-bit atomic fadd on the flat instructions.
let OtherPredicates = [HasAtomicFlatPkAdd16Insts] in {
  defm : FlatAtomicPat <"FLAT_ATOMIC_PK_ADD_F16",  "atomic_load_fadd_flat", v2f16>;
  defm : FlatAtomicPat <"FLAT_ATOMIC_PK_ADD_BF16", "atomic_load_fadd_flat", v2bf16>;
}

// Packed bf16 atomic fadd on the global instruction.
let OtherPredicates = [HasAtomicGlobalPkAddBF16Inst] in
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_PK_ADD_BF16", "atomic_load_fadd_global", v2bf16>;
} // End OtherPredicates = [HasFlatGlobalInsts]
1682
1683let OtherPredicates = [HasFlatScratchInsts, EnableFlatScratch] in {
1684
// Sub-dword scratch loads from the private address space.
// 8-bit loads producing i32.
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE,  extloadi8_private,   i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE,  zextloadi8_private,  i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE,  sextloadi8_private,  i32>;
// 8-bit loads producing i16.
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE,  extloadi8_private,   i16>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE,  zextloadi8_private,  i16>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE,  sextloadi8_private,  i16>;
// 16-bit loads.
defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, extloadi16_private,  i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, zextloadi16_private, i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_SSHORT, sextloadi16_private, i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, load_private,        i16>;
1695
// Whole-dword scratch loads and stores, instantiated for every type in the
// corresponding type list.
foreach vt = Reg32Types.types in {
  defm : ScratchFLATLoadPats  <SCRATCH_LOAD_DWORD,  load_private,  vt>;
  defm : ScratchFLATStorePats <SCRATCH_STORE_DWORD, store_private, vt>;
}

foreach vt = VReg_64.RegTypes in {
  defm : ScratchFLATLoadPats  <SCRATCH_LOAD_DWORDX2,  load_private,  vt>;
  defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX2, store_private, vt>;
}

// The three-dword load is matched for v3i32 only.
defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX3, load_private, v3i32>;

foreach vt = VReg_128.RegTypes in {
  defm : ScratchFLATLoadPats  <SCRATCH_LOAD_DWORDX4,  load_private,  vt>;
  defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX4, store_private, vt>;
}

// Truncating and narrow scratch stores, plus the three-dword store.
defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE,    truncstorei8_private,  i32>;
defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE,    truncstorei8_private,  i16>;
defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT,   truncstorei16_private, i32>;
defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT,   store_private,         i16>;
defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX3, store_private,         v3i32>;
1718
// D16 scratch stores and loads. Both `let`s below spell out
// HasFlatScratchInsts and EnableFlatScratch again next to the D16 predicate.

// D16_HI stores of an i32 value.
let OtherPredicates = [HasD16LoadStore, HasFlatScratchInsts, EnableFlatScratch] in {
  defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT_D16_HI, truncstorei16_hi16_private, i32>;
  defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE_D16_HI,  truncstorei8_hi16_private,  i32>;
}

// D16 loads, matched for both v2i16 and v2f16 results.
let OtherPredicates = [D16PreservesUnusedBits, HasFlatScratchInsts, EnableFlatScratch] in {
// *_D16_HI variants.
foreach vt = [v2i16, v2f16] in
  defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, vt>;
foreach vt = [v2i16, v2f16] in
  defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_private, vt>;
foreach vt = [v2i16, v2f16] in
  defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16_HI, load_d16_hi_private, vt>;

// *_D16 (lo) variants.
foreach vt = [v2i16, v2f16] in
  defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16, az_extloadi8_d16_lo_private, vt>;
foreach vt = [v2i16, v2f16] in
  defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16, sextloadi8_d16_lo_private, vt>;
foreach vt = [v2i16, v2f16] in
  defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private, vt>;
}
1739
1740} // End OtherPredicates = [HasFlatScratchInsts,EnableFlatScratch]
1741
1742//===----------------------------------------------------------------------===//
1743// Target
1744//===----------------------------------------------------------------------===//
1745
1746//===----------------------------------------------------------------------===//
1747// CI
1748//===----------------------------------------------------------------------===//
1749
// Real (encodable) form of a FLAT pseudo in the SI encoding family.
//   op      - 7-bit opcode passed to FLAT_Real.
//   ps      - the pseudo instruction this real instruction is built from.
//   asmName - assembly mnemonic; defaults to the pseudo's Mnemonic.
// Assembled only under isGFX7Only and decoded via the "GFX7" namespace.
class FLAT_Real_ci <bits<7> op, FLAT_Pseudo ps, string asmName = ps.Mnemonic>
  : FLAT_Real <op, ps, asmName>,
    SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SI> {
  let DecoderNamespace = "GFX7";
  let AssemblerPredicate = isGFX7Only;
}
1756
// CI encodings of the flat loads.
def FLAT_LOAD_UBYTE_ci    : FLAT_Real_ci <0x08, FLAT_LOAD_UBYTE>;
def FLAT_LOAD_SBYTE_ci    : FLAT_Real_ci <0x09, FLAT_LOAD_SBYTE>;
def FLAT_LOAD_USHORT_ci   : FLAT_Real_ci <0x0a, FLAT_LOAD_USHORT>;
def FLAT_LOAD_SSHORT_ci   : FLAT_Real_ci <0x0b, FLAT_LOAD_SSHORT>;
def FLAT_LOAD_DWORD_ci    : FLAT_Real_ci <0x0c, FLAT_LOAD_DWORD>;
def FLAT_LOAD_DWORDX2_ci  : FLAT_Real_ci <0x0d, FLAT_LOAD_DWORDX2>;
def FLAT_LOAD_DWORDX4_ci  : FLAT_Real_ci <0x0e, FLAT_LOAD_DWORDX4>;
def FLAT_LOAD_DWORDX3_ci  : FLAT_Real_ci <0x0f, FLAT_LOAD_DWORDX3>;

// CI encodings of the flat stores.
def FLAT_STORE_BYTE_ci    : FLAT_Real_ci <0x18, FLAT_STORE_BYTE>;
def FLAT_STORE_SHORT_ci   : FLAT_Real_ci <0x1a, FLAT_STORE_SHORT>;
def FLAT_STORE_DWORD_ci   : FLAT_Real_ci <0x1c, FLAT_STORE_DWORD>;
def FLAT_STORE_DWORDX2_ci : FLAT_Real_ci <0x1d, FLAT_STORE_DWORDX2>;
def FLAT_STORE_DWORDX4_ci : FLAT_Real_ci <0x1e, FLAT_STORE_DWORDX4>;
def FLAT_STORE_DWORDX3_ci : FLAT_Real_ci <0x1f, FLAT_STORE_DWORDX3>;
1772
// Defines the CI real instructions for one flat atomic: "<NAME>_ci" built
// from the pseudo named opName and "<NAME>_RTN_ci" built from the pseudo named
// opName#"_RTN". Both share the opcode and the assembly name.
//   op      - 7-bit opcode.
//   opName  - name of the pseudo to look up; defaults to NAME.
//   asmName - mnemonic; defaults to the Mnemonic of the opName pseudo.
multiclass FLAT_Real_Atomics_ci <bits<7> op, string opName = NAME,
                                 string asmName = !cast<FLAT_Pseudo>(opName).Mnemonic> {
  def _ci     : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(opName), asmName>;
  def _RTN_ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(opName # "_RTN"), asmName>;
}
1781
// CI encodings of the 32-bit flat atomics.
defm FLAT_ATOMIC_SWAP        : FLAT_Real_Atomics_ci <0x30>;
defm FLAT_ATOMIC_CMPSWAP     : FLAT_Real_Atomics_ci <0x31>;
defm FLAT_ATOMIC_ADD         : FLAT_Real_Atomics_ci <0x32>;
defm FLAT_ATOMIC_SUB         : FLAT_Real_Atomics_ci <0x33>;
defm FLAT_ATOMIC_SMIN        : FLAT_Real_Atomics_ci <0x35>;
defm FLAT_ATOMIC_UMIN        : FLAT_Real_Atomics_ci <0x36>;
defm FLAT_ATOMIC_SMAX        : FLAT_Real_Atomics_ci <0x37>;
defm FLAT_ATOMIC_UMAX        : FLAT_Real_Atomics_ci <0x38>;
defm FLAT_ATOMIC_AND         : FLAT_Real_Atomics_ci <0x39>;
defm FLAT_ATOMIC_OR          : FLAT_Real_Atomics_ci <0x3a>;
defm FLAT_ATOMIC_XOR         : FLAT_Real_Atomics_ci <0x3b>;
defm FLAT_ATOMIC_INC         : FLAT_Real_Atomics_ci <0x3c>;
defm FLAT_ATOMIC_DEC         : FLAT_Real_Atomics_ci <0x3d>;

// CI encodings of the 64-bit (*_X2) flat atomics.
defm FLAT_ATOMIC_SWAP_X2     : FLAT_Real_Atomics_ci <0x50>;
defm FLAT_ATOMIC_CMPSWAP_X2  : FLAT_Real_Atomics_ci <0x51>;
defm FLAT_ATOMIC_ADD_X2      : FLAT_Real_Atomics_ci <0x52>;
defm FLAT_ATOMIC_SUB_X2      : FLAT_Real_Atomics_ci <0x53>;
defm FLAT_ATOMIC_SMIN_X2     : FLAT_Real_Atomics_ci <0x55>;
defm FLAT_ATOMIC_UMIN_X2     : FLAT_Real_Atomics_ci <0x56>;
defm FLAT_ATOMIC_SMAX_X2     : FLAT_Real_Atomics_ci <0x57>;
defm FLAT_ATOMIC_UMAX_X2     : FLAT_Real_Atomics_ci <0x58>;
defm FLAT_ATOMIC_AND_X2      : FLAT_Real_Atomics_ci <0x59>;
defm FLAT_ATOMIC_OR_X2       : FLAT_Real_Atomics_ci <0x5a>;
defm FLAT_ATOMIC_XOR_X2      : FLAT_Real_Atomics_ci <0x5b>;
defm FLAT_ATOMIC_INC_X2      : FLAT_Real_Atomics_ci <0x5c>;
defm FLAT_ATOMIC_DEC_X2      : FLAT_Real_Atomics_ci <0x5d>;

// CI Only flat instructions
defm FLAT_ATOMIC_FCMPSWAP    : FLAT_Real_Atomics_ci <0x3e>;
defm FLAT_ATOMIC_FMIN        : FLAT_Real_Atomics_ci <0x3f>;
defm FLAT_ATOMIC_FMAX        : FLAT_Real_Atomics_ci <0x40>;
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_ci <0x5e>;
// The two f64 min/max reals are built from the *_F64 pseudos but keep the
// "_x2" assembly names.
defm FLAT_ATOMIC_FMIN_X2     : FLAT_Real_Atomics_ci <0x5f, "FLAT_ATOMIC_MIN_F64", "flat_atomic_fmin_x2">;
defm FLAT_ATOMIC_FMAX_X2     : FLAT_Real_Atomics_ci <0x60, "FLAT_ATOMIC_MAX_F64", "flat_atomic_fmax_x2">;
1816
1817
1818//===----------------------------------------------------------------------===//
1819// VI
1820//===----------------------------------------------------------------------===//
1821
// Real (encodable) form of a FLAT pseudo in the VI encoding family.
//   op       - 7-bit opcode passed to FLAT_Real.
//   ps       - the pseudo instruction this real instruction is built from.
//   has_sccb - whether the instruction exposes the sccb operand; defaults to
//              the pseudo's own has_sccb.
// Assembled under isGFX8GFX9 and decoded via the "GFX8" namespace.
class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb>
  : FLAT_Real <op, ps>,
    SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> {
  let DecoderNamespace = "GFX8";
  let AssemblerPredicate = isGFX8GFX9;

  // With has_sccb the "$sccb" placeholder stays in the operand string;
  // without it the placeholder is replaced by the empty string.
  let AsmString =
    ps.Mnemonic # !subst("$sccb", !if(has_sccb, "$sccb", ""), ps.AsmOperands);

  // Bit 25 comes from the cpol operand's SCC bit when has_sccb is set,
  // otherwise from the pseudo's fixed sccbValue.
  let Inst{25} = !if(has_sccb, cpol{CPolBit.SCC}, ps.sccbValue);
}
1832
// Defines the VI reals for the two addressing forms of a pseudo:
// "<NAME>_vi" from the NAME pseudo and "<NAME>_SADDR_vi" from NAME#"_SADDR".
//   op       - 7-bit opcode shared by both.
//   has_sccb - forwarded to FLAT_Real_vi; defaults to the NAME pseudo's value.
multiclass FLAT_Real_AllAddr_vi<
    bits<7> op,
    bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
  def _vi       : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
  def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME # "_SADDR"), has_sccb>;
}
1838
// Real (encodable) form of a FLAT pseudo in the GFX940 encoding family.
// Assembled under isGFX940Plus and decoded via the "GFX9" namespace.
class FLAT_Real_gfx940 <bits<7> op, FLAT_Pseudo ps>
  : FLAT_Real <op, ps>,
    SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX940> {
  let DecoderNamespace = "GFX9";
  let AssemblerPredicate = isGFX940Plus;

  // Bit 13 takes the pseudo's sve value.
  let Inst{13} = ps.sve;
  // Bit 25 comes from the cpol operand's SCC bit when the pseudo has sccb,
  // otherwise from the pseudo's fixed sccbValue.
  let Inst{25} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccbValue);
}
1847
// Defines the reals for a pseudo family that also has "_SVS" and "_ST"
// variants:
//   _vi         - VI real of NAME, restricted to isGFX8GFX9NotGFX940.
//   _SADDR_vi   - VI real of NAME#"_SADDR", decoded via the "GFX9" namespace.
//   _VE_gfx940, _SVS_gfx940, _ST_gfx940
//               - GFX940 reals of NAME, NAME#"_SVS" and NAME#"_ST".
multiclass FLAT_Real_AllAddr_SVE_vi<bits<7> op> {
  def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)> {
    let OtherPredicates = [isGFX8GFX9NotGFX940];
    let AssemblerPredicate = isGFX8GFX9NotGFX940;
  }

  def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME # "_SADDR")> {
    let DecoderNamespace = "GFX9";
  }

  let AssemblerPredicate = isGFX940Plus,
      SubtargetPredicate = isGFX940Plus in {
    def _VE_gfx940  : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
    def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME # "_SVS")>;
    def _ST_gfx940  : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME # "_ST")>;
  }
}
1862
// Defines the reals for a load-to-LDS pseudo and its "_SADDR" form.
//   op              - opcode used for the GFX940 reals.
//   pre_gfx940_op   - opcode used for the VI reals.
//   pre_gfx940_name - mnemonic for the VI reals; defaults to the NAME pseudo's
//                     Mnemonic with "_lds" removed.
//   has_sccb        - forwarded to FLAT_Real_vi.
// The VI reals are limited to isGFX8GFX9NotGFX940 and print as
// "<pre_gfx940_name><operands> lds"; the GFX940 reals require isGFX940Plus.
multiclass FLAT_Real_AllAddr_LDS<
    bits<7> op, bits<7> pre_gfx940_op,
    string pre_gfx940_name =
        !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).Mnemonic),
    bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {

  let OtherPredicates = [isGFX8GFX9NotGFX940] in {
    def _vi :
        FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb> {
      let AsmString =
        pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds";
    }
    def _SADDR_vi :
        FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME # "_SADDR"),
                     has_sccb> {
      let AsmString =
        pre_gfx940_name # !cast<FLAT_Pseudo>(NAME # "_SADDR").AsmOperands
                        # " lds";
    }
  }

  let SubtargetPredicate = isGFX940Plus in {
    def _gfx940       : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
    def _SADDR_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME # "_SADDR")>;
  }
}
1881
// Everything FLAT_Real_AllAddr_LDS defines, plus GFX940 reals for the
// NAME#"_SVS" and NAME#"_ST" pseudos (both require isGFX940Plus).
multiclass FLAT_Real_AllAddr_SVE_LDS<bits<7> op, bits<7> pre_gfx940_op> {
  defm "" : FLAT_Real_AllAddr_LDS<op, pre_gfx940_op>;

  let SubtargetPredicate = isGFX940Plus in {
    def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME # "_SVS")>;
    def _ST_gfx940  : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME # "_ST")>;
  }
}
1889
// VI encodings of the flat loads, listed in opcode order.
def FLAT_LOAD_UBYTE_vi         : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>;
def FLAT_LOAD_SBYTE_vi         : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>;
def FLAT_LOAD_USHORT_vi        : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>;
def FLAT_LOAD_SSHORT_vi        : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>;
def FLAT_LOAD_DWORD_vi         : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>;
def FLAT_LOAD_DWORDX2_vi       : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>;
def FLAT_LOAD_DWORDX3_vi       : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>;
def FLAT_LOAD_DWORDX4_vi       : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>;

// VI encodings of the flat stores, listed in opcode order.
def FLAT_STORE_BYTE_vi         : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>;
def FLAT_STORE_BYTE_D16_HI_vi  : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
def FLAT_STORE_SHORT_vi        : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>;
def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
def FLAT_STORE_DWORD_vi        : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>;
def FLAT_STORE_DWORDX2_vi      : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>;
def FLAT_STORE_DWORDX3_vi      : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>;
def FLAT_STORE_DWORDX4_vi      : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>;

// VI encodings of the D16 flat loads.
def FLAT_LOAD_UBYTE_D16_vi     : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>;
def FLAT_LOAD_UBYTE_D16_HI_vi  : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
def FLAT_LOAD_SBYTE_D16_vi     : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>;
def FLAT_LOAD_SBYTE_D16_HI_vi  : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
def FLAT_LOAD_SHORT_D16_vi     : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>;
def FLAT_LOAD_SHORT_D16_HI_vi  : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;
1914
// Defines the VI reals for one flat atomic: "<NAME>_vi" and "<NAME>_RTN_vi".
// The pseudos are looked up by name through the NAME pseudo's PseudoInstr
// string (with "_RTN" appended for the returning form).
//   op       - 7-bit opcode shared by both.
//   has_sccb - forwarded to FLAT_Real_vi; defaults to the NAME pseudo's value.
multiclass FLAT_Real_Atomics_vi <
    bits<7> op,
    bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
  defvar basePseudo = !cast<FLAT_Pseudo>(NAME);
  def _vi :
    FLAT_Real_vi<op, !cast<FLAT_Pseudo>(basePseudo.PseudoInstr), has_sccb>;
  def _RTN_vi :
    FLAT_Real_vi<op, !cast<FLAT_Pseudo>(basePseudo.PseudoInstr # "_RTN"),
                 has_sccb>;
}
1921
// Defines the VI reals for one global atomic. The "_vi" and "_SADDR_vi"
// records come from the FLAT_Real_AllAddr_vi base; this multiclass adds the
// returning forms "_RTN_vi" and "_SADDR_RTN_vi".
multiclass FLAT_Global_Real_Atomics_vi<
    bits<7> op,
    bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb>
  : FLAT_Real_AllAddr_vi<op, has_sccb> {
  def _RTN_vi       : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME # "_RTN"), has_sccb>;
  def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME # "_SADDR_RTN"), has_sccb>;
}
1928
1929
// VI encodings of the 32-bit flat atomics.
defm FLAT_ATOMIC_SWAP         : FLAT_Real_Atomics_vi <0x40>;
defm FLAT_ATOMIC_CMPSWAP      : FLAT_Real_Atomics_vi <0x41>;
defm FLAT_ATOMIC_ADD          : FLAT_Real_Atomics_vi <0x42>;
defm FLAT_ATOMIC_SUB          : FLAT_Real_Atomics_vi <0x43>;
defm FLAT_ATOMIC_SMIN         : FLAT_Real_Atomics_vi <0x44>;
defm FLAT_ATOMIC_UMIN         : FLAT_Real_Atomics_vi <0x45>;
defm FLAT_ATOMIC_SMAX         : FLAT_Real_Atomics_vi <0x46>;
defm FLAT_ATOMIC_UMAX         : FLAT_Real_Atomics_vi <0x47>;
defm FLAT_ATOMIC_AND          : FLAT_Real_Atomics_vi <0x48>;
defm FLAT_ATOMIC_OR           : FLAT_Real_Atomics_vi <0x49>;
defm FLAT_ATOMIC_XOR          : FLAT_Real_Atomics_vi <0x4a>;
defm FLAT_ATOMIC_INC          : FLAT_Real_Atomics_vi <0x4b>;
defm FLAT_ATOMIC_DEC          : FLAT_Real_Atomics_vi <0x4c>;
// VI encodings of the 64-bit (*_X2) flat atomics.
defm FLAT_ATOMIC_SWAP_X2      : FLAT_Real_Atomics_vi <0x60>;
defm FLAT_ATOMIC_CMPSWAP_X2   : FLAT_Real_Atomics_vi <0x61>;
defm FLAT_ATOMIC_ADD_X2       : FLAT_Real_Atomics_vi <0x62>;
defm FLAT_ATOMIC_SUB_X2       : FLAT_Real_Atomics_vi <0x63>;
defm FLAT_ATOMIC_SMIN_X2      : FLAT_Real_Atomics_vi <0x64>;
defm FLAT_ATOMIC_UMIN_X2      : FLAT_Real_Atomics_vi <0x65>;
defm FLAT_ATOMIC_SMAX_X2      : FLAT_Real_Atomics_vi <0x66>;
defm FLAT_ATOMIC_UMAX_X2      : FLAT_Real_Atomics_vi <0x67>;
defm FLAT_ATOMIC_AND_X2       : FLAT_Real_Atomics_vi <0x68>;
defm FLAT_ATOMIC_OR_X2        : FLAT_Real_Atomics_vi <0x69>;
defm FLAT_ATOMIC_XOR_X2       : FLAT_Real_Atomics_vi <0x6a>;
defm FLAT_ATOMIC_INC_X2       : FLAT_Real_Atomics_vi <0x6b>;
defm FLAT_ATOMIC_DEC_X2       : FLAT_Real_Atomics_vi <0x6c>;
1956
// VI encodings of the global loads (plain and SADDR forms).
defm GLOBAL_LOAD_UBYTE         : FLAT_Real_AllAddr_vi <0x10>;
defm GLOBAL_LOAD_SBYTE         : FLAT_Real_AllAddr_vi <0x11>;
defm GLOBAL_LOAD_USHORT        : FLAT_Real_AllAddr_vi <0x12>;
defm GLOBAL_LOAD_SSHORT        : FLAT_Real_AllAddr_vi <0x13>;
defm GLOBAL_LOAD_DWORD         : FLAT_Real_AllAddr_vi <0x14>;
defm GLOBAL_LOAD_DWORDX2       : FLAT_Real_AllAddr_vi <0x15>;
defm GLOBAL_LOAD_DWORDX3       : FLAT_Real_AllAddr_vi <0x16>;
defm GLOBAL_LOAD_DWORDX4       : FLAT_Real_AllAddr_vi <0x17>;

// VI encodings of the D16 global loads.
defm GLOBAL_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_vi <0x20>;
defm GLOBAL_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x21>;
defm GLOBAL_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_vi <0x22>;
defm GLOBAL_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x23>;
defm GLOBAL_LOAD_SHORT_D16     : FLAT_Real_AllAddr_vi <0x24>;
defm GLOBAL_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_vi <0x25>;

// VI encodings of the global stores.
defm GLOBAL_STORE_BYTE         : FLAT_Real_AllAddr_vi <0x18>;
defm GLOBAL_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x19>;
defm GLOBAL_STORE_SHORT        : FLAT_Real_AllAddr_vi <0x1a>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>;
defm GLOBAL_STORE_DWORD        : FLAT_Real_AllAddr_vi <0x1c>;
defm GLOBAL_STORE_DWORDX2      : FLAT_Real_AllAddr_vi <0x1d>;
defm GLOBAL_STORE_DWORDX3      : FLAT_Real_AllAddr_vi <0x1e>;
defm GLOBAL_STORE_DWORDX4      : FLAT_Real_AllAddr_vi <0x1f>;
1981
// Global load-to-LDS reals. The first opcode is the GFX940 one, the second
// the pre-GFX940 (VI) one, matching FLAT_Real_AllAddr_LDS<op, pre_gfx940_op>.
defm GLOBAL_LOAD_LDS_UBYTE   : FLAT_Real_AllAddr_LDS <0x26, 0x10>;
defm GLOBAL_LOAD_LDS_SBYTE   : FLAT_Real_AllAddr_LDS <0x27, 0x11>;
defm GLOBAL_LOAD_LDS_USHORT  : FLAT_Real_AllAddr_LDS <0x28, 0x12>;
defm GLOBAL_LOAD_LDS_SSHORT  : FLAT_Real_AllAddr_LDS <0x29, 0x13>;
defm GLOBAL_LOAD_LDS_DWORD   : FLAT_Real_AllAddr_LDS <0x2a, 0x14>;

// For the wider forms both opcodes are the same value.
defm GLOBAL_LOAD_LDS_DWORDX3 : FLAT_Real_AllAddr_LDS <0x7e, 0x7e>;
defm GLOBAL_LOAD_LDS_DWORDX4 : FLAT_Real_AllAddr_LDS <0x7d, 0x7d>;
1990
1991
// VI encodings of the 32-bit global atomics.
defm GLOBAL_ATOMIC_SWAP         : FLAT_Global_Real_Atomics_vi <0x40>;
defm GLOBAL_ATOMIC_CMPSWAP      : FLAT_Global_Real_Atomics_vi <0x41>;
defm GLOBAL_ATOMIC_ADD          : FLAT_Global_Real_Atomics_vi <0x42>;
defm GLOBAL_ATOMIC_SUB          : FLAT_Global_Real_Atomics_vi <0x43>;
defm GLOBAL_ATOMIC_SMIN         : FLAT_Global_Real_Atomics_vi <0x44>;
defm GLOBAL_ATOMIC_UMIN         : FLAT_Global_Real_Atomics_vi <0x45>;
defm GLOBAL_ATOMIC_SMAX         : FLAT_Global_Real_Atomics_vi <0x46>;
defm GLOBAL_ATOMIC_UMAX         : FLAT_Global_Real_Atomics_vi <0x47>;
defm GLOBAL_ATOMIC_AND          : FLAT_Global_Real_Atomics_vi <0x48>;
defm GLOBAL_ATOMIC_OR           : FLAT_Global_Real_Atomics_vi <0x49>;
defm GLOBAL_ATOMIC_XOR          : FLAT_Global_Real_Atomics_vi <0x4a>;
defm GLOBAL_ATOMIC_INC          : FLAT_Global_Real_Atomics_vi <0x4b>;
defm GLOBAL_ATOMIC_DEC          : FLAT_Global_Real_Atomics_vi <0x4c>;
// VI encodings of the 64-bit (*_X2) global atomics.
defm GLOBAL_ATOMIC_SWAP_X2      : FLAT_Global_Real_Atomics_vi <0x60>;
defm GLOBAL_ATOMIC_CMPSWAP_X2   : FLAT_Global_Real_Atomics_vi <0x61>;
defm GLOBAL_ATOMIC_ADD_X2       : FLAT_Global_Real_Atomics_vi <0x62>;
defm GLOBAL_ATOMIC_SUB_X2       : FLAT_Global_Real_Atomics_vi <0x63>;
defm GLOBAL_ATOMIC_SMIN_X2      : FLAT_Global_Real_Atomics_vi <0x64>;
defm GLOBAL_ATOMIC_UMIN_X2      : FLAT_Global_Real_Atomics_vi <0x65>;
defm GLOBAL_ATOMIC_SMAX_X2      : FLAT_Global_Real_Atomics_vi <0x66>;
defm GLOBAL_ATOMIC_UMAX_X2      : FLAT_Global_Real_Atomics_vi <0x67>;
defm GLOBAL_ATOMIC_AND_X2       : FLAT_Global_Real_Atomics_vi <0x68>;
defm GLOBAL_ATOMIC_OR_X2        : FLAT_Global_Real_Atomics_vi <0x69>;
defm GLOBAL_ATOMIC_XOR_X2       : FLAT_Global_Real_Atomics_vi <0x6a>;
defm GLOBAL_ATOMIC_INC_X2       : FLAT_Global_Real_Atomics_vi <0x6b>;
defm GLOBAL_ATOMIC_DEC_X2       : FLAT_Global_Real_Atomics_vi <0x6c>;
2018
// Scratch-to-LDS loads, instantiated through FLAT_Real_AllAddr_SVE_LDS. The
// value pairs are the same as those of the GLOBAL_LOAD_LDS_* byte/short/dword
// definitions earlier in this section.
// NOTE(review): FLAT_Real_AllAddr_SVE_LDS is defined outside this chunk;
// confirm the meaning of the two arguments at its definition.
2019defm SCRATCH_LOAD_LDS_UBYTE  : FLAT_Real_AllAddr_SVE_LDS <0x026, 0x10>;
2020defm SCRATCH_LOAD_LDS_SBYTE  : FLAT_Real_AllAddr_SVE_LDS <0x027, 0x11>;
2021defm SCRATCH_LOAD_LDS_USHORT : FLAT_Real_AllAddr_SVE_LDS <0x028, 0x12>;
2022defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_SVE_LDS <0x029, 0x13>;
2023defm SCRATCH_LOAD_LDS_DWORD  : FLAT_Real_AllAddr_SVE_LDS <0x02a, 0x14>;
2024
// Scratch loads and stores, instantiated through FLAT_Real_AllAddr_SVE_vi.
// Each SCRATCH_* entry uses the same value as the GLOBAL_* definition of the
// same operation earlier in this section (loads 0x10-0x17, stores 0x18-0x1f,
// D16 loads 0x20-0x25). The entries are not listed in strictly increasing
// value order: the D16 loads sit between the byte and short stores.
// NOTE(review): FLAT_Real_AllAddr_SVE_vi is defined outside this chunk.
2025defm SCRATCH_LOAD_UBYTE         : FLAT_Real_AllAddr_SVE_vi <0x10>;
2026defm SCRATCH_LOAD_SBYTE         : FLAT_Real_AllAddr_SVE_vi <0x11>;
2027defm SCRATCH_LOAD_USHORT        : FLAT_Real_AllAddr_SVE_vi <0x12>;
2028defm SCRATCH_LOAD_SSHORT        : FLAT_Real_AllAddr_SVE_vi <0x13>;
2029defm SCRATCH_LOAD_DWORD         : FLAT_Real_AllAddr_SVE_vi <0x14>;
2030defm SCRATCH_LOAD_DWORDX2       : FLAT_Real_AllAddr_SVE_vi <0x15>;
2031defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_AllAddr_SVE_vi <0x16>;
2032defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_AllAddr_SVE_vi <0x17>;
2033defm SCRATCH_STORE_BYTE         : FLAT_Real_AllAddr_SVE_vi <0x18>;
2034defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_SVE_vi <0x19>;
2035defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_SVE_vi <0x20>;
2036defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_SVE_vi <0x21>;
2037defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_SVE_vi <0x22>;
2038defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_SVE_vi <0x23>;
2039defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_AllAddr_SVE_vi <0x24>;
2040defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_SVE_vi <0x25>;
2041defm SCRATCH_STORE_SHORT        : FLAT_Real_AllAddr_SVE_vi <0x1a>;
2042defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x1b>;
2043defm SCRATCH_STORE_DWORD        : FLAT_Real_AllAddr_SVE_vi <0x1c>;
2044defm SCRATCH_STORE_DWORDX2      : FLAT_Real_AllAddr_SVE_vi <0x1d>;
2045defm SCRATCH_STORE_DWORDX3      : FLAT_Real_AllAddr_SVE_vi <0x1e>;
2046defm SCRATCH_STORE_DWORDX4      : FLAT_Real_AllAddr_SVE_vi <0x1f>;
2047
// F32 / packed-F16 global atomic adds for subtargets matching
// isGFX8GFX9NotGFX940. The same two pseudos are instantiated again under
// isGFX940Plus further down, using the gfx940-specific multiclass.
// NOTE(review): the second template argument (0) belongs to
// FLAT_Global_Real_Atomics_vi, which is defined outside this chunk; confirm its
// meaning there.
2048let SubtargetPredicate = isGFX8GFX9NotGFX940 in {
2049  // These instructions are encoded differently on gfx90* and gfx940.
2050  defm GLOBAL_ATOMIC_ADD_F32    : FLAT_Global_Real_Atomics_vi <0x04d, 0>;
2051  defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e, 0>;
2052}
2053
// F64 add/min/max atomics (flat and global) for subtargets matching
// isGFX90AOnly. The flat and global forms of each operation share the same
// value (0x4f, 0x50, 0x51).
// NOTE(review): the second template argument (0) belongs to the *_vi
// multiclasses, which are defined outside this chunk; confirm its meaning there.
2054let SubtargetPredicate = isGFX90AOnly in {
2055  defm FLAT_ATOMIC_ADD_F64   : FLAT_Real_Atomics_vi<0x4f, 0>;
2056  defm FLAT_ATOMIC_MIN_F64   : FLAT_Real_Atomics_vi<0x50, 0>;
2057  defm FLAT_ATOMIC_MAX_F64   : FLAT_Real_Atomics_vi<0x51, 0>;
2058  defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_vi<0x4f, 0>;
2059  defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_vi<0x50, 0>;
2060  defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_vi<0x51, 0>;
2061} // End SubtargetPredicate = isGFX90AOnly
2062
// Defines two FLAT_Real_gfx940 records for the pseudo named by the defm:
//   <NAME>_gfx940        built from the pseudo <NAME>
//   <NAME>_SADDR_gfx940  built from the pseudo <NAME>_SADDR
// Both use the same 7-bit value `op`.
2063multiclass FLAT_Real_AllAddr_gfx940<bits<7> op> {
2064  def _gfx940       : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
2065  def _SADDR_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
2066}
2067
// Defines the no-return and returning (_RTN) FLAT_Real_gfx940 records for a
// flat atomic. The pseudos are looked up through the PseudoInstr field of the
// pseudo <NAME> (ps.PseudoInstr and ps.PseudoInstr # "_RTN") rather than
// through NAME directly. Both records use the same 7-bit value `op`.
2068multiclass FLAT_Real_Atomics_gfx940 <bits<7> op> {
2069  defvar ps = !cast<FLAT_Pseudo>(NAME);
2070  def _gfx940     : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
2071  def _RTN_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
2072}
2073
// Global atomic variant set for gfx940: inherits the base and _SADDR records
// from FLAT_Real_AllAddr_gfx940 and adds the returning forms built from the
// pseudos <NAME>_RTN and <NAME>_SADDR_RTN. All four records share `op`.
2074multiclass FLAT_Global_Real_Atomics_gfx940<bits<7> op> :
2075  FLAT_Real_AllAddr_gfx940<op> {
2076  def _RTN_gfx940       : FLAT_Real_gfx940 <op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
2077  def _SADDR_RTN_gfx940 : FLAT_Real_gfx940 <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
2078}
2079
// Floating-point atomics for subtargets matching isGFX940Plus.
// The first eight definitions use the gfx940-specific multiclasses defined
// just above and reuse the values (0x4d, 0x4e, 0x4f-0x51) that the
// isGFX8GFX9NotGFX940 / isGFX90AOnly blocks assign to the same pseudos.
// The last four definitions go through the *_vi multiclasses instead, without
// the explicit second argument that the earlier blocks pass.
// NOTE(review): why these four use the *_vi multiclasses is not visible in
// this chunk; confirm against FLAT_Real_Atomics_vi /
// FLAT_Global_Real_Atomics_vi.
2080let SubtargetPredicate = isGFX940Plus in {
2081  // These instructions are encoded differently on gfx90* and gfx940.
2082  defm GLOBAL_ATOMIC_ADD_F32     : FLAT_Global_Real_Atomics_gfx940 <0x04d>;
2083  defm GLOBAL_ATOMIC_PK_ADD_F16  : FLAT_Global_Real_Atomics_gfx940 <0x04e>;
2084
2085  defm FLAT_ATOMIC_ADD_F64       : FLAT_Real_Atomics_gfx940<0x4f>;
2086  defm FLAT_ATOMIC_MIN_F64       : FLAT_Real_Atomics_gfx940<0x50>;
2087  defm FLAT_ATOMIC_MAX_F64       : FLAT_Real_Atomics_gfx940<0x51>;
2088  defm GLOBAL_ATOMIC_ADD_F64     : FLAT_Global_Real_Atomics_gfx940<0x4f>;
2089  defm GLOBAL_ATOMIC_MIN_F64     : FLAT_Global_Real_Atomics_gfx940<0x50>;
2090  defm GLOBAL_ATOMIC_MAX_F64     : FLAT_Global_Real_Atomics_gfx940<0x51>;
2091  defm FLAT_ATOMIC_ADD_F32       : FLAT_Real_Atomics_vi<0x4d>;
2092  defm FLAT_ATOMIC_PK_ADD_F16    : FLAT_Real_Atomics_vi<0x4e>;
2093  defm FLAT_ATOMIC_PK_ADD_BF16   : FLAT_Real_Atomics_vi<0x52>;
2094  defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>;
2095} // End SubtargetPredicate = isGFX940Plus
2096
2097//===----------------------------------------------------------------------===//
2098// GFX10.
2099//===----------------------------------------------------------------------===//
2100
// Real (encoded) FLAT instruction for GFX10.
//   op     - 7-bit value forwarded to FLAT_Real.
//   ps     - the FLAT_Pseudo this real instruction is built from.
//   opName - assembly name; defaults to the pseudo's Mnemonic.
// The record is tied to the pseudo through SIMCInstr with the GFX10 encoding
// family, is assembled only under isGFX10Only, and decodes in the "GFX10"
// namespace. On top of what FLAT_Real sets (not visible in this chunk) it
// assigns:
//   Inst{11-0}  - the low 12 bits of `offset`.
//   Inst{12}    - the DLC bit of `cpol` when the pseudo has a dlc operand,
//                 otherwise the pseudo's fixed dlcValue.
//   Inst{54-48} - `saddr` when the pseudo enables it; the low 7 bits of
//                 EXEC_HI.Index for a scratch pseudo without vaddr ("ST mode");
//                 otherwise the low 7 bits of SGPR_NULL_gfxpre11.Index.
//   Inst{55}    - always 0.
2101class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
2102    FLAT_Real<op, ps, opName>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10> {
2103  let AssemblerPredicate = isGFX10Only;
2104  let DecoderNamespace = "GFX10";
2105
2106  let Inst{11-0}  = offset{11-0};
2107  let Inst{12}    = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlcValue);
2108  let Inst{54-48} = !cond(ps.enabled_saddr : saddr,
2109                          !and(ps.is_flat_scratch, !not(ps.has_vaddr)) : EXEC_HI.Index{6-0}, // ST mode
2110                          true : SGPR_NULL_gfxpre11.Index{6-0});
2111  let Inst{55}    = 0;
2112}
2113
// Defines <NAME>_gfx10 from the pseudo named `psName`.
//   psName  - name of the source pseudo; defaults to the defm name.
//   asmName - assembly name; defaults to that pseudo's Mnemonic.
// Passing psName lets a record be named after the defm while being built from
// a differently named pseudo.
2114multiclass FLAT_Real_Base_gfx10<bits<7> op, string psName = NAME,
2115                                string asmName = !cast<FLAT_Pseudo>(psName).Mnemonic> {
2116  def _gfx10 :
2117    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(psName), asmName>;
2118}
2119
// Defines <NAME>_RTN_gfx10 from the pseudo `psName`_RTN.
// Note that the default asmName is taken from the pseudo `psName` itself, not
// from its _RTN sibling.
2120multiclass FLAT_Real_RTN_gfx10<bits<7> op, string psName = NAME,
2121                               string asmName = !cast<FLAT_Pseudo>(psName).Mnemonic> {
2122  def _RTN_gfx10 :
2123    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(psName#"_RTN"), asmName>;
2124}
2125
// Defines <NAME>_SADDR_gfx10 from the pseudo `psName`_SADDR. The default
// asmName is the Mnemonic of that _SADDR pseudo.
2126multiclass FLAT_Real_SADDR_gfx10<bits<7> op, string psName = NAME,
2127                                 string asmName = !cast<FLAT_Pseudo>(psName#"_SADDR").Mnemonic> {
2128  def _SADDR_gfx10 :
2129    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(psName#"_SADDR"), asmName>;
2130}
2131
// Defines <NAME>_SADDR_RTN_gfx10 from the pseudo `psName`_SADDR_RTN. The
// default asmName is the Mnemonic of that _SADDR_RTN pseudo.
2132multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op, string psName = NAME,
2133                                     string asmName = !cast<FLAT_Pseudo>(psName#"_SADDR_RTN").Mnemonic> {
2134  def _SADDR_RTN_gfx10 :
2135    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(psName#"_SADDR_RTN"), asmName>;
2136}
2137
// Defines <NAME>_ST_gfx10 from the pseudo <NAME>_ST. Unlike the sibling
// multiclasses it has no psName/asmName overrides, so FLAT_Real_gfx10 falls
// back to the pseudo's own Mnemonic for the assembly name.
2138multiclass FLAT_Real_ST_gfx10<bits<7> op> {
2139  def _ST_gfx10 :
2140    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_ST")>;
2141}
2142
// Base plus _SADDR GFX10 records for one operation. OpName and asmName are
// forwarded unchanged to both parents, so the _SADDR record gets the same
// assembly name as the base record.
2143multiclass FLAT_Real_AllAddr_gfx10<bits<7> op, string OpName = NAME,
2144                                   string asmName = !cast<FLAT_Pseudo>(OpName).Mnemonic> :
2145  FLAT_Real_Base_gfx10<op, OpName, asmName>,
2146  FLAT_Real_SADDR_gfx10<op, OpName, asmName>;
2147
// Base plus _RTN GFX10 records for a flat atomic. OpName selects the source
// pseudo (defaulting to the defm name) and asmName the assembly name; both are
// forwarded unchanged to the two parent multiclasses.
2148multiclass FLAT_Real_Atomics_gfx10<bits<7> op, string OpName = NAME,
2149                                   string asmName = !cast<FLAT_Pseudo>(OpName).Mnemonic> :
2150  FLAT_Real_Base_gfx10<op, OpName, asmName>,
2151  FLAT_Real_RTN_gfx10<op, OpName, asmName>;
2152
// Full GFX10 record set for a global atomic: base and _SADDR (through
// FLAT_Real_AllAddr_gfx10) plus _RTN and _SADDR_RTN. All four share `op`,
// OpName and asmName.
2153multiclass FLAT_Real_GlblAtomics_gfx10<bits<7> op, string OpName = NAME,
2154                                       string asmName = !cast<FLAT_Pseudo>(OpName).Mnemonic> :
2155  FLAT_Real_AllAddr_gfx10<op, OpName, asmName>,
2156  FLAT_Real_RTN_gfx10<op, OpName, asmName>,
2157  FLAT_Real_SADDR_RTN_gfx10<op, OpName, asmName>;
2158
// Returning-only GFX10 record set for a global atomic: defines just the _RTN
// and _SADDR_RTN records. No asmName is passed, so each parent uses its own
// default assembly name.
2159multiclass FLAT_Real_GlblAtomics_RTN_gfx10<bits<7> op, string OpName = NAME> :
2160  FLAT_Real_RTN_gfx10<op, OpName>,
2161  FLAT_Real_SADDR_RTN_gfx10<op, OpName>;
2162
// GFX10 record set for a scratch operation: base, _SADDR and _ST records, all
// with the same `op` and each using its parent's default names.
2163multiclass FLAT_Real_ScratchAllAddr_gfx10<bits<7> op> :
2164  FLAT_Real_Base_gfx10<op>,
2165  FLAT_Real_SADDR_gfx10<op>,
2166  FLAT_Real_ST_gfx10<op>;
2167
// Base and _SADDR GFX10 records for an LDS-load pseudo.
//   opname - assembly name to print; by default the pseudo's Mnemonic with
//            every "_lds" substring removed.
// AsmString is overridden for each record to
//   opname # <AsmOperands of the matching pseudo> # " lds"
// so the instruction is printed under the plain name with a trailing " lds".
2168multiclass FLAT_Real_AllAddr_LDS_gfx10<bits<7> op,
2169  string opname = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).Mnemonic)> {
2170  let AsmString = opname # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds" in
2171  defm "" : FLAT_Real_Base_gfx10<op>;
2172
2173  let AsmString = opname # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds" in
2174  defm "" : FLAT_Real_SADDR_gfx10<op>;
2175}
2176
// Scratch LDS-load record set for GFX10: the base and _SADDR records from
// FLAT_Real_AllAddr_LDS_gfx10 plus an _ST record whose AsmString is built the
// same way (opname # AsmOperands of the _ST pseudo # " lds").
// Note: `opname` is not forwarded to FLAT_Real_AllAddr_LDS_gfx10, which
// recomputes its own default; an explicitly passed opname therefore only
// affects the _ST record.
2177multiclass FLAT_Real_ScratchAllAddr_LDS_gfx10<bits<7> op,
2178  string opname = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).Mnemonic)> {
2179  defm "" : FLAT_Real_AllAddr_LDS_gfx10<op>;
2180
2181  let AsmString = opname # !cast<FLAT_Pseudo>(NAME#"_ST").AsmOperands # " lds" in
2182  defm "" : FLAT_Real_ST_gfx10<op>;
2183}
2184
2185// ENC_FLAT.
// GFX10 flat loads and stores define only the base record; flat atomics define
// base and _RTN records.
// Points worth noticing in the value assignment:
//  * DWORDX4 (0x00e / 0x01e) comes before DWORDX3 (0x00f / 0x01f).
//  * 0x034 and 0x054 are not used in this list (0x034 is GLOBAL_ATOMIC_CSUB in
//    the ENC_FLAT_GLBL list below).
//  * FLAT_ATOMIC_FMIN_X2 / FMAX_X2 are built from the FLAT_ATOMIC_MIN_F64 /
//    FLAT_ATOMIC_MAX_F64 pseudos, with the assembly names given explicitly.
2186defm FLAT_LOAD_UBYTE            : FLAT_Real_Base_gfx10<0x008>;
2187defm FLAT_LOAD_SBYTE            : FLAT_Real_Base_gfx10<0x009>;
2188defm FLAT_LOAD_USHORT           : FLAT_Real_Base_gfx10<0x00a>;
2189defm FLAT_LOAD_SSHORT           : FLAT_Real_Base_gfx10<0x00b>;
2190defm FLAT_LOAD_DWORD            : FLAT_Real_Base_gfx10<0x00c>;
2191defm FLAT_LOAD_DWORDX2          : FLAT_Real_Base_gfx10<0x00d>;
2192defm FLAT_LOAD_DWORDX4          : FLAT_Real_Base_gfx10<0x00e>;
2193defm FLAT_LOAD_DWORDX3          : FLAT_Real_Base_gfx10<0x00f>;
2194defm FLAT_STORE_BYTE            : FLAT_Real_Base_gfx10<0x018>;
2195defm FLAT_STORE_BYTE_D16_HI     : FLAT_Real_Base_gfx10<0x019>;
2196defm FLAT_STORE_SHORT           : FLAT_Real_Base_gfx10<0x01a>;
2197defm FLAT_STORE_SHORT_D16_HI    : FLAT_Real_Base_gfx10<0x01b>;
2198defm FLAT_STORE_DWORD           : FLAT_Real_Base_gfx10<0x01c>;
2199defm FLAT_STORE_DWORDX2         : FLAT_Real_Base_gfx10<0x01d>;
2200defm FLAT_STORE_DWORDX4         : FLAT_Real_Base_gfx10<0x01e>;
2201defm FLAT_STORE_DWORDX3         : FLAT_Real_Base_gfx10<0x01f>;
2202defm FLAT_LOAD_UBYTE_D16        : FLAT_Real_Base_gfx10<0x020>;
2203defm FLAT_LOAD_UBYTE_D16_HI     : FLAT_Real_Base_gfx10<0x021>;
2204defm FLAT_LOAD_SBYTE_D16        : FLAT_Real_Base_gfx10<0x022>;
2205defm FLAT_LOAD_SBYTE_D16_HI     : FLAT_Real_Base_gfx10<0x023>;
2206defm FLAT_LOAD_SHORT_D16        : FLAT_Real_Base_gfx10<0x024>;
2207defm FLAT_LOAD_SHORT_D16_HI     : FLAT_Real_Base_gfx10<0x025>;
2208defm FLAT_ATOMIC_SWAP           : FLAT_Real_Atomics_gfx10<0x030>;
2209defm FLAT_ATOMIC_CMPSWAP        : FLAT_Real_Atomics_gfx10<0x031>;
2210defm FLAT_ATOMIC_ADD            : FLAT_Real_Atomics_gfx10<0x032>;
2211defm FLAT_ATOMIC_SUB            : FLAT_Real_Atomics_gfx10<0x033>;
2212defm FLAT_ATOMIC_SMIN           : FLAT_Real_Atomics_gfx10<0x035>;
2213defm FLAT_ATOMIC_UMIN           : FLAT_Real_Atomics_gfx10<0x036>;
2214defm FLAT_ATOMIC_SMAX           : FLAT_Real_Atomics_gfx10<0x037>;
2215defm FLAT_ATOMIC_UMAX           : FLAT_Real_Atomics_gfx10<0x038>;
2216defm FLAT_ATOMIC_AND            : FLAT_Real_Atomics_gfx10<0x039>;
2217defm FLAT_ATOMIC_OR             : FLAT_Real_Atomics_gfx10<0x03a>;
2218defm FLAT_ATOMIC_XOR            : FLAT_Real_Atomics_gfx10<0x03b>;
2219defm FLAT_ATOMIC_INC            : FLAT_Real_Atomics_gfx10<0x03c>;
2220defm FLAT_ATOMIC_DEC            : FLAT_Real_Atomics_gfx10<0x03d>;
2221defm FLAT_ATOMIC_FCMPSWAP       : FLAT_Real_Atomics_gfx10<0x03e>;
2222defm FLAT_ATOMIC_FMIN           : FLAT_Real_Atomics_gfx10<0x03f>;
2223defm FLAT_ATOMIC_FMAX           : FLAT_Real_Atomics_gfx10<0x040>;
2224defm FLAT_ATOMIC_SWAP_X2        : FLAT_Real_Atomics_gfx10<0x050>;
2225defm FLAT_ATOMIC_CMPSWAP_X2     : FLAT_Real_Atomics_gfx10<0x051>;
2226defm FLAT_ATOMIC_ADD_X2         : FLAT_Real_Atomics_gfx10<0x052>;
2227defm FLAT_ATOMIC_SUB_X2         : FLAT_Real_Atomics_gfx10<0x053>;
2228defm FLAT_ATOMIC_SMIN_X2        : FLAT_Real_Atomics_gfx10<0x055>;
2229defm FLAT_ATOMIC_UMIN_X2        : FLAT_Real_Atomics_gfx10<0x056>;
2230defm FLAT_ATOMIC_SMAX_X2        : FLAT_Real_Atomics_gfx10<0x057>;
2231defm FLAT_ATOMIC_UMAX_X2        : FLAT_Real_Atomics_gfx10<0x058>;
2232defm FLAT_ATOMIC_AND_X2         : FLAT_Real_Atomics_gfx10<0x059>;
2233defm FLAT_ATOMIC_OR_X2          : FLAT_Real_Atomics_gfx10<0x05a>;
2234defm FLAT_ATOMIC_XOR_X2         : FLAT_Real_Atomics_gfx10<0x05b>;
2235defm FLAT_ATOMIC_INC_X2         : FLAT_Real_Atomics_gfx10<0x05c>;
2236defm FLAT_ATOMIC_DEC_X2         : FLAT_Real_Atomics_gfx10<0x05d>;
2237defm FLAT_ATOMIC_FCMPSWAP_X2    : FLAT_Real_Atomics_gfx10<0x05e>;
2238defm FLAT_ATOMIC_FMIN_X2        : FLAT_Real_Atomics_gfx10<0x05f, "FLAT_ATOMIC_MIN_F64", "flat_atomic_fmin_x2">;
2239defm FLAT_ATOMIC_FMAX_X2        : FLAT_Real_Atomics_gfx10<0x060, "FLAT_ATOMIC_MAX_F64", "flat_atomic_fmax_x2">;
2240
2241
2242// ENC_FLAT_GLBL.
// GFX10 global loads/stores define base and _SADDR records; global atomics
// additionally define _RTN and _SADDR_RTN records. Each GLOBAL_* entry uses
// the same value as the FLAT_* entry of the same operation above.
// Entries with no counterpart in the ENC_FLAT list:
//  * GLOBAL_ATOMIC_CSUB (0x034).
//  * GLOBAL_LOAD_DWORD_ADDTID / GLOBAL_STORE_DWORD_ADDTID (0x016 / 0x017).
// GLOBAL_ATOMIC_FMIN_X2 / FMAX_X2 are built from the GLOBAL_ATOMIC_MIN_F64 /
// GLOBAL_ATOMIC_MAX_F64 pseudos, with the assembly names given explicitly.
2243defm GLOBAL_LOAD_UBYTE          : FLAT_Real_AllAddr_gfx10<0x008>;
2244defm GLOBAL_LOAD_SBYTE          : FLAT_Real_AllAddr_gfx10<0x009>;
2245defm GLOBAL_LOAD_USHORT         : FLAT_Real_AllAddr_gfx10<0x00a>;
2246defm GLOBAL_LOAD_SSHORT         : FLAT_Real_AllAddr_gfx10<0x00b>;
2247defm GLOBAL_LOAD_DWORD          : FLAT_Real_AllAddr_gfx10<0x00c>;
2248defm GLOBAL_LOAD_DWORDX2        : FLAT_Real_AllAddr_gfx10<0x00d>;
2249defm GLOBAL_LOAD_DWORDX4        : FLAT_Real_AllAddr_gfx10<0x00e>;
2250defm GLOBAL_LOAD_DWORDX3        : FLAT_Real_AllAddr_gfx10<0x00f>;
2251defm GLOBAL_STORE_BYTE          : FLAT_Real_AllAddr_gfx10<0x018>;
2252defm GLOBAL_STORE_BYTE_D16_HI   : FLAT_Real_AllAddr_gfx10<0x019>;
2253defm GLOBAL_STORE_SHORT         : FLAT_Real_AllAddr_gfx10<0x01a>;
2254defm GLOBAL_STORE_SHORT_D16_HI  : FLAT_Real_AllAddr_gfx10<0x01b>;
2255defm GLOBAL_STORE_DWORD         : FLAT_Real_AllAddr_gfx10<0x01c>;
2256defm GLOBAL_STORE_DWORDX2       : FLAT_Real_AllAddr_gfx10<0x01d>;
2257defm GLOBAL_STORE_DWORDX4       : FLAT_Real_AllAddr_gfx10<0x01e>;
2258defm GLOBAL_STORE_DWORDX3       : FLAT_Real_AllAddr_gfx10<0x01f>;
2259defm GLOBAL_LOAD_UBYTE_D16      : FLAT_Real_AllAddr_gfx10<0x020>;
2260defm GLOBAL_LOAD_UBYTE_D16_HI   : FLAT_Real_AllAddr_gfx10<0x021>;
2261defm GLOBAL_LOAD_SBYTE_D16      : FLAT_Real_AllAddr_gfx10<0x022>;
2262defm GLOBAL_LOAD_SBYTE_D16_HI   : FLAT_Real_AllAddr_gfx10<0x023>;
2263defm GLOBAL_LOAD_SHORT_D16      : FLAT_Real_AllAddr_gfx10<0x024>;
2264defm GLOBAL_LOAD_SHORT_D16_HI   : FLAT_Real_AllAddr_gfx10<0x025>;
2265defm GLOBAL_ATOMIC_SWAP         : FLAT_Real_GlblAtomics_gfx10<0x030>;
2266defm GLOBAL_ATOMIC_CMPSWAP      : FLAT_Real_GlblAtomics_gfx10<0x031>;
2267defm GLOBAL_ATOMIC_ADD          : FLAT_Real_GlblAtomics_gfx10<0x032>;
2268defm GLOBAL_ATOMIC_SUB          : FLAT_Real_GlblAtomics_gfx10<0x033>;
2269defm GLOBAL_ATOMIC_CSUB         : FLAT_Real_GlblAtomics_gfx10<0x034>;
2270defm GLOBAL_ATOMIC_SMIN         : FLAT_Real_GlblAtomics_gfx10<0x035>;
2271defm GLOBAL_ATOMIC_UMIN         : FLAT_Real_GlblAtomics_gfx10<0x036>;
2272defm GLOBAL_ATOMIC_SMAX         : FLAT_Real_GlblAtomics_gfx10<0x037>;
2273defm GLOBAL_ATOMIC_UMAX         : FLAT_Real_GlblAtomics_gfx10<0x038>;
2274defm GLOBAL_ATOMIC_AND          : FLAT_Real_GlblAtomics_gfx10<0x039>;
2275defm GLOBAL_ATOMIC_OR           : FLAT_Real_GlblAtomics_gfx10<0x03a>;
2276defm GLOBAL_ATOMIC_XOR          : FLAT_Real_GlblAtomics_gfx10<0x03b>;
2277defm GLOBAL_ATOMIC_INC          : FLAT_Real_GlblAtomics_gfx10<0x03c>;
2278defm GLOBAL_ATOMIC_DEC          : FLAT_Real_GlblAtomics_gfx10<0x03d>;
2279defm GLOBAL_ATOMIC_FCMPSWAP     : FLAT_Real_GlblAtomics_gfx10<0x03e>;
2280defm GLOBAL_ATOMIC_FMIN         : FLAT_Real_GlblAtomics_gfx10<0x03f>;
2281defm GLOBAL_ATOMIC_FMAX         : FLAT_Real_GlblAtomics_gfx10<0x040>;
2282defm GLOBAL_ATOMIC_SWAP_X2      : FLAT_Real_GlblAtomics_gfx10<0x050>;
2283defm GLOBAL_ATOMIC_CMPSWAP_X2   : FLAT_Real_GlblAtomics_gfx10<0x051>;
2284defm GLOBAL_ATOMIC_ADD_X2       : FLAT_Real_GlblAtomics_gfx10<0x052>;
2285defm GLOBAL_ATOMIC_SUB_X2       : FLAT_Real_GlblAtomics_gfx10<0x053>;
2286defm GLOBAL_ATOMIC_SMIN_X2      : FLAT_Real_GlblAtomics_gfx10<0x055>;
2287defm GLOBAL_ATOMIC_UMIN_X2      : FLAT_Real_GlblAtomics_gfx10<0x056>;
2288defm GLOBAL_ATOMIC_SMAX_X2      : FLAT_Real_GlblAtomics_gfx10<0x057>;
2289defm GLOBAL_ATOMIC_UMAX_X2      : FLAT_Real_GlblAtomics_gfx10<0x058>;
2290defm GLOBAL_ATOMIC_AND_X2       : FLAT_Real_GlblAtomics_gfx10<0x059>;
2291defm GLOBAL_ATOMIC_OR_X2        : FLAT_Real_GlblAtomics_gfx10<0x05a>;
2292defm GLOBAL_ATOMIC_XOR_X2       : FLAT_Real_GlblAtomics_gfx10<0x05b>;
2293defm GLOBAL_ATOMIC_INC_X2       : FLAT_Real_GlblAtomics_gfx10<0x05c>;
2294defm GLOBAL_ATOMIC_DEC_X2       : FLAT_Real_GlblAtomics_gfx10<0x05d>;
2295defm GLOBAL_ATOMIC_FCMPSWAP_X2  : FLAT_Real_GlblAtomics_gfx10<0x05e>;
2296defm GLOBAL_ATOMIC_FMIN_X2      : FLAT_Real_GlblAtomics_gfx10<0x05f, "GLOBAL_ATOMIC_MIN_F64", "global_atomic_fmin_x2">;
2297defm GLOBAL_ATOMIC_FMAX_X2      : FLAT_Real_GlblAtomics_gfx10<0x060, "GLOBAL_ATOMIC_MAX_F64", "global_atomic_fmax_x2">;
2298defm GLOBAL_LOAD_DWORD_ADDTID   : FLAT_Real_AllAddr_gfx10<0x016>;
2299defm GLOBAL_STORE_DWORD_ADDTID  : FLAT_Real_AllAddr_gfx10<0x017>;
2300
// GFX10 global-to-LDS loads. Each entry uses the same value (0x008-0x00c) as
// the plain GLOBAL_LOAD_* entry of the same width; FLAT_Real_AllAddr_LDS_gfx10
// overrides AsmString so the record prints the mnemonic without "_lds" and
// with a trailing " lds".
2301defm GLOBAL_LOAD_LDS_UBYTE      : FLAT_Real_AllAddr_LDS_gfx10 <0x008>;
2302defm GLOBAL_LOAD_LDS_SBYTE      : FLAT_Real_AllAddr_LDS_gfx10 <0x009>;
2303defm GLOBAL_LOAD_LDS_USHORT     : FLAT_Real_AllAddr_LDS_gfx10 <0x00a>;
2304defm GLOBAL_LOAD_LDS_SSHORT     : FLAT_Real_AllAddr_LDS_gfx10 <0x00b>;
2305defm GLOBAL_LOAD_LDS_DWORD      : FLAT_Real_AllAddr_LDS_gfx10 <0x00c>;
2306
2307// ENC_FLAT_SCRATCH.
// GFX10 scratch loads and stores define base, _SADDR and _ST records. The
// values match the FLAT_* / GLOBAL_* load and store entries above, including
// DWORDX4 (0x00e / 0x01e) preceding DWORDX3 (0x00f / 0x01f).
2308defm SCRATCH_LOAD_UBYTE         : FLAT_Real_ScratchAllAddr_gfx10<0x008>;
2309defm SCRATCH_LOAD_SBYTE         : FLAT_Real_ScratchAllAddr_gfx10<0x009>;
2310defm SCRATCH_LOAD_USHORT        : FLAT_Real_ScratchAllAddr_gfx10<0x00a>;
2311defm SCRATCH_LOAD_SSHORT        : FLAT_Real_ScratchAllAddr_gfx10<0x00b>;
2312defm SCRATCH_LOAD_DWORD         : FLAT_Real_ScratchAllAddr_gfx10<0x00c>;
2313defm SCRATCH_LOAD_DWORDX2       : FLAT_Real_ScratchAllAddr_gfx10<0x00d>;
2314defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_ScratchAllAddr_gfx10<0x00e>;
2315defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_ScratchAllAddr_gfx10<0x00f>;
2316defm SCRATCH_STORE_BYTE         : FLAT_Real_ScratchAllAddr_gfx10<0x018>;
2317defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x019>;
2318defm SCRATCH_STORE_SHORT        : FLAT_Real_ScratchAllAddr_gfx10<0x01a>;
2319defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x01b>;
2320defm SCRATCH_STORE_DWORD        : FLAT_Real_ScratchAllAddr_gfx10<0x01c>;
2321defm SCRATCH_STORE_DWORDX2      : FLAT_Real_ScratchAllAddr_gfx10<0x01d>;
2322defm SCRATCH_STORE_DWORDX4      : FLAT_Real_ScratchAllAddr_gfx10<0x01e>;
2323defm SCRATCH_STORE_DWORDX3      : FLAT_Real_ScratchAllAddr_gfx10<0x01f>;
2324defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_ScratchAllAddr_gfx10<0x020>;
2325defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x021>;
2326defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_ScratchAllAddr_gfx10<0x022>;
2327defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x023>;
2328defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_ScratchAllAddr_gfx10<0x024>;
2329defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x025>;
2330
// GFX10 scratch-to-LDS loads: base, _SADDR and _ST records per entry, using
// the same values (0x008-0x00c) as the plain SCRATCH_LOAD_* entries of the
// same width, with the " lds" AsmString override from
// FLAT_Real_ScratchAllAddr_LDS_gfx10.
2331defm SCRATCH_LOAD_LDS_UBYTE     : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x008>;
2332defm SCRATCH_LOAD_LDS_SBYTE     : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x009>;
2333defm SCRATCH_LOAD_LDS_USHORT    : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00a>;
2334defm SCRATCH_LOAD_LDS_SSHORT    : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00b>;
2335defm SCRATCH_LOAD_LDS_DWORD     : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00c>;
2336
2337//===----------------------------------------------------------------------===//
2338// GFX11
2339//===----------------------------------------------------------------------===//
2340
// Helper that exposes the Mnemonic of the FLAT_Pseudo record called `name`.
// It is used below as the default for the `name` template argument of the
// GFX11 multiclasses and by FLAT_Aliases_gfx11.
2341class get_FLAT_ps<string name> {
2342  string Mnemonic = !cast<FLAT_Pseudo>(name).Mnemonic;
2343}
2344
// Defines <NAME>_gfx11, the GFX11 real instruction for the pseudo <NAME>.
//   op   - 7-bit value forwarded to FLAT_Real.
//   name - assembly name; defaults to the pseudo's Mnemonic.
// The record is tied to the pseudo through SIMCInstr with the GFX11 encoding
// family, is assembled only under isGFX11Only, and decodes in the "GFX11"
// namespace. On top of what FLAT_Real sets (not visible in this chunk) it
// assigns:
//   Inst{13}    - DLC bit of `cpol` if the pseudo has a dlc operand, otherwise
//                 the pseudo's fixed dlcValue.
//   Inst{14}    - GLC bit of `cpol` if the pseudo has a glc operand, otherwise
//                 the pseudo's fixed glcValue.
//   Inst{15}    - SLC bit of `cpol`.
//   Inst{17-16} - `seg`.
//   Inst{54-48} - `saddr` when the pseudo enables it, otherwise
//                 SGPR_NULL_gfx11plus.Index.
//   Inst{55}    - the pseudo's `sve` value.
2345multiclass FLAT_Real_gfx11 <bits<7> op,
2346                            string name = get_FLAT_ps<NAME>.Mnemonic> {
2347  defvar ps = !cast<FLAT_Pseudo>(NAME);
2348  def _gfx11 : FLAT_Real <op, ps, name>,
2349               SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX11> {
2350    let AssemblerPredicate = isGFX11Only;
2351    let DecoderNamespace = "GFX11";
2352
2353    let Inst{13}    = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlcValue);
2354    let Inst{14}    = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue);
2355    let Inst{15}    = cpol{CPolBit.SLC};
2356    let Inst{17-16} = seg;
2357    let Inst{54-48} = !if(ps.enabled_saddr, saddr, SGPR_NULL_gfx11plus.Index);
2358    let Inst{55}    = ps.sve;
2359  }
2360}
2361
// When the GFX11 assembly name `name` differs from the Mnemonic of the pseudo
// <NAME>, defines an anonymous AMDGPUMnemonicAlias<ps.Mnemonic, name>
// restricted to isGFX11Only. Nothing is defined when the two names are equal.
// NOTE(review): presumably this keeps the pre-GFX11 spelling accepted by the
// assembler - confirm against the AMDGPUMnemonicAlias definition.
2362multiclass FLAT_Aliases_gfx11<string name> {
2363  defvar ps = get_FLAT_ps<NAME>;
2364  if !ne(ps.Mnemonic, name) then
2365    def : AMDGPUMnemonicAlias<ps.Mnemonic, name> {
2366      let AssemblerPredicate = isGFX11Only;
2367    }
2368}
2369
// GFX11 base record for one pseudo: the mnemonic alias (only when `name`
// differs from the pseudo's Mnemonic) plus the <NAME>_gfx11 real instruction.
2370multiclass FLAT_Real_Base_gfx11<bits<7> op,
2371                                string name = get_FLAT_ps<NAME>.Mnemonic> :
2372  FLAT_Aliases_gfx11<name>,
2373  FLAT_Real_gfx11<op, name>;
2374
// GFX11 records for a flat atomic: the base record (with alias) plus the
// returning form. The nested `defm _RTN` appends "_RTN" to NAME, so the second
// record is <NAME>_RTN_gfx11 built from the pseudo <NAME>_RTN. It goes through
// FLAT_Real_gfx11 directly, so no extra alias is created for it.
2375multiclass FLAT_Real_Atomics_gfx11<bits<7> op,
2376                                   string name = get_FLAT_ps<NAME>.Mnemonic> :
2377  FLAT_Real_Base_gfx11<op, name> {
2378  defm _RTN : FLAT_Real_gfx11<op, name>;
2379}
2380
// GFX11 records for a global load/store: the base record (with alias) plus
// <NAME>_SADDR_gfx11 built from the pseudo <NAME>_SADDR. Both share `op` and
// the assembly name `name`.
2381multiclass GLOBAL_Real_AllAddr_gfx11<bits<7> op,
2382                                     string name = get_FLAT_ps<NAME>.Mnemonic> :
2383  FLAT_Real_Base_gfx11<op, name> {
2384  defm _SADDR : FLAT_Real_gfx11<op, name>;
2385}
2386
// GFX11 records for a global atomic: everything from
// GLOBAL_Real_AllAddr_gfx11 (base, alias, _SADDR) plus the returning forms
// <NAME>_RTN_gfx11 and <NAME>_SADDR_RTN_gfx11. All share `op` and `name`.
2387multiclass GLOBAL_Real_Atomics_gfx11<bits<7> op,
2388                                     string name = get_FLAT_ps<NAME>.Mnemonic> :
2389  GLOBAL_Real_AllAddr_gfx11<op, name> {
2390  defm _RTN : FLAT_Real_gfx11<op, name>;
2391  defm _SADDR_RTN : FLAT_Real_gfx11<op, name>;
2392}
2393
// GFX11 records for a scratch load/store: the base record (with alias) plus
// the _SADDR, _ST and _SVS forms, each built from the pseudo of the same
// suffix. All share `op` and `name`.
2394multiclass SCRATCH_Real_AllAddr_gfx11<bits<7> op,
2395                                     string name = get_FLAT_ps<NAME>.Mnemonic> :
2396  FLAT_Real_Base_gfx11<op, name> {
2397  defm _SADDR : FLAT_Real_gfx11<op, name>;
2398  defm _ST : FLAT_Real_gfx11<op, name>;
2399  defm _SVS : FLAT_Real_gfx11<op, name>;
2400}
2401
2402// ENC_FLAT.
// GFX11 flat instructions. The second argument is the GFX11 assembly name;
// when it differs from the pseudo's Mnemonic, FLAT_Real_Base_gfx11 also
// defines a mnemonic alias. Unlike the GFX10 list, DWORDX3 precedes DWORDX4
// here, and the D16_HI stores are placed after the D16 loads (0x024 / 0x025).
// FLAT_ATOMIC_ADD_F32 passes no name and therefore uses the pseudo's Mnemonic.
2403defm FLAT_LOAD_UBYTE            : FLAT_Real_Base_gfx11<0x010, "flat_load_u8">;
2404defm FLAT_LOAD_SBYTE            : FLAT_Real_Base_gfx11<0x011, "flat_load_i8">;
2405defm FLAT_LOAD_USHORT           : FLAT_Real_Base_gfx11<0x012, "flat_load_u16">;
2406defm FLAT_LOAD_SSHORT           : FLAT_Real_Base_gfx11<0x013, "flat_load_i16">;
2407defm FLAT_LOAD_DWORD            : FLAT_Real_Base_gfx11<0x014, "flat_load_b32">;
2408defm FLAT_LOAD_DWORDX2          : FLAT_Real_Base_gfx11<0x015, "flat_load_b64">;
2409defm FLAT_LOAD_DWORDX3          : FLAT_Real_Base_gfx11<0x016, "flat_load_b96">;
2410defm FLAT_LOAD_DWORDX4          : FLAT_Real_Base_gfx11<0x017, "flat_load_b128">;
2411defm FLAT_STORE_BYTE            : FLAT_Real_Base_gfx11<0x018, "flat_store_b8">;
2412defm FLAT_STORE_SHORT           : FLAT_Real_Base_gfx11<0x019, "flat_store_b16">;
2413defm FLAT_STORE_DWORD           : FLAT_Real_Base_gfx11<0x01a, "flat_store_b32">;
2414defm FLAT_STORE_DWORDX2         : FLAT_Real_Base_gfx11<0x01b, "flat_store_b64">;
2415defm FLAT_STORE_DWORDX3         : FLAT_Real_Base_gfx11<0x01c, "flat_store_b96">;
2416defm FLAT_STORE_DWORDX4         : FLAT_Real_Base_gfx11<0x01d, "flat_store_b128">;
2417defm FLAT_LOAD_UBYTE_D16        : FLAT_Real_Base_gfx11<0x01e, "flat_load_d16_u8">;
2418defm FLAT_LOAD_SBYTE_D16        : FLAT_Real_Base_gfx11<0x01f, "flat_load_d16_i8">;
2419defm FLAT_LOAD_SHORT_D16        : FLAT_Real_Base_gfx11<0x020, "flat_load_d16_b16">;
2420defm FLAT_LOAD_UBYTE_D16_HI     : FLAT_Real_Base_gfx11<0x021, "flat_load_d16_hi_u8">;
2421defm FLAT_LOAD_SBYTE_D16_HI     : FLAT_Real_Base_gfx11<0x022, "flat_load_d16_hi_i8">;
2422defm FLAT_LOAD_SHORT_D16_HI     : FLAT_Real_Base_gfx11<0x023, "flat_load_d16_hi_b16">;
2423defm FLAT_STORE_BYTE_D16_HI     : FLAT_Real_Base_gfx11<0x024, "flat_store_d16_hi_b8">;
2424defm FLAT_STORE_SHORT_D16_HI    : FLAT_Real_Base_gfx11<0x025, "flat_store_d16_hi_b16">;
2425defm FLAT_ATOMIC_SWAP           : FLAT_Real_Atomics_gfx11<0x033, "flat_atomic_swap_b32">;
2426defm FLAT_ATOMIC_CMPSWAP        : FLAT_Real_Atomics_gfx11<0x034, "flat_atomic_cmpswap_b32">;
2427defm FLAT_ATOMIC_ADD            : FLAT_Real_Atomics_gfx11<0x035, "flat_atomic_add_u32">;
2428defm FLAT_ATOMIC_SUB            : FLAT_Real_Atomics_gfx11<0x036, "flat_atomic_sub_u32">;
2429defm FLAT_ATOMIC_SMIN           : FLAT_Real_Atomics_gfx11<0x038, "flat_atomic_min_i32">;
2430defm FLAT_ATOMIC_UMIN           : FLAT_Real_Atomics_gfx11<0x039, "flat_atomic_min_u32">;
2431defm FLAT_ATOMIC_SMAX           : FLAT_Real_Atomics_gfx11<0x03a, "flat_atomic_max_i32">;
2432defm FLAT_ATOMIC_UMAX           : FLAT_Real_Atomics_gfx11<0x03b, "flat_atomic_max_u32">;
2433defm FLAT_ATOMIC_AND            : FLAT_Real_Atomics_gfx11<0x03c, "flat_atomic_and_b32">;
2434defm FLAT_ATOMIC_OR             : FLAT_Real_Atomics_gfx11<0x03d, "flat_atomic_or_b32">;
2435defm FLAT_ATOMIC_XOR            : FLAT_Real_Atomics_gfx11<0x03e, "flat_atomic_xor_b32">;
2436defm FLAT_ATOMIC_INC            : FLAT_Real_Atomics_gfx11<0x03f, "flat_atomic_inc_u32">;
2437defm FLAT_ATOMIC_DEC            : FLAT_Real_Atomics_gfx11<0x040, "flat_atomic_dec_u32">;
2438defm FLAT_ATOMIC_SWAP_X2        : FLAT_Real_Atomics_gfx11<0x041, "flat_atomic_swap_b64">;
2439defm FLAT_ATOMIC_CMPSWAP_X2     : FLAT_Real_Atomics_gfx11<0x042, "flat_atomic_cmpswap_b64">;
2440defm FLAT_ATOMIC_ADD_X2         : FLAT_Real_Atomics_gfx11<0x043, "flat_atomic_add_u64">;
2441defm FLAT_ATOMIC_SUB_X2         : FLAT_Real_Atomics_gfx11<0x044, "flat_atomic_sub_u64">;
2442defm FLAT_ATOMIC_SMIN_X2        : FLAT_Real_Atomics_gfx11<0x045, "flat_atomic_min_i64">;
2443defm FLAT_ATOMIC_UMIN_X2        : FLAT_Real_Atomics_gfx11<0x046, "flat_atomic_min_u64">;
2444defm FLAT_ATOMIC_SMAX_X2        : FLAT_Real_Atomics_gfx11<0x047, "flat_atomic_max_i64">;
2445defm FLAT_ATOMIC_UMAX_X2        : FLAT_Real_Atomics_gfx11<0x048, "flat_atomic_max_u64">;
2446defm FLAT_ATOMIC_AND_X2         : FLAT_Real_Atomics_gfx11<0x049, "flat_atomic_and_b64">;
2447defm FLAT_ATOMIC_OR_X2          : FLAT_Real_Atomics_gfx11<0x04a, "flat_atomic_or_b64">;
2448defm FLAT_ATOMIC_XOR_X2         : FLAT_Real_Atomics_gfx11<0x04b, "flat_atomic_xor_b64">;
2449defm FLAT_ATOMIC_INC_X2         : FLAT_Real_Atomics_gfx11<0x04c, "flat_atomic_inc_u64">;
2450defm FLAT_ATOMIC_DEC_X2         : FLAT_Real_Atomics_gfx11<0x04d, "flat_atomic_dec_u64">;
2451defm FLAT_ATOMIC_FCMPSWAP       : FLAT_Real_Atomics_gfx11<0x050, "flat_atomic_cmpswap_f32">;
2452defm FLAT_ATOMIC_FMIN           : FLAT_Real_Atomics_gfx11<0x051, "flat_atomic_min_f32">;
2453defm FLAT_ATOMIC_FMAX           : FLAT_Real_Atomics_gfx11<0x052, "flat_atomic_max_f32">;
2454defm FLAT_ATOMIC_ADD_F32        : FLAT_Real_Atomics_gfx11<0x056>;
2455
2456// ENC_FLAT_GLBL.
// GFX11 global instructions. Each entry uses the same value as the FLAT_*
// entry of the same operation above, with the GFX11 assembly name as second
// argument. Entries with no counterpart in the ENC_FLAT list:
//  * GLOBAL_LOAD_DWORD_ADDTID / GLOBAL_STORE_DWORD_ADDTID (0x028 / 0x029).
//  * GLOBAL_ATOMIC_CSUB (0x037).
// GLOBAL_ATOMIC_ADD_F32 passes no name and therefore uses the pseudo's
// Mnemonic.
2457defm GLOBAL_LOAD_UBYTE          : GLOBAL_Real_AllAddr_gfx11<0x010, "global_load_u8">;
2458defm GLOBAL_LOAD_SBYTE          : GLOBAL_Real_AllAddr_gfx11<0x011, "global_load_i8">;
2459defm GLOBAL_LOAD_USHORT         : GLOBAL_Real_AllAddr_gfx11<0x012, "global_load_u16">;
2460defm GLOBAL_LOAD_SSHORT         : GLOBAL_Real_AllAddr_gfx11<0x013, "global_load_i16">;
2461defm GLOBAL_LOAD_DWORD          : GLOBAL_Real_AllAddr_gfx11<0x014, "global_load_b32">;
2462defm GLOBAL_LOAD_DWORDX2        : GLOBAL_Real_AllAddr_gfx11<0x015, "global_load_b64">;
2463defm GLOBAL_LOAD_DWORDX3        : GLOBAL_Real_AllAddr_gfx11<0x016, "global_load_b96">;
2464defm GLOBAL_LOAD_DWORDX4        : GLOBAL_Real_AllAddr_gfx11<0x017, "global_load_b128">;
2465defm GLOBAL_STORE_BYTE          : GLOBAL_Real_AllAddr_gfx11<0x018, "global_store_b8">;
2466defm GLOBAL_STORE_SHORT         : GLOBAL_Real_AllAddr_gfx11<0x019, "global_store_b16">;
2467defm GLOBAL_STORE_DWORD         : GLOBAL_Real_AllAddr_gfx11<0x01a, "global_store_b32">;
2468defm GLOBAL_STORE_DWORDX2       : GLOBAL_Real_AllAddr_gfx11<0x01b, "global_store_b64">;
2469defm GLOBAL_STORE_DWORDX3       : GLOBAL_Real_AllAddr_gfx11<0x01c, "global_store_b96">;
2470defm GLOBAL_STORE_DWORDX4       : GLOBAL_Real_AllAddr_gfx11<0x01d, "global_store_b128">;
2471defm GLOBAL_LOAD_UBYTE_D16      : GLOBAL_Real_AllAddr_gfx11<0x01e, "global_load_d16_u8">;
2472defm GLOBAL_LOAD_SBYTE_D16      : GLOBAL_Real_AllAddr_gfx11<0x01f, "global_load_d16_i8">;
2473defm GLOBAL_LOAD_SHORT_D16      : GLOBAL_Real_AllAddr_gfx11<0x020, "global_load_d16_b16">;
2474defm GLOBAL_LOAD_UBYTE_D16_HI   : GLOBAL_Real_AllAddr_gfx11<0x021, "global_load_d16_hi_u8">;
2475defm GLOBAL_LOAD_SBYTE_D16_HI   : GLOBAL_Real_AllAddr_gfx11<0x022, "global_load_d16_hi_i8">;
2476defm GLOBAL_LOAD_SHORT_D16_HI   : GLOBAL_Real_AllAddr_gfx11<0x023, "global_load_d16_hi_b16">;
2477defm GLOBAL_STORE_BYTE_D16_HI   : GLOBAL_Real_AllAddr_gfx11<0x024, "global_store_d16_hi_b8">;
2478defm GLOBAL_STORE_SHORT_D16_HI  : GLOBAL_Real_AllAddr_gfx11<0x025, "global_store_d16_hi_b16">;
2479defm GLOBAL_LOAD_DWORD_ADDTID   : GLOBAL_Real_AllAddr_gfx11<0x028, "global_load_addtid_b32">;
2480defm GLOBAL_STORE_DWORD_ADDTID  : GLOBAL_Real_AllAddr_gfx11<0x029, "global_store_addtid_b32">;
2481defm GLOBAL_ATOMIC_SWAP         : GLOBAL_Real_Atomics_gfx11<0x033, "global_atomic_swap_b32">;
2482defm GLOBAL_ATOMIC_CMPSWAP      : GLOBAL_Real_Atomics_gfx11<0x034, "global_atomic_cmpswap_b32">;
2483defm GLOBAL_ATOMIC_ADD          : GLOBAL_Real_Atomics_gfx11<0x035, "global_atomic_add_u32">;
2484defm GLOBAL_ATOMIC_SUB          : GLOBAL_Real_Atomics_gfx11<0x036, "global_atomic_sub_u32">;
2485defm GLOBAL_ATOMIC_CSUB         : GLOBAL_Real_Atomics_gfx11<0x037, "global_atomic_csub_u32">;
2486defm GLOBAL_ATOMIC_SMIN         : GLOBAL_Real_Atomics_gfx11<0x038, "global_atomic_min_i32">;
2487defm GLOBAL_ATOMIC_UMIN         : GLOBAL_Real_Atomics_gfx11<0x039, "global_atomic_min_u32">;
2488defm GLOBAL_ATOMIC_SMAX         : GLOBAL_Real_Atomics_gfx11<0x03a, "global_atomic_max_i32">;
2489defm GLOBAL_ATOMIC_UMAX         : GLOBAL_Real_Atomics_gfx11<0x03b, "global_atomic_max_u32">;
2490defm GLOBAL_ATOMIC_AND          : GLOBAL_Real_Atomics_gfx11<0x03c, "global_atomic_and_b32">;
2491defm GLOBAL_ATOMIC_OR           : GLOBAL_Real_Atomics_gfx11<0x03d, "global_atomic_or_b32">;
2492defm GLOBAL_ATOMIC_XOR          : GLOBAL_Real_Atomics_gfx11<0x03e, "global_atomic_xor_b32">;
2493defm GLOBAL_ATOMIC_INC          : GLOBAL_Real_Atomics_gfx11<0x03f, "global_atomic_inc_u32">;
2494defm GLOBAL_ATOMIC_DEC          : GLOBAL_Real_Atomics_gfx11<0x040, "global_atomic_dec_u32">;
2495defm GLOBAL_ATOMIC_SWAP_X2      : GLOBAL_Real_Atomics_gfx11<0x041, "global_atomic_swap_b64">;
2496defm GLOBAL_ATOMIC_CMPSWAP_X2   : GLOBAL_Real_Atomics_gfx11<0x042, "global_atomic_cmpswap_b64">;
2497defm GLOBAL_ATOMIC_ADD_X2       : GLOBAL_Real_Atomics_gfx11<0x043, "global_atomic_add_u64">;
2498defm GLOBAL_ATOMIC_SUB_X2       : GLOBAL_Real_Atomics_gfx11<0x044, "global_atomic_sub_u64">;
2499defm GLOBAL_ATOMIC_SMIN_X2      : GLOBAL_Real_Atomics_gfx11<0x045, "global_atomic_min_i64">;
2500defm GLOBAL_ATOMIC_UMIN_X2      : GLOBAL_Real_Atomics_gfx11<0x046, "global_atomic_min_u64">;
2501defm GLOBAL_ATOMIC_SMAX_X2      : GLOBAL_Real_Atomics_gfx11<0x047, "global_atomic_max_i64">;
2502defm GLOBAL_ATOMIC_UMAX_X2      : GLOBAL_Real_Atomics_gfx11<0x048, "global_atomic_max_u64">;
2503defm GLOBAL_ATOMIC_AND_X2       : GLOBAL_Real_Atomics_gfx11<0x049, "global_atomic_and_b64">;
2504defm GLOBAL_ATOMIC_OR_X2        : GLOBAL_Real_Atomics_gfx11<0x04a, "global_atomic_or_b64">;
2505defm GLOBAL_ATOMIC_XOR_X2       : GLOBAL_Real_Atomics_gfx11<0x04b, "global_atomic_xor_b64">;
2506defm GLOBAL_ATOMIC_INC_X2       : GLOBAL_Real_Atomics_gfx11<0x04c, "global_atomic_inc_u64">;
2507defm GLOBAL_ATOMIC_DEC_X2       : GLOBAL_Real_Atomics_gfx11<0x04d, "global_atomic_dec_u64">;
2508defm GLOBAL_ATOMIC_FCMPSWAP     : GLOBAL_Real_Atomics_gfx11<0x050, "global_atomic_cmpswap_f32">;
2509defm GLOBAL_ATOMIC_FMIN         : GLOBAL_Real_Atomics_gfx11<0x051, "global_atomic_min_f32">;
2510defm GLOBAL_ATOMIC_FMAX         : GLOBAL_Real_Atomics_gfx11<0x052, "global_atomic_max_f32">;
2511defm GLOBAL_ATOMIC_ADD_F32      : GLOBAL_Real_Atomics_gfx11<0x056>;
2512
2513// ENC_FLAT_SCRATCH.
// GFX11 real-instruction definitions for the SCRATCH pseudos. The numeric
// values and the name suffixes (u8, i8, b32, d16_hi_b16, ...) mirror the
// GLOBAL entries that appear earlier in this GFX11 section.
// SCRATCH_Real_AllAddr_gfx11 is defined outside this section; presumably the
// first argument is the opcode and the second the GFX11 mnemonic - TODO
// confirm.
2514defm SCRATCH_LOAD_UBYTE         : SCRATCH_Real_AllAddr_gfx11<0x10, "scratch_load_u8">;
2515defm SCRATCH_LOAD_SBYTE         : SCRATCH_Real_AllAddr_gfx11<0x11, "scratch_load_i8">;
2516defm SCRATCH_LOAD_USHORT        : SCRATCH_Real_AllAddr_gfx11<0x12, "scratch_load_u16">;
2517defm SCRATCH_LOAD_SSHORT        : SCRATCH_Real_AllAddr_gfx11<0x13, "scratch_load_i16">;
2518defm SCRATCH_LOAD_DWORD         : SCRATCH_Real_AllAddr_gfx11<0x14, "scratch_load_b32">;
2519defm SCRATCH_LOAD_DWORDX2       : SCRATCH_Real_AllAddr_gfx11<0x15, "scratch_load_b64">;
2520defm SCRATCH_LOAD_DWORDX3       : SCRATCH_Real_AllAddr_gfx11<0x16, "scratch_load_b96">;
2521defm SCRATCH_LOAD_DWORDX4       : SCRATCH_Real_AllAddr_gfx11<0x17, "scratch_load_b128">;
2522defm SCRATCH_STORE_BYTE         : SCRATCH_Real_AllAddr_gfx11<0x18, "scratch_store_b8">;
2523defm SCRATCH_STORE_SHORT        : SCRATCH_Real_AllAddr_gfx11<0x19, "scratch_store_b16">;
2524defm SCRATCH_STORE_DWORD        : SCRATCH_Real_AllAddr_gfx11<0x1a, "scratch_store_b32">;
2525defm SCRATCH_STORE_DWORDX2      : SCRATCH_Real_AllAddr_gfx11<0x1b, "scratch_store_b64">;
2526defm SCRATCH_STORE_DWORDX3      : SCRATCH_Real_AllAddr_gfx11<0x1c, "scratch_store_b96">;
2527defm SCRATCH_STORE_DWORDX4      : SCRATCH_Real_AllAddr_gfx11<0x1d, "scratch_store_b128">;
2528defm SCRATCH_LOAD_UBYTE_D16     : SCRATCH_Real_AllAddr_gfx11<0x1e, "scratch_load_d16_u8">;
2529defm SCRATCH_LOAD_SBYTE_D16     : SCRATCH_Real_AllAddr_gfx11<0x1f, "scratch_load_d16_i8">;
2530defm SCRATCH_LOAD_SHORT_D16     : SCRATCH_Real_AllAddr_gfx11<0x20, "scratch_load_d16_b16">;
2531defm SCRATCH_LOAD_UBYTE_D16_HI  : SCRATCH_Real_AllAddr_gfx11<0x21, "scratch_load_d16_hi_u8">;
2532defm SCRATCH_LOAD_SBYTE_D16_HI  : SCRATCH_Real_AllAddr_gfx11<0x22, "scratch_load_d16_hi_i8">;
2533defm SCRATCH_LOAD_SHORT_D16_HI  : SCRATCH_Real_AllAddr_gfx11<0x23, "scratch_load_d16_hi_b16">;
2534defm SCRATCH_STORE_BYTE_D16_HI  : SCRATCH_Real_AllAddr_gfx11<0x24, "scratch_store_d16_hi_b8">;
2535defm SCRATCH_STORE_SHORT_D16_HI : SCRATCH_Real_AllAddr_gfx11<0x25, "scratch_store_d16_hi_b16">;
2536
2537//===----------------------------------------------------------------------===//
2538// GFX12
2539//===----------------------------------------------------------------------===//
2540
// Emits the GFX12 real (encoded) instruction record for the FLAT pseudo whose
// record name is NAME. The new record is named NAME#"_gfx12".
//   op   - 8-bit value forwarded as the first argument of VFLAT_Real.
//   name - assembly mnemonic forwarded to VFLAT_Real; defaults to the
//          Mnemonic field reached through get_FLAT_ps<NAME>.
2541multiclass VFLAT_Real_gfx12 <bits<8> op, string name = get_FLAT_ps<NAME>.Mnemonic> {
      // Resolve the pseudo record from the name this multiclass is
      // instantiated under.
2542  defvar ps = !cast<FLAT_Pseudo>(NAME);
2543  def _gfx12 : VFLAT_Real <op, ps, name>,
2544               SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX12> {
2545    let AssemblerPredicate = isGFX12Only;
2546    let DecoderNamespace = "GFX12";
2547
        // Two encoding bits taken from the pseudo's flags: bit 25 receives
        // is_flat_global and bit 24 receives is_flat_scratch (the first
        // element of a brace list is the most significant bit).
2548    let Inst{25-24} = {ps.is_flat_global, ps.is_flat_scratch};
2549  }
2550}
2551
// Emits up to two anonymous AMDGPUMnemonicAlias records, both guarded by
// isGFX12Only, that pair an additional spelling with the GFX12 mnemonic
// `name`:
//   * the pseudo's own Mnemonic, when it differs from `name`;
//   * `alias`, when it differs from `name` (with the default alias = name
//     this second record is never emitted).
// NOTE(review): AMDGPUMnemonicAlias is defined elsewhere; presumably the first
// argument is the accepted spelling and the second the canonical mnemonic -
// confirm against its definition.
2552multiclass VFLAT_Aliases_gfx12<string name, string alias = name> {
2553  defvar ps = get_FLAT_ps<NAME>;
2554  let AssemblerPredicate = isGFX12Only in {
        // Skip the alias when the spelling is unchanged.
2555    if !ne(ps.Mnemonic, name) then
2556      def : AMDGPUMnemonicAlias<ps.Mnemonic, name>;
2557    if !ne(alias, name) then
2558      def : AMDGPUMnemonicAlias<alias, name>;
2559  }
2560}
2561
// Convenience multiclass combining the mnemonic aliases (VFLAT_Aliases_gfx12)
// with the single real record NAME#"_gfx12" (VFLAT_Real_gfx12). It adds no
// records of its own.
//   op    - forwarded to VFLAT_Real_gfx12.
//   name  - GFX12 mnemonic; defaults to the pseudo's Mnemonic.
//   alias - optional extra spelling; defaults to `name`, i.e. none.
2562multiclass VFLAT_Real_Base_gfx12<bits<8> op,
2563                                 string name = get_FLAT_ps<NAME>.Mnemonic,
2564                                 string alias = name> :
2565  VFLAT_Aliases_gfx12<name, alias>,
2566  VFLAT_Real_gfx12<op, name>;
2567
// Real records for a FLAT atomic: everything VFLAT_Real_Base_gfx12 emits for
// NAME, plus a second real record for the pseudo named NAME#"_RTN" that uses
// the same `op` and `name`.
// NOTE(review): "_RTN" is presumably the variant that returns the
// pre-operation value - the pseudo definitions are outside this section.
2568multiclass VFLAT_Real_Atomics_gfx12<bits<8> op,
2569                                    string name = get_FLAT_ps<NAME>.Mnemonic,
2570                                    string alias = name> :
2571  VFLAT_Real_Base_gfx12<op, name, alias> {
      // Aliases are emitted once by the base multiclass; the _RTN variant
      // only needs the real record.
2572  defm _RTN : VFLAT_Real_gfx12<op, name>;
2573}
2574
// Real records for a GLOBAL instruction in both of its pseudo forms: NAME
// (via VFLAT_Real_Base_gfx12, which also emits the aliases) and
// NAME#"_SADDR". Both share the same `op` and `name`.
2575multiclass VGLOBAL_Real_AllAddr_gfx12<bits<8> op,
2576                                      string name = get_FLAT_ps<NAME>.Mnemonic,
2577                                      string alias = name> :
2578  VFLAT_Real_Base_gfx12<op, name, alias> {
2579  defm _SADDR : VFLAT_Real_gfx12<op, name>;
2580}
2581
// Variant of VGLOBAL_Real_AllAddr_gfx12 that wraps both real records (NAME
// and NAME#"_SADDR") in a `let DecoderNamespace = "GFX12W64"` scope. Unlike
// that multiclass it takes no `alias` parameter and inherits
// VFLAT_Aliases_gfx12 directly.
// NOTE(review): VFLAT_Real_gfx12 itself sets DecoderNamespace = "GFX12"
// inside its def body; this multiclass relies on the enclosing `let ... in`
// being applied to the instantiated records afterwards so that "GFX12W64"
// wins - confirm against TableGen's let/defm ordering rules.
2582multiclass VGLOBAL_Real_AllAddr_gfx12_w64<bits<8> op,
2583                                       string name = get_FLAT_ps<NAME>.Mnemonic> :
2584  VFLAT_Aliases_gfx12<name> {
2585  let DecoderNamespace = "GFX12W64" in {
        // An empty defm name keeps NAME unchanged, so this is the base
        // (non-SADDR) record.
2586    defm "" : VFLAT_Real_gfx12<op, name>;
2587    defm _SADDR : VFLAT_Real_gfx12<op, name>;
2588  }
2589}
2590
// Real records for a GLOBAL atomic. On top of what VGLOBAL_Real_AllAddr_gfx12
// emits (NAME and NAME#"_SADDR" plus aliases) this adds the NAME#"_RTN" and
// NAME#"_SADDR_RTN" forms, giving four real records that all share `op` and
// `name`.
2591multiclass VGLOBAL_Real_Atomics_gfx12<bits<8> op,
2592                                      string name = get_FLAT_ps<NAME>.Mnemonic,
2593                                      string alias = name> :
2594  VGLOBAL_Real_AllAddr_gfx12<op, name, alias> {
2595  defm _RTN : VFLAT_Real_gfx12<op, name>;
2596  defm _SADDR_RTN : VFLAT_Real_gfx12<op, name>;
2597}
2598
// Real records for a SCRATCH instruction in all four pseudo forms: NAME (via
// VFLAT_Real_Base_gfx12), NAME#"_SADDR", NAME#"_ST" and NAME#"_SVS". There is
// no `alias` parameter, so the only alias that can be emitted is the pseudo's
// own Mnemonic when it differs from `name`.
// NOTE(review): the suffixes presumably select the scratch addressing
// variants - the pseudo definitions are outside this section.
2599multiclass VSCRATCH_Real_AllAddr_gfx12<bits<8> op,
2600                                       string name = get_FLAT_ps<NAME>.Mnemonic> :
2601  VFLAT_Real_Base_gfx12<op, name> {
2602  defm _SADDR : VFLAT_Real_gfx12<op, name>;
2603  defm _ST : VFLAT_Real_gfx12<op, name>;
2604  defm _SVS : VFLAT_Real_gfx12<op, name>;
2605}
2606
2607// ENC_VFLAT.
// GFX12 real records for the FLAT pseudos. Arguments are <op, name[, alias]>:
// when `name` is omitted the pseudo's own Mnemonic is used, and a third
// string is registered as an additional mnemonic alias for `name`.
// Loads and stores: one real record each (VFLAT_Real_Base_gfx12).
2608defm FLAT_LOAD_UBYTE               : VFLAT_Real_Base_gfx12<0x010, "flat_load_u8">;
2609defm FLAT_LOAD_SBYTE               : VFLAT_Real_Base_gfx12<0x011, "flat_load_i8">;
2610defm FLAT_LOAD_USHORT              : VFLAT_Real_Base_gfx12<0x012, "flat_load_u16">;
2611defm FLAT_LOAD_SSHORT              : VFLAT_Real_Base_gfx12<0x013, "flat_load_i16">;
2612defm FLAT_LOAD_DWORD               : VFLAT_Real_Base_gfx12<0x014, "flat_load_b32">;
2613defm FLAT_LOAD_DWORDX2             : VFLAT_Real_Base_gfx12<0x015, "flat_load_b64">;
2614defm FLAT_LOAD_DWORDX3             : VFLAT_Real_Base_gfx12<0x016, "flat_load_b96">;
2615defm FLAT_LOAD_DWORDX4             : VFLAT_Real_Base_gfx12<0x017, "flat_load_b128">;
2616defm FLAT_STORE_BYTE               : VFLAT_Real_Base_gfx12<0x018, "flat_store_b8">;
2617defm FLAT_STORE_SHORT              : VFLAT_Real_Base_gfx12<0x019, "flat_store_b16">;
2618defm FLAT_STORE_DWORD              : VFLAT_Real_Base_gfx12<0x01a, "flat_store_b32">;
2619defm FLAT_STORE_DWORDX2            : VFLAT_Real_Base_gfx12<0x01b, "flat_store_b64">;
2620defm FLAT_STORE_DWORDX3            : VFLAT_Real_Base_gfx12<0x01c, "flat_store_b96">;
2621defm FLAT_STORE_DWORDX4            : VFLAT_Real_Base_gfx12<0x01d, "flat_store_b128">;
2622defm FLAT_LOAD_UBYTE_D16           : VFLAT_Real_Base_gfx12<0x01e, "flat_load_d16_u8">;
2623defm FLAT_LOAD_SBYTE_D16           : VFLAT_Real_Base_gfx12<0x01f, "flat_load_d16_i8">;
2624defm FLAT_LOAD_SHORT_D16           : VFLAT_Real_Base_gfx12<0x020, "flat_load_d16_b16">;
2625defm FLAT_LOAD_UBYTE_D16_HI        : VFLAT_Real_Base_gfx12<0x021, "flat_load_d16_hi_u8">;
2626defm FLAT_LOAD_SBYTE_D16_HI        : VFLAT_Real_Base_gfx12<0x022, "flat_load_d16_hi_i8">;
2627defm FLAT_LOAD_SHORT_D16_HI        : VFLAT_Real_Base_gfx12<0x023, "flat_load_d16_hi_b16">;
2628defm FLAT_STORE_BYTE_D16_HI        : VFLAT_Real_Base_gfx12<0x024, "flat_store_d16_hi_b8">;
2629defm FLAT_STORE_SHORT_D16_HI       : VFLAT_Real_Base_gfx12<0x025, "flat_store_d16_hi_b16">;
// Atomics: VFLAT_Real_Atomics_gfx12 emits the base record and its _RTN
// counterpart for every entry.
2630defm FLAT_ATOMIC_SWAP              : VFLAT_Real_Atomics_gfx12<0x033, "flat_atomic_swap_b32">;
2631defm FLAT_ATOMIC_CMPSWAP           : VFLAT_Real_Atomics_gfx12<0x034, "flat_atomic_cmpswap_b32">;
2632defm FLAT_ATOMIC_ADD               : VFLAT_Real_Atomics_gfx12<0x035, "flat_atomic_add_u32">;
2633defm FLAT_ATOMIC_SUB               : VFLAT_Real_Atomics_gfx12<0x036, "flat_atomic_sub_u32">;
2634defm FLAT_ATOMIC_CSUB_U32          : VFLAT_Real_Atomics_gfx12<0x037, "flat_atomic_sub_clamp_u32">;
2635defm FLAT_ATOMIC_SMIN              : VFLAT_Real_Atomics_gfx12<0x038, "flat_atomic_min_i32">;
2636defm FLAT_ATOMIC_UMIN              : VFLAT_Real_Atomics_gfx12<0x039, "flat_atomic_min_u32">;
2637defm FLAT_ATOMIC_SMAX              : VFLAT_Real_Atomics_gfx12<0x03a, "flat_atomic_max_i32">;
2638defm FLAT_ATOMIC_UMAX              : VFLAT_Real_Atomics_gfx12<0x03b, "flat_atomic_max_u32">;
2639defm FLAT_ATOMIC_AND               : VFLAT_Real_Atomics_gfx12<0x03c, "flat_atomic_and_b32">;
2640defm FLAT_ATOMIC_OR                : VFLAT_Real_Atomics_gfx12<0x03d, "flat_atomic_or_b32">;
2641defm FLAT_ATOMIC_XOR               : VFLAT_Real_Atomics_gfx12<0x03e, "flat_atomic_xor_b32">;
2642defm FLAT_ATOMIC_INC               : VFLAT_Real_Atomics_gfx12<0x03f, "flat_atomic_inc_u32">;
2643defm FLAT_ATOMIC_DEC               : VFLAT_Real_Atomics_gfx12<0x040, "flat_atomic_dec_u32">;
2644defm FLAT_ATOMIC_SWAP_X2           : VFLAT_Real_Atomics_gfx12<0x041, "flat_atomic_swap_b64">;
2645defm FLAT_ATOMIC_CMPSWAP_X2        : VFLAT_Real_Atomics_gfx12<0x042, "flat_atomic_cmpswap_b64">;
2646defm FLAT_ATOMIC_ADD_X2            : VFLAT_Real_Atomics_gfx12<0x043, "flat_atomic_add_u64">;
2647defm FLAT_ATOMIC_SUB_X2            : VFLAT_Real_Atomics_gfx12<0x044, "flat_atomic_sub_u64">;
2648defm FLAT_ATOMIC_SMIN_X2           : VFLAT_Real_Atomics_gfx12<0x045, "flat_atomic_min_i64">;
2649defm FLAT_ATOMIC_UMIN_X2           : VFLAT_Real_Atomics_gfx12<0x046, "flat_atomic_min_u64">;
2650defm FLAT_ATOMIC_SMAX_X2           : VFLAT_Real_Atomics_gfx12<0x047, "flat_atomic_max_i64">;
2651defm FLAT_ATOMIC_UMAX_X2           : VFLAT_Real_Atomics_gfx12<0x048, "flat_atomic_max_u64">;
2652defm FLAT_ATOMIC_AND_X2            : VFLAT_Real_Atomics_gfx12<0x049, "flat_atomic_and_b64">;
2653defm FLAT_ATOMIC_OR_X2             : VFLAT_Real_Atomics_gfx12<0x04a, "flat_atomic_or_b64">;
2654defm FLAT_ATOMIC_XOR_X2            : VFLAT_Real_Atomics_gfx12<0x04b, "flat_atomic_xor_b64">;
2655defm FLAT_ATOMIC_INC_X2            : VFLAT_Real_Atomics_gfx12<0x04c, "flat_atomic_inc_u64">;
2656defm FLAT_ATOMIC_DEC_X2            : VFLAT_Real_Atomics_gfx12<0x04d, "flat_atomic_dec_u64">;
2657defm FLAT_ATOMIC_COND_SUB_U32      : VFLAT_Real_Atomics_gfx12<0x050>;
// The f32 min/max entries use the "_num_" spelling as the GFX12 name and pass
// the spelling without "_num_" as the alias argument.
2658defm FLAT_ATOMIC_FMIN              : VFLAT_Real_Atomics_gfx12<0x051, "flat_atomic_min_num_f32", "flat_atomic_min_f32">;
2659defm FLAT_ATOMIC_FMAX              : VFLAT_Real_Atomics_gfx12<0x052, "flat_atomic_max_num_f32", "flat_atomic_max_f32">;
2660defm FLAT_ATOMIC_ADD_F32           : VFLAT_Real_Atomics_gfx12<0x056>;
2661defm FLAT_ATOMIC_PK_ADD_F16        : VFLAT_Real_Atomics_gfx12<0x059>;
2662defm FLAT_ATOMIC_PK_ADD_BF16       : VFLAT_Real_Atomics_gfx12<0x05a>;
2663
2664// ENC_VGLOBAL.
// GFX12 real records for the GLOBAL pseudos. Arguments are
// <op, name[, alias]>; entries that pass only `op` keep the pseudo's own
// Mnemonic. Loads and stores get the base and _SADDR records
// (VGLOBAL_Real_AllAddr_gfx12).
2665defm GLOBAL_LOAD_UBYTE             : VGLOBAL_Real_AllAddr_gfx12<0x010, "global_load_u8">;
2666defm GLOBAL_LOAD_SBYTE             : VGLOBAL_Real_AllAddr_gfx12<0x011, "global_load_i8">;
2667defm GLOBAL_LOAD_USHORT            : VGLOBAL_Real_AllAddr_gfx12<0x012, "global_load_u16">;
2668defm GLOBAL_LOAD_SSHORT            : VGLOBAL_Real_AllAddr_gfx12<0x013, "global_load_i16">;
2669defm GLOBAL_LOAD_DWORD             : VGLOBAL_Real_AllAddr_gfx12<0x014, "global_load_b32">;
2670defm GLOBAL_LOAD_DWORDX2           : VGLOBAL_Real_AllAddr_gfx12<0x015, "global_load_b64">;
2671defm GLOBAL_LOAD_DWORDX3           : VGLOBAL_Real_AllAddr_gfx12<0x016, "global_load_b96">;
2672defm GLOBAL_LOAD_DWORDX4           : VGLOBAL_Real_AllAddr_gfx12<0x017, "global_load_b128">;
2673defm GLOBAL_STORE_BYTE             : VGLOBAL_Real_AllAddr_gfx12<0x018, "global_store_b8">;
2674defm GLOBAL_STORE_SHORT            : VGLOBAL_Real_AllAddr_gfx12<0x019, "global_store_b16">;
2675defm GLOBAL_STORE_DWORD            : VGLOBAL_Real_AllAddr_gfx12<0x01a, "global_store_b32">;
2676defm GLOBAL_STORE_DWORDX2          : VGLOBAL_Real_AllAddr_gfx12<0x01b, "global_store_b64">;
2677defm GLOBAL_STORE_DWORDX3          : VGLOBAL_Real_AllAddr_gfx12<0x01c, "global_store_b96">;
2678defm GLOBAL_STORE_DWORDX4          : VGLOBAL_Real_AllAddr_gfx12<0x01d, "global_store_b128">;
2679defm GLOBAL_LOAD_UBYTE_D16         : VGLOBAL_Real_AllAddr_gfx12<0x01e, "global_load_d16_u8">;
2680defm GLOBAL_LOAD_SBYTE_D16         : VGLOBAL_Real_AllAddr_gfx12<0x01f, "global_load_d16_i8">;
2681defm GLOBAL_LOAD_SHORT_D16         : VGLOBAL_Real_AllAddr_gfx12<0x020, "global_load_d16_b16">;
2682defm GLOBAL_LOAD_UBYTE_D16_HI      : VGLOBAL_Real_AllAddr_gfx12<0x021, "global_load_d16_hi_u8">;
2683defm GLOBAL_LOAD_SBYTE_D16_HI      : VGLOBAL_Real_AllAddr_gfx12<0x022, "global_load_d16_hi_i8">;
2684defm GLOBAL_LOAD_SHORT_D16_HI      : VGLOBAL_Real_AllAddr_gfx12<0x023, "global_load_d16_hi_b16">;
2685defm GLOBAL_STORE_BYTE_D16_HI      : VGLOBAL_Real_AllAddr_gfx12<0x024, "global_store_d16_hi_b8">;
2686defm GLOBAL_STORE_SHORT_D16_HI     : VGLOBAL_Real_AllAddr_gfx12<0x025, "global_store_d16_hi_b16">;
2687defm GLOBAL_LOAD_DWORD_ADDTID      : VGLOBAL_Real_AllAddr_gfx12<0x028, "global_load_addtid_b32">;
2688defm GLOBAL_STORE_DWORD_ADDTID     : VGLOBAL_Real_AllAddr_gfx12<0x029, "global_store_addtid_b32">;
2689defm GLOBAL_LOAD_BLOCK             : VGLOBAL_Real_AllAddr_gfx12<0x053>;
2690defm GLOBAL_STORE_BLOCK            : VGLOBAL_Real_AllAddr_gfx12<0x054>;
2691
// Atomics: VGLOBAL_Real_Atomics_gfx12 emits base, _SADDR, _RTN and _SADDR_RTN
// records for every entry.
2692defm GLOBAL_ATOMIC_SWAP            : VGLOBAL_Real_Atomics_gfx12<0x033, "global_atomic_swap_b32">;
2693defm GLOBAL_ATOMIC_CMPSWAP         : VGLOBAL_Real_Atomics_gfx12<0x034, "global_atomic_cmpswap_b32">;
2694defm GLOBAL_ATOMIC_ADD             : VGLOBAL_Real_Atomics_gfx12<0x035, "global_atomic_add_u32">;
2695defm GLOBAL_ATOMIC_SUB             : VGLOBAL_Real_Atomics_gfx12<0x036, "global_atomic_sub_u32">;
// "global_atomic_csub_u32" is passed as the alias argument for the
// "sub_clamp" name.
2696defm GLOBAL_ATOMIC_CSUB            : VGLOBAL_Real_Atomics_gfx12<0x037, "global_atomic_sub_clamp_u32", "global_atomic_csub_u32">;
2697defm GLOBAL_ATOMIC_SMIN            : VGLOBAL_Real_Atomics_gfx12<0x038, "global_atomic_min_i32">;
2698defm GLOBAL_ATOMIC_UMIN            : VGLOBAL_Real_Atomics_gfx12<0x039, "global_atomic_min_u32">;
2699defm GLOBAL_ATOMIC_SMAX            : VGLOBAL_Real_Atomics_gfx12<0x03a, "global_atomic_max_i32">;
2700defm GLOBAL_ATOMIC_UMAX            : VGLOBAL_Real_Atomics_gfx12<0x03b, "global_atomic_max_u32">;
2701defm GLOBAL_ATOMIC_AND             : VGLOBAL_Real_Atomics_gfx12<0x03c, "global_atomic_and_b32">;
2702defm GLOBAL_ATOMIC_OR              : VGLOBAL_Real_Atomics_gfx12<0x03d, "global_atomic_or_b32">;
2703defm GLOBAL_ATOMIC_XOR             : VGLOBAL_Real_Atomics_gfx12<0x03e, "global_atomic_xor_b32">;
2704defm GLOBAL_ATOMIC_INC             : VGLOBAL_Real_Atomics_gfx12<0x03f, "global_atomic_inc_u32">;
2705defm GLOBAL_ATOMIC_DEC             : VGLOBAL_Real_Atomics_gfx12<0x040, "global_atomic_dec_u32">;
2706defm GLOBAL_ATOMIC_SWAP_X2         : VGLOBAL_Real_Atomics_gfx12<0x041, "global_atomic_swap_b64">;
2707defm GLOBAL_ATOMIC_CMPSWAP_X2      : VGLOBAL_Real_Atomics_gfx12<0x042, "global_atomic_cmpswap_b64">;
2708defm GLOBAL_ATOMIC_ADD_X2          : VGLOBAL_Real_Atomics_gfx12<0x043, "global_atomic_add_u64">;
2709defm GLOBAL_ATOMIC_SUB_X2          : VGLOBAL_Real_Atomics_gfx12<0x044, "global_atomic_sub_u64">;
2710defm GLOBAL_ATOMIC_SMIN_X2         : VGLOBAL_Real_Atomics_gfx12<0x045, "global_atomic_min_i64">;
2711defm GLOBAL_ATOMIC_UMIN_X2         : VGLOBAL_Real_Atomics_gfx12<0x046, "global_atomic_min_u64">;
2712defm GLOBAL_ATOMIC_SMAX_X2         : VGLOBAL_Real_Atomics_gfx12<0x047, "global_atomic_max_i64">;
2713defm GLOBAL_ATOMIC_UMAX_X2         : VGLOBAL_Real_Atomics_gfx12<0x048, "global_atomic_max_u64">;
2714defm GLOBAL_ATOMIC_AND_X2          : VGLOBAL_Real_Atomics_gfx12<0x049, "global_atomic_and_b64">;
2715defm GLOBAL_ATOMIC_OR_X2           : VGLOBAL_Real_Atomics_gfx12<0x04a, "global_atomic_or_b64">;
2716defm GLOBAL_ATOMIC_XOR_X2          : VGLOBAL_Real_Atomics_gfx12<0x04b, "global_atomic_xor_b64">;
2717defm GLOBAL_ATOMIC_INC_X2          : VGLOBAL_Real_Atomics_gfx12<0x04c, "global_atomic_inc_u64">;
2718defm GLOBAL_ATOMIC_DEC_X2          : VGLOBAL_Real_Atomics_gfx12<0x04d, "global_atomic_dec_u64">;
2719defm GLOBAL_ATOMIC_COND_SUB_U32    : VGLOBAL_Real_Atomics_gfx12<0x050>;
2720defm GLOBAL_ATOMIC_FMIN            : VGLOBAL_Real_Atomics_gfx12<0x051, "global_atomic_min_num_f32", "global_atomic_min_f32">;
2721defm GLOBAL_ATOMIC_FMAX            : VGLOBAL_Real_Atomics_gfx12<0x052, "global_atomic_max_num_f32", "global_atomic_max_f32">;
2722defm GLOBAL_ATOMIC_ADD_F32         : VGLOBAL_Real_Atomics_gfx12<0x056>;
2723
// The _w32 and _w64 pseudos of each GLOBAL_LOAD_TR instruction share one op
// value (0x057 / 0x058). The _w64 records go through the _w64 multiclass,
// which wraps them in the "GFX12W64" decoder namespace instead of "GFX12".
2724defm GLOBAL_LOAD_TR_B128_w32       : VGLOBAL_Real_AllAddr_gfx12<0x057>;
2725defm GLOBAL_LOAD_TR_B64_w32        : VGLOBAL_Real_AllAddr_gfx12<0x058>;
2726
2727defm GLOBAL_LOAD_TR_B128_w64       : VGLOBAL_Real_AllAddr_gfx12_w64<0x057>;
2728defm GLOBAL_LOAD_TR_B64_w64        : VGLOBAL_Real_AllAddr_gfx12_w64<0x058>;
2729
2730defm GLOBAL_ATOMIC_ORDERED_ADD_B64 : VGLOBAL_Real_Atomics_gfx12<0x073>;
2731defm GLOBAL_ATOMIC_PK_ADD_F16      : VGLOBAL_Real_Atomics_gfx12<0x059>;
2732defm GLOBAL_ATOMIC_PK_ADD_BF16     : VGLOBAL_Real_Atomics_gfx12<0x05a>;
2733
// These three use VFLAT_Real_Base_gfx12, so each gets a single real record
// with no _SADDR or _RTN variants.
2734defm GLOBAL_INV                    : VFLAT_Real_Base_gfx12<0x02b>;
2735defm GLOBAL_WB                     : VFLAT_Real_Base_gfx12<0x02c>;
2736defm GLOBAL_WBINV                  : VFLAT_Real_Base_gfx12<0x04f>;
2737
2738// ENC_VSCRATCH.
// GFX12 real records for the SCRATCH pseudos. VSCRATCH_Real_AllAddr_gfx12
// emits the base, _SADDR, _ST and _SVS records for every entry. The op values
// and name suffixes match the GLOBAL loads and stores of the ENC_VGLOBAL list
// (there written with three hex digits).
2739defm SCRATCH_LOAD_UBYTE            : VSCRATCH_Real_AllAddr_gfx12<0x10, "scratch_load_u8">;
2740defm SCRATCH_LOAD_SBYTE            : VSCRATCH_Real_AllAddr_gfx12<0x11, "scratch_load_i8">;
2741defm SCRATCH_LOAD_USHORT           : VSCRATCH_Real_AllAddr_gfx12<0x12, "scratch_load_u16">;
2742defm SCRATCH_LOAD_SSHORT           : VSCRATCH_Real_AllAddr_gfx12<0x13, "scratch_load_i16">;
2743defm SCRATCH_LOAD_DWORD            : VSCRATCH_Real_AllAddr_gfx12<0x14, "scratch_load_b32">;
2744defm SCRATCH_LOAD_DWORDX2          : VSCRATCH_Real_AllAddr_gfx12<0x15, "scratch_load_b64">;
2745defm SCRATCH_LOAD_DWORDX3          : VSCRATCH_Real_AllAddr_gfx12<0x16, "scratch_load_b96">;
2746defm SCRATCH_LOAD_DWORDX4          : VSCRATCH_Real_AllAddr_gfx12<0x17, "scratch_load_b128">;
2747defm SCRATCH_STORE_BYTE            : VSCRATCH_Real_AllAddr_gfx12<0x18, "scratch_store_b8">;
2748defm SCRATCH_STORE_SHORT           : VSCRATCH_Real_AllAddr_gfx12<0x19, "scratch_store_b16">;
2749defm SCRATCH_STORE_DWORD           : VSCRATCH_Real_AllAddr_gfx12<0x1a, "scratch_store_b32">;
2750defm SCRATCH_STORE_DWORDX2         : VSCRATCH_Real_AllAddr_gfx12<0x1b, "scratch_store_b64">;
2751defm SCRATCH_STORE_DWORDX3         : VSCRATCH_Real_AllAddr_gfx12<0x1c, "scratch_store_b96">;
2752defm SCRATCH_STORE_DWORDX4         : VSCRATCH_Real_AllAddr_gfx12<0x1d, "scratch_store_b128">;
2753defm SCRATCH_LOAD_UBYTE_D16        : VSCRATCH_Real_AllAddr_gfx12<0x1e, "scratch_load_d16_u8">;
2754defm SCRATCH_LOAD_SBYTE_D16        : VSCRATCH_Real_AllAddr_gfx12<0x1f, "scratch_load_d16_i8">;
2755defm SCRATCH_LOAD_SHORT_D16        : VSCRATCH_Real_AllAddr_gfx12<0x20, "scratch_load_d16_b16">;
2756defm SCRATCH_LOAD_UBYTE_D16_HI     : VSCRATCH_Real_AllAddr_gfx12<0x21, "scratch_load_d16_hi_u8">;
2757defm SCRATCH_LOAD_SBYTE_D16_HI     : VSCRATCH_Real_AllAddr_gfx12<0x22, "scratch_load_d16_hi_i8">;
2758defm SCRATCH_LOAD_SHORT_D16_HI     : VSCRATCH_Real_AllAddr_gfx12<0x23, "scratch_load_d16_hi_b16">;
2759defm SCRATCH_STORE_BYTE_D16_HI     : VSCRATCH_Real_AllAddr_gfx12<0x24, "scratch_store_d16_hi_b8">;
2760defm SCRATCH_STORE_SHORT_D16_HI    : VSCRATCH_Real_AllAddr_gfx12<0x25, "scratch_store_d16_hi_b16">;
2761
// Block load/store: no name is passed, so the pseudo's own Mnemonic is used.
2762defm SCRATCH_LOAD_BLOCK            : VSCRATCH_Real_AllAddr_gfx12<0x53>;
2763defm SCRATCH_STORE_BLOCK           : VSCRATCH_Real_AllAddr_gfx12<0x54>;
2764