xref: /llvm-project/llvm/lib/Target/AMDGPU/DSInstructions.td (revision 3a29dfe37c585355dc70c7c614f5bbf071cd7efb)
1//===-- DSInstructions.td - DS Instruction Definitions --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
// Base class shared by all DS pseudo instructions defined in this file.
//
//   opName  - mnemonic; kept in Mnemonic and forwarded to SIMCInstr.
//   outs    - output operand dag, forwarded to InstSI.
//   ins     - input operand dag, forwarded to InstSI.
//   asmOps  - operand portion of the assembly string; kept in AsmOperands.
//   pattern - optional selection patterns (empty by default).
//
// InstSI is given an empty assembly string here; DS_Real concatenates the
// mnemonic with AsmOperands when it builds the real instruction.
class DS_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> pattern=[]> :
  InstSI <outs, ins, "", pattern>,
  SIMCInstr <opName, SIEncodingFamily.NONE> {

  let isPseudo = 1;
  let isCodeGenOnly = 1;

  let DS = 1;
  let GWS = 0;
  let LGKM_CNT = 1;
  let Size = 8;
  let UseNamedOperandTable = 1;

  let SchedRW = [WriteLDS];
  let hasSideEffects = 0;

  // Most instructions both load and store data, so make that the default;
  // definitions that only load or only store clear the other flag.
  let mayLoad = 1;
  let mayStore = 1;

  string Mnemonic = opName;
  string AsmOperands = asmOps;

  // Operand-presence flags. These bits are something of a hack: it would be
  // more natural to test the "outs" and "ins" dags for the presence of
  // particular operands.
  bits<1> has_vdst = 1;
  bits<1> has_addr = 1;
  bits<1> has_data0 = 1;
  bits<1> has_data1 = 1;

  bits<1> has_gws_data0 = 0; // data0 is encoded as addr

  bits<1> has_offset  = 1; // has "offset" that should be split to offset0,1
  bits<1> has_offset0 = 1;
  bits<1> has_offset1 = 1;

  bits<1> has_gds = 1;
  bits<1> gdsValue = 0; // if has_gds == 0 set gds to this value

  // When set, M0 is listed as an implicit use in addition to EXEC.
  bits<1> has_m0_read = 1;

  let Uses = !if(has_m0_read, [M0, EXEC], [EXEC]);
}
52
// Encodable counterpart of a DS_Pseudo.
//
//   ps     - the pseudo whose operand lists and flags are mirrored.
//   opName - mnemonic used in the assembly string; defaults to ps.Mnemonic.
class DS_Real <DS_Pseudo ps, string opName = ps.Mnemonic> :
  InstSI <ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands>,
  Enc64 {

  let isPseudo = 0;
  let isCodeGenOnly = 0;
  let DS = 1;
  let LGKM_CNT = 1;
  let UseNamedOperandTable = 1;

  // Predicates mirrored from the pseudo.
  let SubtargetPredicate = ps.SubtargetPredicate;
  let WaveSizePredicate  = ps.WaveSizePredicate;
  let OtherPredicates    = ps.OtherPredicates;

  // Flags and scheduling information mirrored from the pseudo.
  let GWS                = ps.GWS;
  let TSFlags            = ps.TSFlags;
  let SchedRW            = ps.SchedRW;
  let mayLoad            = ps.mayLoad;
  let mayStore           = ps.mayStore;
  let IsAtomicRet        = ps.IsAtomicRet;
  let IsAtomicNoRet      = ps.IsAtomicNoRet;
  let isConvergent       = ps.isConvergent;

  // Implicit registers and operand constraints mirrored from the pseudo.
  let Uses               = ps.Uses;
  let Defs               = ps.Defs;
  let Constraints        = ps.Constraints;
  let DisableEncoding    = ps.DisableEncoding;

  // Encoding fields.
  bits<10> vdst;
  bits<1>  gds;
  bits<8>  addr;
  bits<10> data0;
  bits<10> data1;
  bits<8>  offset0;
  bits<8>  offset1;

  // A single 16-bit "offset" operand, when the pseudo has one, is split into
  // the two 8-bit offset fields; otherwise both are left unset here.
  bits<16> offset;
  let offset0 = !if(ps.has_offset, offset{7-0}, ?);
  let offset1 = !if(ps.has_offset, offset{15-8}, ?);

  // acc is bit 9 of vdst when the pseudo has a vdst; otherwise bit 9 of data0
  // when it has data0 (or GWS data0); otherwise 0.
  bits<1> acc = !if(ps.has_vdst, vdst{9},
                    !if(!or(ps.has_data0, ps.has_gws_data0), data0{9}, 0));
}
97
98// DS Pseudo instructions
99
// No-return DS op with one data operand and no address operand.
// `rc` selects the register class of $data0.
class DS_0A1D_NORET<string opName, RegisterClass rc = VGPR_32>
  : DS_Pseudo<opName,
              (outs),
              (ins getLdStRegisterOperand<rc>.ret:$data0,
                   Offset:$offset,
                   gds:$gds),
              " $data0$offset$gds"> {
  // Operands this form does not have.
  let has_vdst  = 0;
  let has_addr  = 0;
  let has_data1 = 0;
}
110
// No-return DS op with an address and one data operand; marked
// IsAtomicNoRet. `rc` selects the register class of $data0.
class DS_1A1D_NORET<string opName, RegisterClass rc = VGPR_32>
  : DS_Pseudo<opName,
              (outs),
              (ins VGPR_32:$addr,
                   getLdStRegisterOperand<rc>.ret:$data0,
                   Offset:$offset,
                   gds:$gds),
              " $addr, $data0$offset$gds"> {
  let IsAtomicNoRet = 1;

  // Operands this form does not have.
  let has_vdst  = 0;
  let has_data1 = 0;
}
121
// Emits two DS_1A1D_NORET pseudos per use: the unsuffixed one keeps the
// default has_m0_read = 1, the "_gfx9" one is built with has_m0_read = 0.
multiclass DS_1A1D_NORET_mc<string opName, RegisterClass rc = VGPR_32> {
  def "" : DS_1A1D_NORET<opName, rc>;

  let has_m0_read = 0 in
  def _gfx9 : DS_1A1D_NORET<opName, rc>;
}
129
// Emits a single unsuffixed DS_1A1D_NORET pseudo built with has_m0_read = 0
// (there is no M0-reading variant).
multiclass DS_1A1D_NORET_mc_gfx9<string opName, RegisterClass rc = VGPR_32> {
  let has_m0_read = 0 in
  def "" : DS_1A1D_NORET<opName, rc>;
}
135
// No-return DS op with an address and two data operands that share one
// register operand type (data_op); marked IsAtomicNoRet.
class DS_1A2D_NORET<string opName, RegisterClass rc = VGPR_32,
                    RegisterOperand data_op = getLdStRegisterOperand<rc>.ret>
  : DS_Pseudo<opName,
              (outs),
              (ins VGPR_32:$addr,
                   data_op:$data0, data_op:$data1,
                   Offset:$offset, gds:$gds),
              " $addr, $data0, $data1$offset$gds"> {
  let IsAtomicNoRet = 1;
  let has_vdst = 0;
}
146
// Emits the unsuffixed DS_1A2D_NORET pseudo plus a "_gfx9" one built with
// has_m0_read = 0.
multiclass DS_1A2D_NORET_mc<string opName, RegisterClass rc = VGPR_32> {
  def "" : DS_1A2D_NORET<opName, rc>;

  let has_m0_read = 0 in
  def _gfx9 : DS_1A2D_NORET<opName, rc>;
}
154
// No-return DS op with an address, two data operands and two separate
// offset operands ($offset0 / $offset1) instead of one combined $offset.
class DS_1A2D_Off8_NORET <string opName, RegisterClass rc = VGPR_32,
                          RegisterOperand data_op = getLdStRegisterOperand<rc>.ret>
  : DS_Pseudo<opName,
              (outs),
              (ins VGPR_32:$addr,
                   data_op:$data0, data_op:$data1,
                   Offset0:$offset0, Offset1:$offset1,
                   gds:$gds),
              " $addr, $data0, $data1$offset0$offset1$gds"> {
  // No combined "offset" operand to split; offset0/offset1 are given directly.
  let has_offset = 0;
  let has_vdst = 0;
}
166
// Emits the unsuffixed DS_1A2D_Off8_NORET pseudo plus a "_gfx9" one built
// with has_m0_read = 0.
multiclass DS_1A2D_Off8_NORET_mc <string opName, RegisterClass rc = VGPR_32> {
  def "" : DS_1A2D_Off8_NORET<opName, rc>;

  let has_m0_read = 0 in
  def _gfx9 : DS_1A2D_Off8_NORET<opName, rc>;
}
174
// Returning DS op with one data operand, no address operand and no gds
// operand: "gds" is spelled literally in the assembly string and gdsValue is
// fixed to 1. `rc` / `src` select the result and source register classes.
class DS_0A1D_RET_GDS<string opName, RegisterClass rc = VGPR_32, RegisterClass src = rc,
                  RegisterOperand dst_op = getLdStRegisterOperand<rc>.ret,
                  RegisterOperand src_op = getLdStRegisterOperand<src>.ret>
  : DS_Pseudo<opName,
              (outs dst_op:$vdst),
              (ins src_op:$data0, Offset:$offset),
              " $vdst, $data0$offset gds"> {
  let hasSideEffects = 1;

  // gds is not an operand here; it is always encoded as 1.
  let has_gds  = 0;
  let gdsValue = 1;

  // Operands this form does not have.
  let has_addr  = 0;
  let has_data1 = 0;
}
189
// Returning DS op with an address and one data operand; result and data
// share one register operand type (data_op). Marked IsAtomicRet.
class DS_1A1D_RET <string opName, RegisterClass rc = VGPR_32,
                  RegisterOperand data_op = getLdStRegisterOperand<rc>.ret>
  : DS_Pseudo<opName,
              (outs data_op:$vdst),
              (ins VGPR_32:$addr,
                   data_op:$data0,
                   Offset:$offset,
                   gds:$gds),
              " $vdst, $addr, $data0$offset$gds"> {
  let IsAtomicRet = 1;
  let has_data1 = 0;
}
200
// Emits the unsuffixed DS_1A1D_RET pseudo plus a "_gfx9" one built with
// has_m0_read = 0.
multiclass DS_1A1D_RET_mc <string opName, RegisterClass rc = VGPR_32> {
  def "" : DS_1A1D_RET<opName, rc>;

  let has_m0_read = 0 in
  def _gfx9 : DS_1A1D_RET<opName, rc>;
}
208
// Emits a single unsuffixed DS_1A1D_RET pseudo built with has_m0_read = 0
// (there is no M0-reading variant).
multiclass DS_1A1D_RET_mc_gfx9 <string opName, RegisterClass rc = VGPR_32> {
  let has_m0_read = 0 in
  def "" : DS_1A1D_RET<opName, rc>;
}
214
// Returning DS op with an address and two data operands; marked IsAtomicRet.
// `rc` selects the result register class and `src` the class of both data
// operands (defaults to `rc`).
class DS_1A2D_RET<string opName,
                  RegisterClass rc = VGPR_32,
                  RegisterClass src = rc,
                  RegisterOperand dst_op = getLdStRegisterOperand<rc>.ret,
                  RegisterOperand src_op = getLdStRegisterOperand<src>.ret>
  : DS_Pseudo<opName,
              (outs dst_op:$vdst),
              (ins VGPR_32:$addr,
                   src_op:$data0, src_op:$data1,
                   Offset:$offset, gds:$gds),
              " $vdst, $addr, $data0, $data1$offset$gds"> {
  let IsAtomicRet = 1;
}
227
// Emits the unsuffixed DS_1A2D_RET pseudo plus a "_gfx9" one built with
// has_m0_read = 0.
multiclass DS_1A2D_RET_mc<string opName,
                          RegisterClass rc = VGPR_32,
                          RegisterClass src = rc> {
  def "" : DS_1A2D_RET<opName, rc, src>;

  let has_m0_read = 0 in
  def _gfx9 : DS_1A2D_RET<opName, rc, src>;
}
237
// Returning DS op with an address, two data operands and two separate
// offset operands ($offset0 / $offset1) instead of one combined $offset.
class DS_1A2D_Off8_RET<string opName,
                       RegisterClass rc = VGPR_32,
                       RegisterClass src = rc,
                       RegisterOperand dst_op = getLdStRegisterOperand<rc>.ret,
                       RegisterOperand src_op = getLdStRegisterOperand<src>.ret>
  : DS_Pseudo<opName,
              (outs dst_op:$vdst),
              (ins VGPR_32:$addr,
                   src_op:$data0, src_op:$data1,
                   Offset0:$offset0, Offset1:$offset1,
                   gds:$gds),
              " $vdst, $addr, $data0, $data1$offset0$offset1$gds"> {
  // No combined "offset" operand to split; offset0/offset1 are given directly.
  let has_offset = 0;
}
250
// Emits the unsuffixed DS_1A2D_Off8_RET pseudo plus a "_gfx9" one built with
// has_m0_read = 0.
multiclass DS_1A2D_Off8_RET_mc<string opName,
                               RegisterClass rc = VGPR_32,
                               RegisterClass src = rc> {
  def "" : DS_1A2D_Off8_RET<opName, rc, src>;

  let has_m0_read = 0 in
  def _gfx9 : DS_1A2D_Off8_RET<opName, rc, src>;
}
260
// DS op with two outputs: $vdst and an updated $addr. The $addr output is
// tied to the $addr_in input, which is excluded from encoding. There is no
// gds operand and gdsValue is 0.
class DS_BVH_STACK<string opName>
  : DS_Pseudo<opName,
              (outs getLdStRegisterOperand<VGPR_32>.ret:$vdst, VGPR_32:$addr),
              (ins VGPR_32:$addr_in,
                   getLdStRegisterOperand<VGPR_32>.ret:$data0,
                   VReg_128:$data1,
                   Offset:$offset),
              " $vdst, $addr, $data0, $data1$offset"> {
  let Constraints = "$addr = $addr_in";
  let DisableEncoding = "$addr_in";

  let has_gds = 0;
  let gdsValue = 0;

  // One scheduling write per output.
  let SchedRW = [WriteLDS, WriteLDS];

  // TODO: Use MMOs in the LDS address space instead of hasSideEffects = 1.
  let hasSideEffects = 1;
}
274
// Returning DS op with only an address operand (no data operands).
//
//   rc            - register class of the result.
//   HasTiedOutput - when set, an extra $vdst_in input is appended, tied to
//                   $vdst and excluded from encoding.
//   ofs           - operand type used for $offset.
//   data_op       - register operand type of $vdst (and $vdst_in).
class DS_1A_RET<string opName, RegisterClass rc = VGPR_32, bit HasTiedOutput = 0, Operand ofs = Offset,
                RegisterOperand data_op = getLdStRegisterOperand<rc>.ret>
  : DS_Pseudo<opName,
              (outs data_op:$vdst),
              // Common inputs, followed by the tied input only when requested.
              !con((ins VGPR_32:$addr, ofs:$offset, gds:$gds),
                   !if(HasTiedOutput, (ins data_op:$vdst_in), (ins))),
              " $vdst, $addr$offset$gds"> {
  let Constraints     = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");

  // Operands this form does not have.
  let has_data0 = 0;
  let has_data1 = 0;
}
288
// Emits the unsuffixed DS_1A_RET pseudo plus a "_gfx9" one built with
// has_m0_read = 0; all template arguments are forwarded unchanged.
multiclass DS_1A_RET_mc<string opName, RegisterClass rc = VGPR_32, bit HasTiedOutput = 0, Operand ofs = Offset> {
  def "" : DS_1A_RET<opName, rc, HasTiedOutput, ofs>;

  let has_m0_read = 0 in
  def _gfx9 : DS_1A_RET<opName, rc, HasTiedOutput, ofs>;
}
296
// Emits a single unsuffixed DS_1A_RET pseudo built with has_m0_read = 0.
multiclass DS_1A_RET_NoM0<string opName, RegisterClass rc = VGPR_32> {
  let has_m0_read = 0 in
  def "" : DS_1A_RET<opName, rc>;
}
302
// DS_1A_RET with the tied $vdst_in input enabled.
class DS_1A_RET_Tied<string opName, RegisterClass rc = VGPR_32>
  : DS_1A_RET<opName, rc, /* HasTiedOutput */ 1>;
305
// Returning DS op with an address and two separate offset operands
// ($offset0 / $offset1); it has no data operands.
class DS_1A_Off8_RET <string opName, RegisterClass rc = VGPR_32>
  : DS_Pseudo<opName,
              (outs getLdStRegisterOperand<rc>.ret:$vdst),
              (ins VGPR_32:$addr,
                   Offset0:$offset0, Offset1:$offset1,
                   gds:$gds),
              " $vdst, $addr$offset0$offset1$gds"> {
  // Operands this form does not have.
  let has_data0 = 0;
  let has_data1 = 0;

  // No combined "offset" operand to split; offset0/offset1 are given directly.
  let has_offset = 0;
}
316
// Emits the unsuffixed DS_1A_Off8_RET pseudo plus a "_gfx9" one built with
// has_m0_read = 0.
multiclass DS_1A_Off8_RET_mc <string opName, RegisterClass rc = VGPR_32> {
  def "" : DS_1A_Off8_RET<opName, rc>;

  let has_m0_read = 0 in
  def _gfx9 : DS_1A_Off8_RET<opName, rc>;
}
324
// Returning DS op with only an address operand and no gds operand: "gds" is
// spelled literally in the assembly string and gdsValue is fixed to 1.
class DS_1A_RET_GDS <string opName>
  : DS_Pseudo<opName,
              (outs getLdStRegisterOperand<VGPR_32>.ret:$vdst),
              (ins VGPR_32:$addr, Offset:$offset),
              " $vdst, $addr$offset gds"> {
  // gds is not an operand here; it is always encoded as 1.
  let has_gds  = 0;
  let gdsValue = 1;

  // Operands this form does not have.
  let has_data0 = 0;
  let has_data1 = 0;
}
335
// Returning DS op with neither an address nor data operands: only the
// offset and gds operands are inputs.
class DS_0A_RET <string opName>
  : DS_Pseudo<opName,
              (outs getLdStRegisterOperand<VGPR_32>.ret:$vdst),
              (ins Offset:$offset, gds:$gds),
              " $vdst$offset$gds"> {
  // Operands this form does not have.
  let has_addr  = 0;
  let has_data0 = 0;
  let has_data1 = 0;

  // Same values as the DS_Pseudo defaults, restated explicitly.
  let mayLoad  = 1;
  let mayStore = 1;
}
348
// No-return DS op whose only register operand is the address.
class DS_1A <string opName>
  : DS_Pseudo<opName,
              (outs),
              (ins VGPR_32:$addr, Offset:$offset, gds:$gds),
              " $addr$offset$gds"> {
  // Operands this form does not have.
  let has_vdst  = 0;
  let has_data0 = 0;
  let has_data1 = 0;

  // Same values as the DS_Pseudo defaults, restated explicitly.
  let mayLoad  = 1;
  let mayStore = 1;
}
361
// Emits the unsuffixed DS_1A pseudo plus a "_gfx9" one built with
// has_m0_read = 0.
multiclass DS_1A_mc <string opName> {
  def "" : DS_1A<opName>;

  let has_m0_read = 0 in
  def _gfx9 : DS_1A<opName>;
}
369
370
// Common base for the GWS pseudos: sets GWS = 1, produces no outputs, clears
// every register-operand presence flag, and fixes gds to 1 instead of taking
// it as an operand. Subclasses supply the input dag and assembly operands.
class DS_GWS <string opName, dag ins, string asmOps>
  : DS_Pseudo<opName, (outs), ins, asmOps> {
  let GWS = 1;

  // gds is not an operand here; it is always encoded as 1.
  let has_gds   = 0;
  let gdsValue  = 1;

  let has_vdst  = 0;
  let has_addr  = 0;
  let has_data0 = 0;
  let has_data1 = 0;
}
383
// GWS op whose only operand is the offset.
class DS_GWS_0D <string opName>
  : DS_GWS<opName, (ins Offset:$offset), "$offset gds"> {
  let hasSideEffects = 1;
}
389
// GWS op with one data operand plus the offset. has_gws_data0 is set, which
// DS_Pseudo documents as "data0 is encoded as addr".
class DS_GWS_1D <string opName>
  : DS_GWS<opName,
           (ins getLdStRegisterOperand<VGPR_32>.ret:$data0, Offset:$offset),
           " $data0$offset gds"> {
  let hasSideEffects = 1;
  let has_gws_data0 = 1;
}
398
// DS op with no operands at all: empty outs/ins, empty assembly operands,
// and every operand-presence flag cleared. It neither loads nor stores but
// is marked as having side effects.
class DS_VOID <string opName>
  : DS_Pseudo<opName, (outs), (ins), ""> {
  let hasSideEffects = 1;
  let mayLoad = 0;
  let mayStore = 0;

  // No operands, so no named-operand table either.
  let UseNamedOperandTable = 0;

  let has_vdst    = 0;
  let has_addr    = 0;
  let has_data0   = 0;
  let has_data1   = 0;
  let has_offset  = 0;
  let has_offset0 = 0;
  let has_offset1 = 0;
  let has_gds     = 0;
}
415
// Permute-style DS op: address plus one data operand in, one result out,
// no gds operand. It is convergent and neither loads nor stores.
//
//   node    - operator matched by the built-in selection pattern
//             (null_frag by default).
//   data_op - register operand type of $vdst and $data0.
class DS_1A1D_PERMUTE <string opName, SDPatternOperator node = null_frag,
                       RegisterOperand data_op = getLdStRegisterOperand<VGPR_32>.ret>
  : DS_Pseudo<opName,
              (outs data_op:$vdst),
              (ins VGPR_32:$addr, data_op:$data0, Offset:$offset),
              " $vdst, $addr, $data0$offset",
              [(set i32:$vdst,
                    (node (DS1Addr1Offset i32:$addr, i32:$offset),
                          i32:$data0))]> {
  let isConvergent = 1;
  let mayLoad = 0;
  let mayStore = 0;

  // Operands this form does not have.
  let has_data1 = 0;
  let has_gds = 0;
}
432
// Selection pattern mapping `frag` applied to a (ptr, offset) DS address and
// a value of type `vt` onto `inst`.
//
//   complexity - value assigned to AddedComplexity.
//   gds        - immediate passed as the instruction's gds operand.
class DSAtomicRetPat<DS_Pseudo inst, ValueType vt, PatFrag frag, int complexity = 0,
  bit gds=0>
  : GCNPat<(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
           (inst $ptr,
                 getVregSrcForVT<vt>.ret:$value,
                 Offset:$offset,
                 (i1 gds))> {
  let AddedComplexity = complexity;
}
438
// No-return, one-data-operand DS operations using the default VGPR_32 data
// class. Each defm goes through DS_1A1D_NORET_mc, so it also produces a
// "_gfx9" pseudo with has_m0_read = 0.
defm DS_ADD_U32       : DS_1A1D_NORET_mc<"ds_add_u32">;
defm DS_SUB_U32       : DS_1A1D_NORET_mc<"ds_sub_u32">;
defm DS_RSUB_U32      : DS_1A1D_NORET_mc<"ds_rsub_u32">;
defm DS_INC_U32       : DS_1A1D_NORET_mc<"ds_inc_u32">;
defm DS_DEC_U32       : DS_1A1D_NORET_mc<"ds_dec_u32">;
defm DS_MIN_I32       : DS_1A1D_NORET_mc<"ds_min_i32">;
defm DS_MAX_I32       : DS_1A1D_NORET_mc<"ds_max_i32">;
defm DS_MIN_U32       : DS_1A1D_NORET_mc<"ds_min_u32">;
defm DS_MAX_U32       : DS_1A1D_NORET_mc<"ds_max_u32">;
defm DS_AND_B32       : DS_1A1D_NORET_mc<"ds_and_b32">;
defm DS_OR_B32        : DS_1A1D_NORET_mc<"ds_or_b32">;
defm DS_XOR_B32       : DS_1A1D_NORET_mc<"ds_xor_b32">;

// ds_add_f32 is additionally gated on HasLDSFPAtomicAddF32.
let SubtargetPredicate = HasLDSFPAtomicAddF32 in {
defm DS_ADD_F32       : DS_1A1D_NORET_mc<"ds_add_f32">;
}

defm DS_MIN_F32       : DS_1A1D_NORET_mc<"ds_min_f32">;
defm DS_MAX_F32       : DS_1A1D_NORET_mc<"ds_max_f32">;
458
// Store definitions: everything in this scope has mayLoad cleared.
let mayLoad = 0 in {
defm DS_WRITE_B8      : DS_1A1D_NORET_mc<"ds_write_b8">;
defm DS_WRITE_B16     : DS_1A1D_NORET_mc<"ds_write_b16">;
defm DS_WRITE_B32     : DS_1A1D_NORET_mc<"ds_write_b32">;
defm DS_WRITE2_B32    : DS_1A2D_Off8_NORET_mc<"ds_write2_b32">;
defm DS_WRITE2ST64_B32: DS_1A2D_Off8_NORET_mc<"ds_write2st64_b32">;


// Defined with a plain `def` under has_m0_read = 0, so each has a single
// variant and no separate "_gfx9" record.
let has_m0_read = 0 in {

let SubtargetPredicate = HasD16LoadStore in {
def DS_WRITE_B8_D16_HI  : DS_1A1D_NORET<"ds_write_b8_d16_hi">;
def DS_WRITE_B16_D16_HI : DS_1A1D_NORET<"ds_write_b16_d16_hi">;
}

} // End has_m0_read = 0

// Address-less store form (DS_0A1D_NORET), gated on HasDSAddTid.
let SubtargetPredicate = HasDSAddTid in {
def DS_WRITE_ADDTID_B32 : DS_0A1D_NORET<"ds_write_addtid_b32">;
}

} // End mayLoad = 0
481
// f64 add: the *_mc_gfx9 multiclasses emit only the has_m0_read = 0 form,
// under the unsuffixed name.
let SubtargetPredicate = HasLdsAtomicAddF64 in {
  defm DS_ADD_F64     : DS_1A1D_NORET_mc_gfx9<"ds_add_f64", VReg_64>;
  defm DS_ADD_RTN_F64 : DS_1A1D_RET_mc_gfx9<"ds_add_rtn_f64", VReg_64>;
} // End SubtargetPredicate = HasLdsAtomicAddF64

// Packed 16-bit adds (f16 / bf16), with and without return value.
let SubtargetPredicate = HasAtomicDsPkAdd16Insts in {
  defm DS_PK_ADD_F16      : DS_1A1D_NORET_mc<"ds_pk_add_f16">;
  defm DS_PK_ADD_RTN_F16  : DS_1A1D_RET_mc<"ds_pk_add_rtn_f16", VGPR_32>;
  defm DS_PK_ADD_BF16     : DS_1A1D_NORET_mc<"ds_pk_add_bf16">;
  defm DS_PK_ADD_RTN_BF16 : DS_1A1D_RET_mc<"ds_pk_add_rtn_bf16", VGPR_32>;
} // End SubtargetPredicate = HasAtomicDsPkAdd16Insts
493
// Two-data-operand operations (DS_1A2D_*): cmpstore with and without return
// value, in 32-bit (default VGPR_32) and 64-bit (VReg_64) forms.
defm DS_CMPSTORE_B32     : DS_1A2D_NORET_mc<"ds_cmpstore_b32">;
defm DS_CMPSTORE_F32     : DS_1A2D_NORET_mc<"ds_cmpstore_f32">;
defm DS_CMPSTORE_B64     : DS_1A2D_NORET_mc<"ds_cmpstore_b64", VReg_64>;
defm DS_CMPSTORE_F64     : DS_1A2D_NORET_mc<"ds_cmpstore_f64", VReg_64>;
defm DS_CMPSTORE_RTN_B32 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_b32", VGPR_32>;
defm DS_CMPSTORE_RTN_F32 : DS_1A2D_RET_mc<"ds_cmpstore_rtn_f32", VGPR_32>;
defm DS_CMPSTORE_RTN_B64  : DS_1A2D_RET_mc<"ds_cmpstore_rtn_b64", VReg_64>;
defm DS_CMPSTORE_RTN_F64  : DS_1A2D_RET_mc<"ds_cmpstore_rtn_f64", VReg_64>;

// 32-bit no-return two-data-operand operations.
defm DS_MSKOR_B32     : DS_1A2D_NORET_mc<"ds_mskor_b32">;
defm DS_CMPST_B32     : DS_1A2D_NORET_mc<"ds_cmpst_b32">;
defm DS_CMPST_F32     : DS_1A2D_NORET_mc<"ds_cmpst_f32">;
506
// 64-bit no-return operations: same forms as the 32-bit ones, with VReg_64
// as the data register class.
defm DS_ADD_U64       : DS_1A1D_NORET_mc<"ds_add_u64", VReg_64>;
defm DS_SUB_U64       : DS_1A1D_NORET_mc<"ds_sub_u64", VReg_64>;
defm DS_RSUB_U64      : DS_1A1D_NORET_mc<"ds_rsub_u64", VReg_64>;
defm DS_INC_U64       : DS_1A1D_NORET_mc<"ds_inc_u64", VReg_64>;
defm DS_DEC_U64       : DS_1A1D_NORET_mc<"ds_dec_u64", VReg_64>;
defm DS_MIN_I64       : DS_1A1D_NORET_mc<"ds_min_i64", VReg_64>;
defm DS_MAX_I64       : DS_1A1D_NORET_mc<"ds_max_i64", VReg_64>;
defm DS_MIN_U64       : DS_1A1D_NORET_mc<"ds_min_u64", VReg_64>;
defm DS_MAX_U64       : DS_1A1D_NORET_mc<"ds_max_u64", VReg_64>;
defm DS_AND_B64       : DS_1A1D_NORET_mc<"ds_and_b64", VReg_64>;
defm DS_OR_B64        : DS_1A1D_NORET_mc<"ds_or_b64", VReg_64>;
defm DS_XOR_B64       : DS_1A1D_NORET_mc<"ds_xor_b64", VReg_64>;
defm DS_MSKOR_B64     : DS_1A2D_NORET_mc<"ds_mskor_b64", VReg_64>;
// 64-bit stores: mayLoad is cleared for these three only.
let mayLoad = 0 in {
defm DS_WRITE_B64     : DS_1A1D_NORET_mc<"ds_write_b64", VReg_64>;
defm DS_WRITE2_B64    : DS_1A2D_Off8_NORET_mc<"ds_write2_b64", VReg_64>;
defm DS_WRITE2ST64_B64: DS_1A2D_Off8_NORET_mc<"ds_write2st64_b64", VReg_64>;
}
defm DS_CMPST_B64     : DS_1A2D_NORET_mc<"ds_cmpst_b64", VReg_64>;
defm DS_CMPST_F64     : DS_1A2D_NORET_mc<"ds_cmpst_f64", VReg_64>;
defm DS_MIN_F64       : DS_1A1D_NORET_mc<"ds_min_f64", VReg_64>;
defm DS_MAX_F64       : DS_1A1D_NORET_mc<"ds_max_f64", VReg_64>;
529
// 32-bit returning ("_rtn") operations, all with VGPR_32 results. One-data
// forms use DS_1A1D_RET_mc, two-data forms use DS_1A2D_RET_mc.
defm DS_ADD_RTN_U32   : DS_1A1D_RET_mc<"ds_add_rtn_u32", VGPR_32>;

// ds_add_rtn_f32 is additionally gated on HasLDSFPAtomicAddF32.
let SubtargetPredicate = HasLDSFPAtomicAddF32 in {
defm DS_ADD_RTN_F32   : DS_1A1D_RET_mc<"ds_add_rtn_f32", VGPR_32>;
}
defm DS_SUB_RTN_U32   : DS_1A1D_RET_mc<"ds_sub_rtn_u32", VGPR_32>;
defm DS_RSUB_RTN_U32  : DS_1A1D_RET_mc<"ds_rsub_rtn_u32", VGPR_32>;
defm DS_INC_RTN_U32   : DS_1A1D_RET_mc<"ds_inc_rtn_u32", VGPR_32>;
defm DS_DEC_RTN_U32   : DS_1A1D_RET_mc<"ds_dec_rtn_u32", VGPR_32>;
defm DS_MIN_RTN_I32   : DS_1A1D_RET_mc<"ds_min_rtn_i32", VGPR_32>;
defm DS_MAX_RTN_I32   : DS_1A1D_RET_mc<"ds_max_rtn_i32", VGPR_32>;
defm DS_MIN_RTN_U32   : DS_1A1D_RET_mc<"ds_min_rtn_u32", VGPR_32>;
defm DS_MAX_RTN_U32   : DS_1A1D_RET_mc<"ds_max_rtn_u32", VGPR_32>;
defm DS_AND_RTN_B32   : DS_1A1D_RET_mc<"ds_and_rtn_b32", VGPR_32>;
defm DS_OR_RTN_B32    : DS_1A1D_RET_mc<"ds_or_rtn_b32", VGPR_32>;
defm DS_XOR_RTN_B32   : DS_1A1D_RET_mc<"ds_xor_rtn_b32", VGPR_32>;
defm DS_MSKOR_RTN_B32 : DS_1A2D_RET_mc<"ds_mskor_rtn_b32", VGPR_32>;
defm DS_CMPST_RTN_B32 : DS_1A2D_RET_mc<"ds_cmpst_rtn_b32", VGPR_32>;
defm DS_CMPST_RTN_F32 : DS_1A2D_RET_mc<"ds_cmpst_rtn_f32", VGPR_32>;
defm DS_MIN_RTN_F32   : DS_1A1D_RET_mc<"ds_min_rtn_f32", VGPR_32>;
defm DS_MAX_RTN_F32   : DS_1A1D_RET_mc<"ds_max_rtn_f32", VGPR_32>;
551
// 32-bit write-exchange operations. The "2" forms take two VGPR_32 data
// operands and return a VReg_64 result.
defm DS_WRXCHG_RTN_B32 : DS_1A1D_RET_mc<"ds_wrxchg_rtn_b32">;
defm DS_WRXCHG2_RTN_B32 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2_rtn_b32", VReg_64, VGPR_32>;
defm DS_WRXCHG2ST64_RTN_B32 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2st64_rtn_b32", VReg_64, VGPR_32>;
555
// 64-bit returning ("_rtn") operations, all with VReg_64 results.
defm DS_ADD_RTN_U64  : DS_1A1D_RET_mc<"ds_add_rtn_u64", VReg_64>;
defm DS_SUB_RTN_U64  : DS_1A1D_RET_mc<"ds_sub_rtn_u64", VReg_64>;
defm DS_RSUB_RTN_U64  : DS_1A1D_RET_mc<"ds_rsub_rtn_u64", VReg_64>;
defm DS_INC_RTN_U64   : DS_1A1D_RET_mc<"ds_inc_rtn_u64", VReg_64>;
defm DS_DEC_RTN_U64   : DS_1A1D_RET_mc<"ds_dec_rtn_u64", VReg_64>;
defm DS_MIN_RTN_I64    : DS_1A1D_RET_mc<"ds_min_rtn_i64", VReg_64>;
defm DS_MAX_RTN_I64    : DS_1A1D_RET_mc<"ds_max_rtn_i64", VReg_64>;
defm DS_MIN_RTN_U64   : DS_1A1D_RET_mc<"ds_min_rtn_u64", VReg_64>;
defm DS_MAX_RTN_U64   : DS_1A1D_RET_mc<"ds_max_rtn_u64", VReg_64>;
defm DS_AND_RTN_B64    : DS_1A1D_RET_mc<"ds_and_rtn_b64", VReg_64>;
defm DS_OR_RTN_B64     : DS_1A1D_RET_mc<"ds_or_rtn_b64", VReg_64>;
defm DS_XOR_RTN_B64    : DS_1A1D_RET_mc<"ds_xor_rtn_b64", VReg_64>;
defm DS_MSKOR_RTN_B64  : DS_1A2D_RET_mc<"ds_mskor_rtn_b64", VReg_64>;
defm DS_CMPST_RTN_B64  : DS_1A2D_RET_mc<"ds_cmpst_rtn_b64", VReg_64>;
defm DS_CMPST_RTN_F64  : DS_1A2D_RET_mc<"ds_cmpst_rtn_f64", VReg_64>;
defm DS_MIN_RTN_F64    : DS_1A1D_RET_mc<"ds_min_rtn_f64", VReg_64>;
defm DS_MAX_RTN_F64    : DS_1A1D_RET_mc<"ds_max_rtn_f64", VReg_64>;

// 64-bit write-exchange operations. The "2" forms take two VReg_64 data
// operands and return a VReg_128 result.
defm DS_WRXCHG_RTN_B64 : DS_1A1D_RET_mc<"ds_wrxchg_rtn_b64", VReg_64>;
defm DS_WRXCHG2_RTN_B64 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2_rtn_b64", VReg_128, VReg_64>;
defm DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2st64_rtn_b64", VReg_128, VReg_64>;
577
// GWS operations: all are convergent and use a custom inserter. The _0D
// forms take only an offset; the _1D forms also take a data operand.
let isConvergent = 1, usesCustomInserter = 1 in {
def DS_GWS_INIT       : DS_GWS_1D<"ds_gws_init"> {
  // Unlike the other GWS ops, init is not marked as loading.
  let mayLoad = 0;
}
def DS_GWS_SEMA_V     : DS_GWS_0D<"ds_gws_sema_v">;
def DS_GWS_SEMA_BR    : DS_GWS_1D<"ds_gws_sema_br">;
def DS_GWS_SEMA_P     : DS_GWS_0D<"ds_gws_sema_p">;
def DS_GWS_BARRIER    : DS_GWS_1D<"ds_gws_barrier">;
}
587
// "_src2" operations, gated on HasDsSrc2Insts. All use the DS_1A form, whose
// only register operand is the address (no vdst and no data operands). They
// are plain defs, so they keep the default has_m0_read = 1 and have no
// "_gfx9" variants.
let SubtargetPredicate = HasDsSrc2Insts in {
def DS_ADD_SRC2_U32   : DS_1A<"ds_add_src2_u32">;
def DS_SUB_SRC2_U32   : DS_1A<"ds_sub_src2_u32">;
def DS_RSUB_SRC2_U32  : DS_1A<"ds_rsub_src2_u32">;
def DS_INC_SRC2_U32   : DS_1A<"ds_inc_src2_u32">;
def DS_DEC_SRC2_U32   : DS_1A<"ds_dec_src2_u32">;
def DS_MIN_SRC2_I32   : DS_1A<"ds_min_src2_i32">;
def DS_MAX_SRC2_I32   : DS_1A<"ds_max_src2_i32">;
def DS_MIN_SRC2_U32   : DS_1A<"ds_min_src2_u32">;
def DS_MAX_SRC2_U32   : DS_1A<"ds_max_src2_u32">;
def DS_AND_SRC2_B32   : DS_1A<"ds_and_src2_b32">;
def DS_OR_SRC2_B32    : DS_1A<"ds_or_src2_b32">;
def DS_XOR_SRC2_B32   : DS_1A<"ds_xor_src2_b32">;
def DS_MIN_SRC2_F32   : DS_1A<"ds_min_src2_f32">;
def DS_MAX_SRC2_F32   : DS_1A<"ds_max_src2_f32">;

def DS_ADD_SRC2_U64   : DS_1A<"ds_add_src2_u64">;
def DS_SUB_SRC2_U64   : DS_1A<"ds_sub_src2_u64">;
def DS_RSUB_SRC2_U64  : DS_1A<"ds_rsub_src2_u64">;
def DS_INC_SRC2_U64   : DS_1A<"ds_inc_src2_u64">;
def DS_DEC_SRC2_U64   : DS_1A<"ds_dec_src2_u64">;
def DS_MIN_SRC2_I64   : DS_1A<"ds_min_src2_i64">;
def DS_MAX_SRC2_I64   : DS_1A<"ds_max_src2_i64">;
def DS_MIN_SRC2_U64   : DS_1A<"ds_min_src2_u64">;
def DS_MAX_SRC2_U64   : DS_1A<"ds_max_src2_u64">;
def DS_AND_SRC2_B64   : DS_1A<"ds_and_src2_b64">;
def DS_OR_SRC2_B64    : DS_1A<"ds_or_src2_b64">;
def DS_XOR_SRC2_B64   : DS_1A<"ds_xor_src2_b64">;
def DS_MIN_SRC2_F64   : DS_1A<"ds_min_src2_f64">;
def DS_MAX_SRC2_F64   : DS_1A<"ds_max_src2_f64">;

def DS_WRITE_SRC2_B32 : DS_1A<"ds_write_src2_b32">;
def DS_WRITE_SRC2_B64 : DS_1A<"ds_write_src2_b64">;
} // End SubtargetPredicate = HasDsSrc2Insts
622
// ds_swizzle_b32: a DS_1A_RET whose $offset uses the Swizzle operand type.
// It is convergent, neither loads nor stores, and lists only EXEC (not M0)
// as an implicit use.
let isConvergent = 1,
    mayLoad = 0,
    mayStore = 0,
    Uses = [EXEC] in
def DS_SWIZZLE_B32 : DS_1A_RET <"ds_swizzle_b32", VGPR_32, 0, Swizzle>;
626
// Load definitions: everything in this scope has mayStore cleared.
let mayStore = 0 in {
defm DS_READ_I8      : DS_1A_RET_mc<"ds_read_i8">;
defm DS_READ_U8      : DS_1A_RET_mc<"ds_read_u8">;
defm DS_READ_I16     : DS_1A_RET_mc<"ds_read_i16">;
defm DS_READ_U16     : DS_1A_RET_mc<"ds_read_u16">;
defm DS_READ_B32     : DS_1A_RET_mc<"ds_read_b32">;
defm DS_READ_B64     : DS_1A_RET_mc<"ds_read_b64", VReg_64>;

// Two-offset reads: the result class is twice as wide as the element.
defm DS_READ2_B32    : DS_1A_Off8_RET_mc<"ds_read2_b32", VReg_64>;
defm DS_READ2ST64_B32: DS_1A_Off8_RET_mc<"ds_read2st64_b32", VReg_64>;

defm DS_READ2_B64    : DS_1A_Off8_RET_mc<"ds_read2_b64", VReg_128>;
defm DS_READ2ST64_B64: DS_1A_Off8_RET_mc<"ds_read2st64_b64", VReg_128>;

// D16 reads use the tied-output form (DS_1A_RET_Tied) and are plain defs
// under has_m0_read = 0, so there is no separate "_gfx9" record.
let has_m0_read = 0 in {
let SubtargetPredicate = HasD16LoadStore, TiedSourceNotRead = 1 in {
def DS_READ_U8_D16     : DS_1A_RET_Tied<"ds_read_u8_d16">;
def DS_READ_U8_D16_HI  : DS_1A_RET_Tied<"ds_read_u8_d16_hi">;
def DS_READ_I8_D16     : DS_1A_RET_Tied<"ds_read_i8_d16">;
def DS_READ_I8_D16_HI  : DS_1A_RET_Tied<"ds_read_i8_d16_hi">;
def DS_READ_U16_D16    : DS_1A_RET_Tied<"ds_read_u16_d16">;
def DS_READ_U16_D16_HI : DS_1A_RET_Tied<"ds_read_u16_d16_hi">;
}
} // End has_m0_read = 0

// Address-less read form (DS_0A_RET), gated on HasDSAddTid.
let SubtargetPredicate = HasDSAddTid in {
def DS_READ_ADDTID_B32 : DS_0A_RET<"ds_read_addtid_b32">;
}

} // End mayStore = 0
657
// Address-less returning ops; DS_0A_RET leaves both mayLoad and mayStore set.
def DS_CONSUME       : DS_0A_RET<"ds_consume">;
def DS_APPEND        : DS_0A_RET<"ds_append">;

// ds_ordered_count always encodes gds = 1 (DS_1A_RET_GDS) and is restricted
// to subtargets matching isNotGFX90APlus.
let SubtargetPredicate = isNotGFX90APlus in
def DS_ORDERED_COUNT : DS_1A_RET_GDS<"ds_ordered_count">;
663
664//===----------------------------------------------------------------------===//
665// Instruction definitions for CI and newer.
666//===----------------------------------------------------------------------===//
667
// Everything in this scope is gated on isGFX7Plus.
let SubtargetPredicate = isGFX7Plus in {

defm DS_WRAP_RTN_B32 : DS_1A2D_RET_mc<"ds_wrap_rtn_b32", VGPR_32>;
defm DS_CONDXCHG32_RTN_B64 : DS_1A1D_RET_mc<"ds_condxchg32_rtn_b64", VReg_64>;

// Offset-only GWS op; convergent and custom-inserted like the other GWS ops.
let isConvergent = 1, usesCustomInserter = 1 in {
def DS_GWS_SEMA_RELEASE_ALL : DS_GWS_0D<"ds_gws_sema_release_all">;
}

// 96- and 128-bit loads.
let mayStore = 0 in {
defm DS_READ_B96 : DS_1A_RET_mc<"ds_read_b96", VReg_96>;
defm DS_READ_B128: DS_1A_RET_mc<"ds_read_b128", VReg_128>;
} // End mayStore = 0

// 96- and 128-bit stores.
let mayLoad = 0 in {
defm DS_WRITE_B96 : DS_1A1D_NORET_mc<"ds_write_b96", VReg_96>;
defm DS_WRITE_B128 : DS_1A1D_NORET_mc<"ds_write_b128", VReg_128>;
} // End mayLoad = 0

// Operand-less instruction (DS_VOID).
def DS_NOP : DS_VOID<"ds_nop">;

} // let SubtargetPredicate = isGFX7Plus
690
691//===----------------------------------------------------------------------===//
692// Instruction definitions for VI and newer.
693//===----------------------------------------------------------------------===//
694
// Permute instructions, gated on isGFX8Plus. Uses is overridden to [EXEC]
// only, so M0 is not listed as an implicit use. Each def passes the
// intrinsic that DS_1A1D_PERMUTE's built-in pattern matches.
let SubtargetPredicate = isGFX8Plus in {

let Uses = [EXEC] in {
def DS_PERMUTE_B32  : DS_1A1D_PERMUTE <"ds_permute_b32",
                                       int_amdgcn_ds_permute>;
def DS_BPERMUTE_B32 : DS_1A1D_PERMUTE <"ds_bpermute_b32",
                                       int_amdgcn_ds_bpermute>;
}

} // let SubtargetPredicate = isGFX8Plus
705
// ds_add_src2_f32 requires both HasLDSFPAtomicAddF32 (as the subtarget
// predicate) and HasDsSrc2Insts (as an additional predicate).
let SubtargetPredicate = HasLDSFPAtomicAddF32,
    OtherPredicates = [HasDsSrc2Insts] in
def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">;
709
710
711//===----------------------------------------------------------------------===//
712// Instruction definitions for GFX11.
713//===----------------------------------------------------------------------===//
714
// GS-register add/sub, gated on isGFX11Only: address-less, always-gds
// returning ops with a VReg_64 result and a VGPR_32 source operand.
let SubtargetPredicate = isGFX11Only in {

def DS_ADD_GS_REG_RTN : DS_0A1D_RET_GDS<"ds_add_gs_reg_rtn", VReg_64, VGPR_32>;
def DS_SUB_GS_REG_RTN : DS_0A1D_RET_GDS<"ds_sub_gs_reg_rtn", VReg_64, VGPR_32>;

} // let SubtargetPredicate = isGFX11Only
721
// ds_bvh_stack_rtn_b32 requires isGFX11Plus and, additionally,
// HasImageInsts.
let SubtargetPredicate = isGFX11Plus,
    OtherPredicates = [HasImageInsts] in
def DS_BVH_STACK_RTN_B32 : DS_BVH_STACK<"ds_bvh_stack_rtn_b32">;
728
729//===----------------------------------------------------------------------===//
730// Instruction definitions for GFX12 and newer.
731//===----------------------------------------------------------------------===//
732
// Everything in this scope is gated on isGFX12Plus.
let SubtargetPredicate = isGFX12Plus in {

defm DS_COND_SUB_U32      : DS_1A1D_NORET_mc<"ds_cond_sub_u32">;
defm DS_COND_SUB_RTN_U32  : DS_1A1D_RET_mc<"ds_cond_sub_rtn_u32", VGPR_32>;
defm DS_SUB_CLAMP_U32     : DS_1A1D_NORET_mc<"ds_sub_clamp_u32">;
defm DS_SUB_CLAMP_RTN_U32 : DS_1A1D_RET_mc<"ds_sub_clamp_rtn_u32", VGPR_32>;
// Note: unlike the GFX8+ permutes, this def is not wrapped in
// `let Uses = [EXEC]`, so it keeps the DS_Pseudo default Uses.
def DS_BPERMUTE_FI_B32    : DS_1A1D_PERMUTE <"ds_bpermute_fi_b32",
                                             int_amdgcn_ds_bpermute_fi_b32>;

// Emits two patterns for an intrinsic-based atomic:
//   - `frag` # "_local_addrspace" selects the returning instruction `inst`;
//   - `frag` # "_noret_local_addrspace" selects `noRetInst`, only when
//     HasAtomicCSubNoRtnInsts holds, with AddedComplexity 1.
multiclass DSAtomicRetNoRetPatIntrinsic_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
                                  ValueType vt, string frag> {
  def : DSAtomicRetPat<inst, vt,
                        !cast<PatFrag>(frag#"_local_addrspace")>;

  let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
    def : DSAtomicRetPat<noRetInst, vt,
                          !cast<PatFrag>(frag#"_noret_local_addrspace"), /* complexity */ 1>;
}

defm : DSAtomicRetNoRetPatIntrinsic_mc<DS_COND_SUB_RTN_U32, DS_COND_SUB_U32, i32, "int_amdgcn_atomic_cond_sub_u32">;
} // let SubtargetPredicate = isGFX12Plus
754
// "_tr_" reads: loads only (mayStore = 0), gated on HasGFX950Insts and
// wave64. DS_1A_RET_NoM0 emits a single record with has_m0_read = 0.
let WaveSizePredicate = isWave64, SubtargetPredicate = HasGFX950Insts, mayStore = 0 in {
  defm DS_READ_B64_TR_B4  : DS_1A_RET_NoM0<"ds_read_b64_tr_b4", VReg_64>;
  defm DS_READ_B64_TR_B8  : DS_1A_RET_NoM0<"ds_read_b64_tr_b8", VReg_64>;
  defm DS_READ_B64_TR_B16 : DS_1A_RET_NoM0<"ds_read_b64_tr_b16", VReg_64>;
  defm DS_READ_B96_TR_B6  : DS_1A_RET_NoM0<"ds_read_b96_tr_b6", VReg_96>;
}
761
762//===----------------------------------------------------------------------===//
763// DS Patterns
764//===----------------------------------------------------------------------===//
765
// Select the ds_swizzle intrinsic to DS_SWIZZLE_B32: the immediate becomes
// the 16-bit offset operand and gds is 0.
def : GCNPat<(int_amdgcn_ds_swizzle i32:$src, timm:$offset16),
             (DS_SWIZZLE_B32 VGPR_32:$src, (as_i16timm $offset16), (i1 0))>;
770
// Selection pattern for a load: `frag` applied to a (ptr, offset) DS address
// and producing `vt` is selected to `inst`, with `gds` passed as the
// instruction's gds immediate.
class DSReadPat <DS_Pseudo inst, ValueType vt, PatFrag frag, int gds=0>
  : GCNPat<(vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))),
           (inst $ptr, Offset:$offset, (i1 gds))>;
775
// Emits two load patterns for one instruction / fragment pair:
//   - under LDSRequiresM0Init:    `inst` with the fragment `frag` # "_m0";
//   - under NotLDSRequiresM0Init: the record named `inst` # "_gfx9" with the
//     plain fragment `frag`.
multiclass DSReadPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
  let OtherPredicates = [LDSRequiresM0Init] in
  def : DSReadPat<inst, vt, !cast<PatFrag>(frag#"_m0")>;

  let OtherPredicates = [NotLDSRequiresM0Init] in
  def : DSReadPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt, !cast<PatFrag>(frag)>;
}
786
// Selection pattern for the tied-output D16 loads: the fragment's extra
// `vt:$in` operand is passed as the instruction's last operand, and gds is 0.
class DSReadPat_D16 <DS_Pseudo inst, PatFrag frag, ValueType vt>
  : GCNPat<(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$in),
           (inst $ptr, Offset:$offset, (i1 0), $in)>;
791
// Extending 8/16-bit loads and the plain 16-bit load from LDS.
// (The DS_READ_I16 / i32 / "sextloadi16_local" pattern used to be listed
// twice; the second, identical copy was redundant and has been dropped.)
defm : DSReadPat_mc <DS_READ_I8,  i32, "sextloadi8_local">;
defm : DSReadPat_mc <DS_READ_I8,  i16, "sextloadi8_local">;
defm : DSReadPat_mc <DS_READ_U8,  i32, "extloadi8_local">;
defm : DSReadPat_mc <DS_READ_U8,  i32, "zextloadi8_local">;
defm : DSReadPat_mc <DS_READ_U8,  i16, "extloadi8_local">;
defm : DSReadPat_mc <DS_READ_U8,  i16, "zextloadi8_local">;
defm : DSReadPat_mc <DS_READ_I16, i32, "sextloadi16_local">;
defm : DSReadPat_mc <DS_READ_U16, i32, "extloadi16_local">;
defm : DSReadPat_mc <DS_READ_U16, i32, "zextloadi16_local">;
defm : DSReadPat_mc <DS_READ_U16, i16, "load_local">;
803
// One ds_read_b32 load pattern per value type listed in Reg32Types.
foreach vt = Reg32Types.types in
defm : DSReadPat_mc <DS_READ_B32, vt, "load_local">;
807
// Atomic loads from LDS. The plain and zero-extending fragments select the
// unsigned reads; the sign-extending fragments select the signed reads.
defm : DSReadPat_mc <DS_READ_U8, i16, "atomic_load_8_local">;
defm : DSReadPat_mc <DS_READ_U8, i32, "atomic_load_8_local">;
defm : DSReadPat_mc <DS_READ_U8, i16, "atomic_load_zext_8_local">;
defm : DSReadPat_mc <DS_READ_U8, i32, "atomic_load_zext_8_local">;
defm : DSReadPat_mc <DS_READ_I8, i16, "atomic_load_sext_8_local">;
defm : DSReadPat_mc <DS_READ_I8, i32, "atomic_load_sext_8_local">;
defm : DSReadPat_mc <DS_READ_U16, i16, "atomic_load_16_local">;
defm : DSReadPat_mc <DS_READ_U16, i32, "atomic_load_16_local">;
defm : DSReadPat_mc <DS_READ_U16, i32, "atomic_load_zext_16_local">;
defm : DSReadPat_mc <DS_READ_I16, i32, "atomic_load_sext_16_local">;
defm : DSReadPat_mc <DS_READ_B32, i32, "atomic_load_32_local">;
defm : DSReadPat_mc <DS_READ_B64, i64, "atomic_load_64_local">;
820
// D16 load patterns, enabled only under D16PreservesUnusedBits. Each
// instruction is matched for both v2i16 and v2f16; the first group covers
// the "_hi" forms and the second group the "_lo" fragments.
let OtherPredicates = [D16PreservesUnusedBits] in {
// TODO: Atomic loads
def : DSReadPat_D16<DS_READ_U16_D16_HI, load_d16_hi_local, v2i16>;
def : DSReadPat_D16<DS_READ_U16_D16_HI, load_d16_hi_local, v2f16>;
def : DSReadPat_D16<DS_READ_U8_D16_HI, az_extloadi8_d16_hi_local, v2i16>;
def : DSReadPat_D16<DS_READ_U8_D16_HI, az_extloadi8_d16_hi_local, v2f16>;
def : DSReadPat_D16<DS_READ_I8_D16_HI, sextloadi8_d16_hi_local, v2i16>;
def : DSReadPat_D16<DS_READ_I8_D16_HI, sextloadi8_d16_hi_local, v2f16>;

def : DSReadPat_D16<DS_READ_U16_D16, load_d16_lo_local, v2i16>;
def : DSReadPat_D16<DS_READ_U16_D16, load_d16_lo_local, v2f16>;
def : DSReadPat_D16<DS_READ_U8_D16, az_extloadi8_d16_lo_local, v2i16>;
def : DSReadPat_D16<DS_READ_U8_D16, az_extloadi8_d16_lo_local, v2f16>;
def : DSReadPat_D16<DS_READ_I8_D16, sextloadi8_d16_lo_local, v2i16>;
def : DSReadPat_D16<DS_READ_I8_D16, sextloadi8_d16_lo_local, v2f16>;
}
837
// Selection pattern for a single-address store: `frag` applied to a value of
// type `vt` and a DS1Addr1Offset address selects `inst` with the pointer, the
// value (in the register class chosen by getVregSrcForVT), the offset and
// the gds bit.
class DSWritePat <DS_Pseudo inst, ValueType vt, PatFrag frag, int gds=0>
  : GCNPat <
      (frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)),
      (inst $ptr, getVregSrcForVT<vt>.ret:$value, Offset:$offset, (i1 gds))
    >;
842
// Emits the two subtarget flavours of a store pattern. `frag` is the
// fragment name as a string so that the "_m0" variant can be looked up.
multiclass DSWritePat_mc <DS_Pseudo inst, ValueType vt, string frag> {
  // LDSRequiresM0Init: base pseudo with the "<frag>_m0" fragment.
  let OtherPredicates = [LDSRequiresM0Init] in
  def : DSWritePat<inst, vt, !cast<PatFrag>(frag#"_m0")>;

  // NotLDSRequiresM0Init: "<inst>_gfx9" pseudo with the plain fragment.
  let OtherPredicates = [NotLDSRequiresM0Init] in
  def : DSWritePat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
                   !cast<PatFrag>(frag)>;
}
852
// Sub-dword local stores: truncating stores from i32/i16 and the plain
// i16 store.
defm : DSWritePat_mc <DS_WRITE_B8,  i32, "truncstorei8_local">;
defm : DSWritePat_mc <DS_WRITE_B16, i32, "truncstorei16_local">;
defm : DSWritePat_mc <DS_WRITE_B8,  i16, "truncstorei8_local">;
defm : DSWritePat_mc <DS_WRITE_B16, i16, "store_local">;
857
// Plain local stores through DS_WRITE_B32, one pattern set for every type
// listed in Reg32Types.
foreach vt = Reg32Types.types in
  defm : DSWritePat_mc <DS_WRITE_B32, vt, "store_local">;
861
// Atomic local stores, by access width.
defm : DSWritePat_mc <DS_WRITE_B8,  i16, "atomic_store_8_local">;
defm : DSWritePat_mc <DS_WRITE_B8,  i32, "atomic_store_8_local">;
defm : DSWritePat_mc <DS_WRITE_B16, i16, "atomic_store_16_local">;
defm : DSWritePat_mc <DS_WRITE_B16, i32, "atomic_store_16_local">;
defm : DSWritePat_mc <DS_WRITE_B32, i32, "atomic_store_32_local">;
defm : DSWritePat_mc <DS_WRITE_B64, i64, "atomic_store_64_local">;
868
// Stores that use the "_hi16" fragments, guarded by HasD16LoadStore.
let OtherPredicates = [HasD16LoadStore] in {
  def : DSWritePat <DS_WRITE_B16_D16_HI, i32, store_hi16_local>;
  def : DSWritePat <DS_WRITE_B8_D16_HI,  i32, truncstorei8_hi16_local>;
}
873
// Load pattern using the DS64Bit4ByteAligned address form, which yields a
// pointer and two offsets; both offsets are passed to `inst` and the last
// i1 operand is 0.
class DS64Bit4ByteAlignedReadPat<DS_Pseudo inst, ValueType vt, PatFrag frag>
  : GCNPat <
      (vt:$value (frag (DS64Bit4ByteAligned i32:$ptr, i32:$offset0, i32:$offset1))),
      (inst $ptr, $offset0, $offset1, (i1 0))
    >;
878
// Store pattern using the DS64Bit4ByteAligned address form. The 64-bit value
// is split into its sub0 and sub1 halves, passed as two i32 data operands
// ahead of the two offsets; the last i1 operand is 0.
class DS64Bit4ByteAlignedWritePat<DS_Pseudo inst, ValueType vt, PatFrag frag>
  : GCNPat <
      (frag vt:$value, (DS64Bit4ByteAligned i32:$ptr, i32:$offset0, i32:$offset1)),
      (inst $ptr,
            (i32 (EXTRACT_SUBREG VReg_64:$value, sub0)),
            (i32 (EXTRACT_SUBREG VReg_64:$value, sub1)),
            $offset0, $offset1,
            (i1 0))
    >;
885
// Load pattern using the DS128Bit8ByteAligned address form, which yields a
// pointer and two offsets; both offsets are passed to `inst` and the last
// i1 operand is 0.
class DS128Bit8ByteAlignedReadPat<DS_Pseudo inst, ValueType vt, PatFrag frag>
  : GCNPat <
      (vt:$value (frag (DS128Bit8ByteAligned i32:$ptr, i32:$offset0, i32:$offset1))),
      (inst $ptr, $offset0, $offset1, (i1 0))
    >;
890
// Store pattern using the DS128Bit8ByteAligned address form. The 128-bit
// value is split into its sub0_sub1 and sub2_sub3 halves, passed as two i64
// data operands ahead of the two offsets; the last i1 operand is 0.
class DS128Bit8ByteAlignedWritePat<DS_Pseudo inst, ValueType vt, PatFrag frag>
  : GCNPat <
      (frag vt:$value, (DS128Bit8ByteAligned i32:$ptr, i32:$offset0, i32:$offset1)),
      (inst $ptr,
            (i64 (EXTRACT_SUBREG VReg_128:$value, sub0_sub1)),
            (i64 (EXTRACT_SUBREG VReg_128:$value, sub2_sub3)),
            $offset0, $offset1,
            (i1 0))
    >;
897
// Read and write patterns for `vt` through DS_READ2_B32 / DS_WRITE2_B32, in
// both subtarget flavours.
multiclass DS64Bit4ByteAlignedPat_mc<ValueType vt> {
  // LDSRequiresM0Init (and GFX7+): base pseudos with the "_m0" fragments.
  let OtherPredicates = [LDSRequiresM0Init, isGFX7Plus] in {
    def : DS64Bit4ByteAlignedReadPat <DS_READ2_B32,  vt, load_local_m0>;
    def : DS64Bit4ByteAlignedWritePat<DS_WRITE2_B32, vt, store_local_m0>;
  }

  // NotLDSRequiresM0Init: "_gfx9" pseudos with the plain fragments.
  let OtherPredicates = [NotLDSRequiresM0Init] in {
    def : DS64Bit4ByteAlignedReadPat <DS_READ2_B32_gfx9,  vt, load_local>;
    def : DS64Bit4ByteAlignedWritePat<DS_WRITE2_B32_gfx9, vt, store_local>;
  }
}
909
// Read and write patterns for `vt` through DS_READ2_B64 / DS_WRITE2_B64, in
// both subtarget flavours.
multiclass DS128Bit8ByteAlignedPat_mc<ValueType vt> {
  // LDSRequiresM0Init (and GFX7+): base pseudos with the "_m0" fragments.
  let OtherPredicates = [LDSRequiresM0Init, isGFX7Plus] in {
    def : DS128Bit8ByteAlignedReadPat <DS_READ2_B64,  vt, load_local_m0>;
    def : DS128Bit8ByteAlignedWritePat<DS_WRITE2_B64, vt, store_local_m0>;
  }

  // NotLDSRequiresM0Init: "_gfx9" pseudos with the plain fragments.
  let OtherPredicates = [NotLDSRequiresM0Init] in {
    def : DS128Bit8ByteAlignedReadPat <DS_READ2_B64_gfx9,  vt, load_local>;
    def : DS128Bit8ByteAlignedWritePat<DS_WRITE2_B64_gfx9, vt, store_local>;
  }
}
921
922// v2i32 loads are split into i32 loads on SI during lowering, due to a bug
923// related to bounds checking.
// Two-address patterns for every type of the 64-bit register class.
foreach vt = VReg_64.RegTypes in
  defm : DS64Bit4ByteAlignedPat_mc<vt>;

// Two-address patterns for every type of the 128-bit register class.
foreach vt = VReg_128.RegTypes in
  defm : DS128Bit8ByteAlignedPat_mc<vt>;
931
932// Prefer ds_read over ds_read2 and ds_write over ds_write2, all other things
933// being equal, because it has a larger immediate offset range.
// Wide single-address loads and stores. All patterns in this block carry
// AddedComplexity = 100.
let AddedComplexity = 100 in {

  // 64-bit accesses with the "align8" fragments.
  foreach vt = VReg_64.RegTypes in {
    defm : DSReadPat_mc  <DS_READ_B64,  vt, "load_align8_local">;
    defm : DSWritePat_mc <DS_WRITE_B64, vt, "store_align8_local">;
  }

  let SubtargetPredicate = isGFX7Plus in {

    // 96-bit accesses with the "align16" fragments.
    foreach vt = VReg_96.RegTypes in {
      defm : DSReadPat_mc  <DS_READ_B96,  vt, "load_align16_local">;
      defm : DSWritePat_mc <DS_WRITE_B96, vt, "store_align16_local">;
    }

    // 128-bit accesses with the "align16" fragments.
    foreach vt = VReg_128.RegTypes in {
      defm : DSReadPat_mc  <DS_READ_B128,  vt, "load_align16_local">;
      defm : DSWritePat_mc <DS_WRITE_B128, vt, "store_align16_local">;
    }

    let SubtargetPredicate = HasUnalignedAccessMode in {

      // Select 64 bit loads and stores aligned less than 4 as a single
      // ds_read_b64/ds_write_b64 instruction as this is faster than
      // ds_read2_b32/ds_write2_b32 which would be used otherwise. In this
      // case a b32 access would still be misaligned, but we will have 2 of
      // them.
      foreach vt = VReg_64.RegTypes in {
        defm : DSReadPat_mc  <DS_READ_B64,  vt, "load_align_less_than_4_local">;
        defm : DSWritePat_mc <DS_WRITE_B64, vt, "store_align_less_than_4_local">;
      }

      // Selection will split most of the unaligned 3 dword accesses due to
      // performance reasons when beneficial. Keep these two patterns for the
      // rest of the cases.
      foreach vt = VReg_96.RegTypes in {
        defm : DSReadPat_mc  <DS_READ_B96,  vt, "load_local">;
        defm : DSWritePat_mc <DS_WRITE_B96, vt, "store_local">;
      }

      // Select 128 bit loads and stores aligned less than 4 as a single
      // ds_read_b128/ds_write_b128 instruction as this is faster than
      // ds_read2_b64/ds_write2_b64 which would be used otherwise. In this
      // case a b64 access would still be misaligned, but we will have 2 of
      // them.
      foreach vt = VReg_128.RegTypes in {
        defm : DSReadPat_mc  <DS_READ_B128,  vt, "load_align_less_than_4_local">;
        defm : DSWritePat_mc <DS_WRITE_B128, vt, "store_align_less_than_4_local">;
      }

    } // End SubtargetPredicate = HasUnalignedAccessMode

  } // End SubtargetPredicate = isGFX7Plus

} // End AddedComplexity = 100
985
// Patterns for a returning atomic. The fragment is looked up by name:
// "<frag>_local_m0_<vt>", "<frag>_local_<vt>" and "<frag>_region_m0_<vt>".
multiclass DSAtomicRetPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
  // LDSRequiresM0Init: base pseudo with the "_local_m0_" fragment.
  let OtherPredicates = [LDSRequiresM0Init] in
  def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_local_m0_"#vt)>;

  // NotLDSRequiresM0Init: "<inst>_gfx9" pseudo with the "_local_" fragment.
  let OtherPredicates = [NotLDSRequiresM0Init] in
  def : DSAtomicRetPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
                       !cast<PatFrag>(frag#"_local_"#vt)>;

  // HasGDS: base pseudo with the "_region_m0_" fragment and gds = 1.
  let OtherPredicates = [HasGDS] in
  def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt),
                       /* complexity */ 0, /* gds */ 1>;
}
1001
// Patterns for an atomic that has both a returning (`inst`) and a
// non-returning (`noRetInst`) form. In every group the "_noret_" fragment is
// paired with `noRetInst` and given complexity 1; the returning form keeps
// the default complexity.
multiclass DSAtomicRetNoRetPat_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
                                  ValueType vt, string frag> {
  // LDSRequiresM0Init: base pseudos with the "_local_m0_" fragments.
  let OtherPredicates = [LDSRequiresM0Init] in {
    def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_local_m0_"#vt)>;
    def : DSAtomicRetPat<noRetInst, vt,
                         !cast<PatFrag>(frag#"_local_m0_noret_"#vt),
                         /* complexity */ 1>;
  }

  // NotLDSRequiresM0Init: "_gfx9" pseudos with the "_local_" fragments.
  let OtherPredicates = [NotLDSRequiresM0Init] in {
    def : DSAtomicRetPat<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
                         !cast<PatFrag>(frag#"_local_"#vt)>;
    def : DSAtomicRetPat<!cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9"), vt,
                         !cast<PatFrag>(frag#"_local_noret_"#vt),
                         /* complexity */ 1>;
  }

  // HasGDS: base pseudos with the "_region_m0_" fragments and gds = 1.
  let OtherPredicates = [HasGDS] in {
    def : DSAtomicRetPat<inst, vt, !cast<PatFrag>(frag#"_region_m0_"#vt),
                         /* complexity */ 0, /* gds */ 1>;
    def : DSAtomicRetPat<noRetInst, vt,
                         !cast<PatFrag>(frag#"_region_m0_noret_"#vt),
                         /* complexity */ 1, /* gds */ 1>;
  }
}
1027
1028
1029
let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in {

  // Compare-and-swap pattern that passes $cmp before $swap to the
  // instruction.
  // Caution, the order of src and cmp is the *opposite* of the
  // BUFFER_ATOMIC_CMPSWAP opcode.
  class DSAtomicCmpXChgSwapped<DS_Pseudo inst, ValueType vt, PatFrag frag,
                               int complexity = 0, bit gds=0>
    : GCNPat<
        (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
        (inst $ptr, getVregSrcForVT<vt>.ret:$cmp, getVregSrcForVT<vt>.ret:$swap,
              Offset:$offset, (i1 gds))> {
    let AddedComplexity = complexity;
  }

  // Returning and non-returning compare-and-swap patterns. The "_noret_"
  // fragments are paired with `noRetInst` and given complexity 1.
  multiclass DSAtomicCmpXChgSwapped_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
                                       ValueType vt, string frag> {
    // LDSRequiresM0Init: base pseudos with the "_local_m0_" fragments.
    let OtherPredicates = [LDSRequiresM0Init] in {
      def : DSAtomicCmpXChgSwapped<inst, vt,
                                   !cast<PatFrag>(frag#"_local_m0_"#vt)>;
      def : DSAtomicCmpXChgSwapped<noRetInst, vt,
                                   !cast<PatFrag>(frag#"_local_m0_noret_"#vt),
                                   /* complexity */ 1>;
    }

    // NotLDSRequiresM0Init: "_gfx9" pseudos with the "_local_" fragments.
    let OtherPredicates = [NotLDSRequiresM0Init] in {
      def : DSAtomicCmpXChgSwapped<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
                                   !cast<PatFrag>(frag#"_local_"#vt)>;
      def : DSAtomicCmpXChgSwapped<!cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9"), vt,
                                   !cast<PatFrag>(frag#"_local_noret_"#vt),
                                   /* complexity */ 1>;
    }

    // HasGDS: base pseudos with the "_region_m0_" fragments and gds = 1.
    let OtherPredicates = [HasGDS] in {
      def : DSAtomicCmpXChgSwapped<inst, vt,
                                   !cast<PatFrag>(frag#"_region_m0_"#vt),
                                   /* complexity */ 0, /* gds */ 1>;
      def : DSAtomicCmpXChgSwapped<noRetInst, vt,
                                   !cast<PatFrag>(frag#"_region_m0_noret_"#vt),
                                   /* complexity */ 1, /* gds */ 1>;
    }
  }

} // End SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10
1063
let SubtargetPredicate = isGFX11Plus in {

  // Compare-and-swap pattern that passes $swap before $cmp to the
  // instruction.
  // The order of src and cmp agrees with the BUFFER_ATOMIC_CMPSWAP opcode.
  class DSAtomicCmpXChg<DS_Pseudo inst, ValueType vt, PatFrag frag,
                        int complexity = 0, bit gds=0>
    : GCNPat<
        (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
        (inst $ptr, getVregSrcForVT<vt>.ret:$swap, getVregSrcForVT<vt>.ret:$cmp,
              Offset:$offset, (i1 gds))> {
    let AddedComplexity = complexity;
  }

  // Returning and non-returning compare-and-swap patterns. The "_noret_"
  // fragments are paired with `noRetInst` and given complexity 1.
  multiclass DSAtomicCmpXChg_mc<DS_Pseudo inst, DS_Pseudo noRetInst,
                                ValueType vt, string frag> {

    // Local patterns: "_gfx9" pseudos with the "_local_" fragments.
    def : DSAtomicCmpXChg<!cast<DS_Pseudo>(!cast<string>(inst)#"_gfx9"), vt,
                          !cast<PatFrag>(frag#"_local_"#vt)>;
    def : DSAtomicCmpXChg<!cast<DS_Pseudo>(!cast<string>(noRetInst)#"_gfx9"), vt,
                          !cast<PatFrag>(frag#"_local_noret_"#vt),
                          /* complexity */ 1>;

    // HasGDS: base pseudos with the "_region_m0_" fragments and gds = 1.
    let OtherPredicates = [HasGDS] in {
      def : DSAtomicCmpXChg<inst, vt,
                            !cast<PatFrag>(frag#"_region_m0_"#vt),
                            /* complexity */ 0, /* gds */ 1>;
      def : DSAtomicCmpXChg<noRetInst, vt,
                            !cast<PatFrag>(frag#"_region_m0_noret_"#vt),
                            /* complexity */ 1, /* gds */ 1>;
    }
  }

} // End SubtargetPredicate = isGFX11Plus
1088
1089// 32-bit atomics.
// Exchange has only a returning pattern set.
defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B32, i32, "atomic_swap">;

// Integer read-modify-write operations.
defm : DSAtomicRetNoRetPat_mc<DS_ADD_RTN_U32, DS_ADD_U32, i32, "atomic_load_add">;
defm : DSAtomicRetNoRetPat_mc<DS_SUB_RTN_U32, DS_SUB_U32, i32, "atomic_load_sub">;
defm : DSAtomicRetNoRetPat_mc<DS_INC_RTN_U32, DS_INC_U32, i32, "atomic_load_uinc_wrap">;
defm : DSAtomicRetNoRetPat_mc<DS_DEC_RTN_U32, DS_DEC_U32, i32, "atomic_load_udec_wrap">;
defm : DSAtomicRetNoRetPat_mc<DS_AND_RTN_B32, DS_AND_B32, i32, "atomic_load_and">;
defm : DSAtomicRetNoRetPat_mc<DS_OR_RTN_B32,  DS_OR_B32,  i32, "atomic_load_or">;
defm : DSAtomicRetNoRetPat_mc<DS_XOR_RTN_B32, DS_XOR_B32, i32, "atomic_load_xor">;
defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_I32, DS_MIN_I32, i32, "atomic_load_min">;
defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_I32, DS_MAX_I32, i32, "atomic_load_max">;
defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_U32, DS_MIN_U32, i32, "atomic_load_umin">;
defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_U32, DS_MAX_U32, i32, "atomic_load_umax">;

// Floating-point min/max.
defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_F32, DS_MIN_F32, f32, "atomic_load_fmin">;
defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_F32, DS_MAX_F32, f32, "atomic_load_fmax">;
1104
1105
// Packed 16-bit floating-point add (v2f16 and v2bf16).
let SubtargetPredicate = HasAtomicDsPkAdd16Insts in {
  defm : DSAtomicRetNoRetPat_mc<DS_PK_ADD_RTN_F16,  DS_PK_ADD_F16,  v2f16,  "atomic_load_fadd">;
  defm : DSAtomicRetNoRetPat_mc<DS_PK_ADD_RTN_BF16, DS_PK_ADD_BF16, v2bf16, "atomic_load_fadd">;
} // End SubtargetPredicate = HasAtomicDsPkAdd16Insts
1110
// 32-bit compare-and-swap: DS_CMPST before GFX11, DS_CMPSTORE from GFX11 on.
let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in
defm : DSAtomicCmpXChgSwapped_mc<DS_CMPST_RTN_B32, DS_CMPST_B32, i32, "atomic_cmp_swap">;

let SubtargetPredicate = isGFX11Plus in
defm : DSAtomicCmpXChg_mc<DS_CMPSTORE_RTN_B32, DS_CMPSTORE_B32, i32, "atomic_cmp_swap">;

// 32-bit floating-point add.
let SubtargetPredicate = HasLDSFPAtomicAddF32 in
defm : DSAtomicRetNoRetPat_mc<DS_ADD_RTN_F32, DS_ADD_F32, f32, "atomic_load_fadd">;
1122
1123// 64-bit atomics.
// Exchange has only a returning pattern set.
defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B64, i64, "atomic_swap">;

// Integer read-modify-write operations.
defm : DSAtomicRetNoRetPat_mc<DS_ADD_RTN_U64, DS_ADD_U64, i64, "atomic_load_add">;
defm : DSAtomicRetNoRetPat_mc<DS_SUB_RTN_U64, DS_SUB_U64, i64, "atomic_load_sub">;
defm : DSAtomicRetNoRetPat_mc<DS_INC_RTN_U64, DS_INC_U64, i64, "atomic_load_uinc_wrap">;
defm : DSAtomicRetNoRetPat_mc<DS_DEC_RTN_U64, DS_DEC_U64, i64, "atomic_load_udec_wrap">;
defm : DSAtomicRetNoRetPat_mc<DS_AND_RTN_B64, DS_AND_B64, i64, "atomic_load_and">;
defm : DSAtomicRetNoRetPat_mc<DS_OR_RTN_B64,  DS_OR_B64,  i64, "atomic_load_or">;
defm : DSAtomicRetNoRetPat_mc<DS_XOR_RTN_B64, DS_XOR_B64, i64, "atomic_load_xor">;
defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_I64, DS_MIN_I64, i64, "atomic_load_min">;
defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_I64, DS_MAX_I64, i64, "atomic_load_max">;
defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_U64, DS_MIN_U64, i64, "atomic_load_umin">;
defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_U64, DS_MAX_U64, i64, "atomic_load_umax">;

// Floating-point min/max.
defm : DSAtomicRetNoRetPat_mc<DS_MIN_RTN_F64, DS_MIN_F64, f64, "atomic_load_fmin">;
defm : DSAtomicRetNoRetPat_mc<DS_MAX_RTN_F64, DS_MAX_F64, f64, "atomic_load_fmax">;
1138
// 64-bit compare-and-swap: DS_CMPST before GFX11, DS_CMPSTORE from GFX11 on.
let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in
defm : DSAtomicCmpXChgSwapped_mc<DS_CMPST_RTN_B64, DS_CMPST_B64, i64, "atomic_cmp_swap">;

let SubtargetPredicate = isGFX11Plus in
defm : DSAtomicCmpXChg_mc<DS_CMPSTORE_RTN_B64, DS_CMPSTORE_B64, i64, "atomic_cmp_swap">;
1146
let SubtargetPredicate = HasLdsAtomicAddF64 in {

  // 64-bit floating-point add. The non-returning form gets
  // AddedComplexity = 1.
  def : DSAtomicRetPat<DS_ADD_RTN_F64, f64, atomic_load_fadd_local_f64>;

  let AddedComplexity = 1 in {
    def : DSAtomicRetPat<DS_ADD_F64, f64, atomic_load_fadd_local_noret_f64>;
  }

  // Pattern for an atomic whose fragment is wrapped in an explicit result
  // type. NOTE(review): no instantiation of this class is visible in this
  // part of the file - confirm it is still used.
  class DSAtomicRetPatIntrinsic<DS_Pseudo inst, ValueType vt, PatFrag frag,
                                bit gds=0>
    : GCNPat <
        (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value)),
        (inst $ptr, getVregSrcForVT<vt>.ret:$value, Offset:$offset, (i1 gds))>;

} // End SubtargetPredicate = HasLdsAtomicAddF64
1158
// The v2f16 DS_PK_ADD_F16 / DS_PK_ADD_RTN_F16 patterns are instantiated once,
// together with the bf16 ones, in the HasAtomicDsPkAdd16Insts block that
// follows the 32-bit atomics. They are intentionally not repeated here.
1162
// SIds_ordered_count selects DS_ORDERED_COUNT; the offset is passed as a
// 16-bit immediate. Requires HasGDS.
let OtherPredicates = [HasGDS] in {
  def : GCNPat <
    (SIds_ordered_count i32:$value, i16:$offset),
    (DS_ORDERED_COUNT $value, (as_i16imm $offset))
  >;
}
1168
// int_amdgcn_ds_add_gs_reg_rtn with an i64 result maps directly onto
// DS_ADD_GS_REG_RTN.
def : GCNPat <
  (i64 (int_amdgcn_ds_add_gs_reg_rtn i32:$src, timm:$offset32)),
  (DS_ADD_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32))
>;

// With an i32 result, the instruction result is copied to VReg_64 and its
// sub0 half is extracted.
def : GCNPat <
  (i32 (int_amdgcn_ds_add_gs_reg_rtn i32:$src, timm:$offset32)),
  (EXTRACT_SUBREG
    (i64 (COPY_TO_REGCLASS
           (DS_ADD_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32)),
           VReg_64)),
    sub0)
>;
1182
// int_amdgcn_ds_sub_gs_reg_rtn with an i64 result maps directly onto
// DS_SUB_GS_REG_RTN.
def : GCNPat <
  (i64 (int_amdgcn_ds_sub_gs_reg_rtn i32:$src, timm:$offset32)),
  (DS_SUB_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32))
>;

// With an i32 result, the instruction result is copied to VReg_64 and its
// sub0 half is extracted.
def : GCNPat <
  (i32 (int_amdgcn_ds_sub_gs_reg_rtn i32:$src, timm:$offset32)),
  (EXTRACT_SUBREG
    (i64 (COPY_TO_REGCLASS
           (DS_SUB_GS_REG_RTN VGPR_32:$src, (as_i32timm $offset32)),
           VReg_64)),
    sub0)
>;
1196
// Pattern for a load expressed as `node` applied to a DS1Addr1Offset
// address and producing `vt`; the trailing i1 operand of `inst` is 0.
class DSLoadTrPat <DS_Pseudo inst, ValueType vt, SDPatternOperator node>
  : GCNPat <
      (vt (node (DS1Addr1Offset i32:$ptr, i32:$offset))),
      (inst $ptr, Offset:$offset, (i1 0))
    >;
1201
// ds_read_tr* intrinsics, available with HasGFX950Insts. The 16-bit variant
// is instantiated for the i16, f16 and bf16 vector types.
let SubtargetPredicate = HasGFX950Insts in {
  def : DSLoadTrPat <DS_READ_B64_TR_B4, v2i32, int_amdgcn_ds_read_tr4_b64>;
  def : DSLoadTrPat <DS_READ_B64_TR_B8, v2i32, int_amdgcn_ds_read_tr8_b64>;
  def : DSLoadTrPat <DS_READ_B96_TR_B6, v3i32, int_amdgcn_ds_read_tr6_b96>;

  def : DSLoadTrPat <DS_READ_B64_TR_B16, v4i16,  int_amdgcn_ds_read_tr16_b64>;
  def : DSLoadTrPat <DS_READ_B64_TR_B16, v4f16,  int_amdgcn_ds_read_tr16_b64>;
  def : DSLoadTrPat <DS_READ_B64_TR_B16, v4bf16, int_amdgcn_ds_read_tr16_b64>;
}
1210
1211//===----------------------------------------------------------------------===//
1212// Target-specific instruction encodings.
1213//===----------------------------------------------------------------------===//
1214
1215//===----------------------------------------------------------------------===//
1216// Base ENC_DS for GFX6, GFX7, GFX10, GFX11, GFX12.
1217//===----------------------------------------------------------------------===//
1218
// Real (encoded) DS instruction for the 64-bit ENC_DS layout.
//   op     - 8-bit opcode, placed in Inst{25-18}.
//   ps     - pseudo this real instruction is derived from; its has_* bits
//            decide which operand fields are encoded and which are zero.
//   ef     - encoding family passed on to SIMCInstr.
//   opName - assembly mnemonic, defaulting to the pseudo's mnemonic.
//   hasGDS - when false, the gds field is fixed to 0.
class Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<bits<8> op, DS_Pseudo ps, int ef,
                                               string opName = ps.Mnemonic,
                                               bit hasGDS = true>
    : DS_Real<ps, opName>, SIMCInstr <ps.PseudoInstr, ef> {

  // Offsets and the gds bit.
  let Inst{7-0}   = !if(ps.has_offset0, offset0, 0);
  let Inst{15-8}  = !if(ps.has_offset1, offset1, 0);
  let Inst{17}    = !if(ps.has_gds, gds, ps.gdsValue);

  // Opcode and the fixed value 0x36 in the top six bits of the first dword.
  let Inst{25-18} = op;
  let Inst{31-26} = 0x36;

  // Register operands. When there is no addr operand but has_gws_data0 is
  // set, data0 is encoded in the addr field.
  let Inst{39-32} = !if(ps.has_addr, addr, !if(ps.has_gws_data0, data0{7-0}, 0));
  let Inst{47-40} = !if(ps.has_data0, data0{7-0}, 0);
  let Inst{55-48} = !if(ps.has_data1, data1{7-0}, 0);
  let Inst{63-56} = !if(ps.has_vdst, vdst{7-0}, 0);

  // Leave gds unset when GDS is supported, otherwise force it to 0.
  let gds = !if(hasGDS, ?, 0);
}
1236
1237//===----------------------------------------------------------------------===//
1238// GFX12.
1239//===----------------------------------------------------------------------===//
1240
// Defines the "_gfx12" real instruction for the pseudo named NAME.
//   op        - opcode.
//   name      - assembly mnemonic; defaults to the lower-cased NAME.
//   needAlias - when true and `name` differs from the pseudo's mnemonic, an
//               alias from the pseudo's mnemonic to `name` is also defined.
// The real instruction is created with hasGDS = false.
multiclass DS_Real_gfx12<bits<8> op, string name = !tolower(NAME),
                         bit needAlias = true> {
  defvar ps = !cast<DS_Pseudo>(NAME);

  let AssemblerPredicate = isGFX12Plus in {
    let DecoderNamespace = "GFX12" in {
      def _gfx12 :
        Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op, ps, SIEncodingFamily.GFX12,
                                                 name, /*hasGDS=*/false>;
    }

    if !and(needAlias, !ne(ps.Mnemonic, name)) then
      def : AMDGPUMnemonicAlias<ps.Mnemonic, name>;
  } // End AssemblerPredicate
}
1252
// Floating-point min/max use the "_num_" mnemonics on GFX12.
defm DS_MIN_F32     : DS_Real_gfx12<0x012, "ds_min_num_f32">;
defm DS_MAX_F32     : DS_Real_gfx12<0x013, "ds_max_num_f32">;
defm DS_MIN_RTN_F32 : DS_Real_gfx12<0x032, "ds_min_num_rtn_f32">;
defm DS_MAX_RTN_F32 : DS_Real_gfx12<0x033, "ds_max_num_rtn_f32">;
defm DS_MIN_F64     : DS_Real_gfx12<0x052, "ds_min_num_f64">;
defm DS_MAX_F64     : DS_Real_gfx12<0x053, "ds_max_num_f64">;
defm DS_MIN_RTN_F64 : DS_Real_gfx12<0x072, "ds_min_num_rtn_f64">;
defm DS_MAX_RTN_F64 : DS_Real_gfx12<0x073, "ds_max_num_rtn_f64">;

// Instructions that keep their default mnemonic.
defm DS_COND_SUB_U32      : DS_Real_gfx12<0x098>;
defm DS_SUB_CLAMP_U32     : DS_Real_gfx12<0x099>;
defm DS_COND_SUB_RTN_U32  : DS_Real_gfx12<0x0a8>;
defm DS_SUB_CLAMP_RTN_U32 : DS_Real_gfx12<0x0a9>;
defm DS_PK_ADD_F16        : DS_Real_gfx12<0x09a>;
defm DS_PK_ADD_RTN_F16    : DS_Real_gfx12<0x0aa>;
defm DS_PK_ADD_BF16       : DS_Real_gfx12<0x09b>;
defm DS_PK_ADD_RTN_BF16   : DS_Real_gfx12<0x0ab>;
defm DS_BPERMUTE_FI_B32   : DS_Real_gfx12<0x0cd>;
1270
1271// New aliases added in GFX12 without renaming the instructions.
let AssemblerPredicate = isGFX12Plus in {
  // 32-bit forms.
  def : AMDGPUMnemonicAlias<"ds_subrev_u32",     "ds_rsub_u32">;
  def : AMDGPUMnemonicAlias<"ds_subrev_rtn_u32", "ds_rsub_rtn_u32">;
  // 64-bit forms.
  def : AMDGPUMnemonicAlias<"ds_subrev_u64",     "ds_rsub_u64">;
  def : AMDGPUMnemonicAlias<"ds_subrev_rtn_u64", "ds_rsub_rtn_u64">;
} // End AssemblerPredicate = isGFX12Plus
1278
1279//===----------------------------------------------------------------------===//
1280// GFX11.
1281//===----------------------------------------------------------------------===//
1282
// Defines the "_gfx11" real instruction for the pseudo named NAME.
//   op   - opcode.
//   name - assembly mnemonic; defaults to the lower-cased NAME. When it
//          differs from the pseudo's mnemonic, an alias from the pseudo's
//          mnemonic to `name` is also defined.
multiclass DS_Real_gfx11<bits<8> op, string name = !tolower(NAME)> {
  defvar ps = !cast<DS_Pseudo>(NAME);

  let AssemblerPredicate = isGFX11Only in {
    let DecoderNamespace = "GFX11" in {
      def _gfx11 :
        Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op, ps, SIEncodingFamily.GFX11,
                                                 name>;
    }

    if !ne(ps.Mnemonic, name) then
      def : AMDGPUMnemonicAlias<ps.Mnemonic, name>;
  } // End AssemblerPredicate
}
1294
// GFX11 and GFX12 real instructions sharing one opcode and mnemonic.
multiclass DS_Real_gfx11_gfx12<bits<8> op, string name = !tolower(NAME)> :
  DS_Real_gfx11<op, name>,
  DS_Real_gfx12<op, name>;
1297
// Instructions whose GFX11/GFX12 mnemonic differs from the pseudo's
// (ds_write* -> ds_store*, ds_read* -> ds_load*, ds_wrxchg* -> ds_storexchg*).

// 32-bit and sub-dword stores.
defm DS_WRITE_B32      : DS_Real_gfx11_gfx12<0x00d, "ds_store_b32">;
defm DS_WRITE2_B32     : DS_Real_gfx11_gfx12<0x00e, "ds_store_2addr_b32">;
defm DS_WRITE2ST64_B32 : DS_Real_gfx11_gfx12<0x00f, "ds_store_2addr_stride64_b32">;
defm DS_WRITE_B8       : DS_Real_gfx11_gfx12<0x01e, "ds_store_b8">;
defm DS_WRITE_B16      : DS_Real_gfx11_gfx12<0x01f, "ds_store_b16">;

// 32-bit exchanges.
defm DS_WRXCHG_RTN_B32      : DS_Real_gfx11_gfx12<0x02d, "ds_storexchg_rtn_b32">;
defm DS_WRXCHG2_RTN_B32     : DS_Real_gfx11_gfx12<0x02e, "ds_storexchg_2addr_rtn_b32">;
defm DS_WRXCHG2ST64_RTN_B32 : DS_Real_gfx11_gfx12<0x02f, "ds_storexchg_2addr_stride64_rtn_b32">;

// 32-bit and sub-dword loads.
defm DS_READ_B32      : DS_Real_gfx11_gfx12<0x036, "ds_load_b32">;
defm DS_READ2_B32     : DS_Real_gfx11_gfx12<0x037, "ds_load_2addr_b32">;
defm DS_READ2ST64_B32 : DS_Real_gfx11_gfx12<0x038, "ds_load_2addr_stride64_b32">;
defm DS_READ_I8       : DS_Real_gfx11_gfx12<0x039, "ds_load_i8">;
defm DS_READ_U8       : DS_Real_gfx11_gfx12<0x03a, "ds_load_u8">;
defm DS_READ_I16      : DS_Real_gfx11_gfx12<0x03b, "ds_load_i16">;
defm DS_READ_U16      : DS_Real_gfx11_gfx12<0x03c, "ds_load_u16">;

// 64-bit stores, exchanges and loads.
defm DS_WRITE_B64           : DS_Real_gfx11_gfx12<0x04d, "ds_store_b64">;
defm DS_WRITE2_B64          : DS_Real_gfx11_gfx12<0x04e, "ds_store_2addr_b64">;
defm DS_WRITE2ST64_B64      : DS_Real_gfx11_gfx12<0x04f, "ds_store_2addr_stride64_b64">;
defm DS_WRXCHG_RTN_B64      : DS_Real_gfx11_gfx12<0x06d, "ds_storexchg_rtn_b64">;
defm DS_WRXCHG2_RTN_B64     : DS_Real_gfx11_gfx12<0x06e, "ds_storexchg_2addr_rtn_b64">;
defm DS_WRXCHG2ST64_RTN_B64 : DS_Real_gfx11_gfx12<0x06f, "ds_storexchg_2addr_stride64_rtn_b64">;
defm DS_READ_B64            : DS_Real_gfx11_gfx12<0x076, "ds_load_b64">;
defm DS_READ2_B64           : DS_Real_gfx11_gfx12<0x077, "ds_load_2addr_b64">;
defm DS_READ2ST64_B64       : DS_Real_gfx11_gfx12<0x078, "ds_load_2addr_stride64_b64">;

// D16 stores and loads.
defm DS_WRITE_B8_D16_HI  : DS_Real_gfx11_gfx12<0x0a0, "ds_store_b8_d16_hi">;
defm DS_WRITE_B16_D16_HI : DS_Real_gfx11_gfx12<0x0a1, "ds_store_b16_d16_hi">;
defm DS_READ_U8_D16      : DS_Real_gfx11_gfx12<0x0a2, "ds_load_u8_d16">;
defm DS_READ_U8_D16_HI   : DS_Real_gfx11_gfx12<0x0a3, "ds_load_u8_d16_hi">;
defm DS_READ_I8_D16      : DS_Real_gfx11_gfx12<0x0a4, "ds_load_i8_d16">;
defm DS_READ_I8_D16_HI   : DS_Real_gfx11_gfx12<0x0a5, "ds_load_i8_d16_hi">;
defm DS_READ_U16_D16     : DS_Real_gfx11_gfx12<0x0a6, "ds_load_u16_d16">;
defm DS_READ_U16_D16_HI  : DS_Real_gfx11_gfx12<0x0a7, "ds_load_u16_d16_hi">;

// ADDTID forms.
defm DS_WRITE_ADDTID_B32 : DS_Real_gfx11_gfx12<0x0b0, "ds_store_addtid_b32">;
defm DS_READ_ADDTID_B32  : DS_Real_gfx11_gfx12<0x0b1, "ds_load_addtid_b32">;

// 96-bit and 128-bit stores and loads.
defm DS_WRITE_B96  : DS_Real_gfx11_gfx12<0x0de, "ds_store_b96">;
defm DS_WRITE_B128 : DS_Real_gfx11_gfx12<0x0df, "ds_store_b128">;
defm DS_READ_B96   : DS_Real_gfx11_gfx12<0x0fe, "ds_load_b96">;
defm DS_READ_B128  : DS_Real_gfx11_gfx12<0x0ff, "ds_load_b128">;
1336
// DS_CMPST_* are renamed to DS_CMPSTORE_* in GFX11, and the data operands
// (src and cmp) are swapped compared to pre-GFX11.
// Note: no mnemonic alias is generated, to avoid a potential ambiguity caused by this change in semantics.
1340
// Compare-and-store. The B32/B64 forms exist on GFX11 and GFX12, the
// F32/F64 forms on GFX11 only.
defm DS_CMPSTORE_B32     : DS_Real_gfx11_gfx12<0x010>;
defm DS_CMPSTORE_F32     : DS_Real_gfx11<0x011>;
defm DS_CMPSTORE_RTN_B32 : DS_Real_gfx11_gfx12<0x030>;
defm DS_CMPSTORE_RTN_F32 : DS_Real_gfx11<0x031>;
defm DS_CMPSTORE_B64     : DS_Real_gfx11_gfx12<0x050>;
defm DS_CMPSTORE_F64     : DS_Real_gfx11<0x051>;
defm DS_CMPSTORE_RTN_B64 : DS_Real_gfx11_gfx12<0x070>;
defm DS_CMPSTORE_RTN_F64 : DS_Real_gfx11<0x071>;

// Remaining instructions with their own GFX11 opcode.
defm DS_ADD_RTN_F32       : DS_Real_gfx11_gfx12<0x079>;
defm DS_ADD_GS_REG_RTN    : DS_Real_gfx11<0x07a>;
defm DS_SUB_GS_REG_RTN    : DS_Real_gfx11<0x07b>;
defm DS_BVH_STACK_RTN_B32 : DS_Real_gfx11<0x0ad>;
1354
1355//===----------------------------------------------------------------------===//
1356// GFX10.
1357//===----------------------------------------------------------------------===//
1358
// Defines the "_gfx10" real instruction for the pseudo named NAME, using the
// pseudo's own mnemonic.
let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
  multiclass DS_Real_gfx10<bits<8> op> {
    def _gfx10 :
      Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op, !cast<DS_Pseudo>(NAME),
                                               SIEncodingFamily.GFX10>;
  }
} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
1365
// Floating-point add with return.
defm DS_ADD_RTN_F32 : DS_Real_gfx10<0x055>;

// D16 stores and loads.
defm DS_WRITE_B8_D16_HI  : DS_Real_gfx10<0x0a0>;
defm DS_WRITE_B16_D16_HI : DS_Real_gfx10<0x0a1>;
defm DS_READ_U8_D16      : DS_Real_gfx10<0x0a2>;
defm DS_READ_U8_D16_HI   : DS_Real_gfx10<0x0a3>;
defm DS_READ_I8_D16      : DS_Real_gfx10<0x0a4>;
defm DS_READ_I8_D16_HI   : DS_Real_gfx10<0x0a5>;
defm DS_READ_U16_D16     : DS_Real_gfx10<0x0a6>;
defm DS_READ_U16_D16_HI  : DS_Real_gfx10<0x0a7>;

// ADDTID forms.
defm DS_WRITE_ADDTID_B32 : DS_Real_gfx10<0x0b0>;
defm DS_READ_ADDTID_B32  : DS_Real_gfx10<0x0b1>;
1377
1378//===----------------------------------------------------------------------===//
1379// GFX10, GFX11, GFX12.
1380//===----------------------------------------------------------------------===//
1381
// Same opcode on GFX10, GFX11 and GFX12.
multiclass DS_Real_gfx10_gfx11_gfx12<bits<8> op> :
  DS_Real_gfx10<op>,
  DS_Real_gfx11<op>,
  DS_Real_gfx12<op>;

// Same opcode on GFX10 and GFX11.
multiclass DS_Real_gfx10_gfx11<bits<8> op> :
  DS_Real_gfx10<op>,
  DS_Real_gfx11<op>;
1387
defm DS_ADD_F32      : DS_Real_gfx10_gfx11_gfx12<0x015>;
// The SRC2 form is defined for GFX10 only.
defm DS_ADD_SRC2_F32 : DS_Real_gfx10<0x095>;
defm DS_PERMUTE_B32  : DS_Real_gfx10_gfx11_gfx12<0x0b2>;
defm DS_BPERMUTE_B32 : DS_Real_gfx10_gfx11_gfx12<0x0b3>;
1392
1393//===----------------------------------------------------------------------===//
1394// GFX7, GFX10, GFX11, GFX12.
1395//===----------------------------------------------------------------------===//
1396
// Defines the "_gfx7" real instruction for the pseudo named NAME. It uses
// the SI encoding family.
let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
  multiclass DS_Real_gfx7<bits<8> op> {
    def _gfx7 :
      Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op, !cast<DS_Pseudo>(NAME),
                                               SIEncodingFamily.SI>;
  }
} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"
1403
// Same opcode on GFX7, GFX10, GFX11 and GFX12.
multiclass DS_Real_gfx7_gfx10_gfx11_gfx12<bits<8> op> :
  DS_Real_gfx7<op>,
  DS_Real_gfx10_gfx11_gfx12<op>;

// Same opcode on GFX7, GFX10 and GFX11.
multiclass DS_Real_gfx7_gfx10_gfx11<bits<8> op> :
  DS_Real_gfx7<op>,
  DS_Real_gfx10_gfx11<op>;

// Same opcode on GFX7 and GFX10.
multiclass DS_Real_gfx7_gfx10<bits<8> op> :
  DS_Real_gfx7<op>,
  DS_Real_gfx10<op>;
1412
1413// FIXME-GFX7: Add tests when upstreaming this part.
defm DS_GWS_SEMA_RELEASE_ALL : DS_Real_gfx7_gfx10_gfx11<0x018>;
defm DS_WRAP_RTN_B32         : DS_Real_gfx7_gfx10_gfx11<0x034>;
defm DS_CONDXCHG32_RTN_B64   : DS_Real_gfx7_gfx10_gfx11_gfx12<0x07e>;

// 96-bit and 128-bit stores and loads.
defm DS_WRITE_B96  : DS_Real_gfx7_gfx10<0x0de>;
defm DS_WRITE_B128 : DS_Real_gfx7_gfx10<0x0df>;
defm DS_READ_B96   : DS_Real_gfx7_gfx10<0x0fe>;
defm DS_READ_B128  : DS_Real_gfx7_gfx10<0x0ff>;
1421
1422//===----------------------------------------------------------------------===//
// GFX6, GFX7, GFX10, GFX11, GFX12.
1424//===----------------------------------------------------------------------===//
1425
// Defines the "_gfx6_gfx7" real instruction for the pseudo named NAME. It
// uses the SI encoding family.
let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
  multiclass DS_Real_gfx6_gfx7<bits<8> op> {
    def _gfx6_gfx7 :
      Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<op, !cast<DS_Pseudo>(NAME),
                                               SIEncodingFamily.SI>;
  }
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
1432
// Same opcode on GFX6/GFX7, GFX10, GFX11 and GFX12.
multiclass DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<bits<8> op> :
  DS_Real_gfx6_gfx7<op>,
  DS_Real_gfx10_gfx11_gfx12<op>;

// Same opcode on GFX6/GFX7, GFX10 and GFX11.
multiclass DS_Real_gfx6_gfx7_gfx10_gfx11<bits<8> op> :
  DS_Real_gfx6_gfx7<op>,
  DS_Real_gfx10_gfx11<op>;

// Same opcode on GFX6/GFX7 and GFX10.
multiclass DS_Real_gfx6_gfx7_gfx10<bits<8> op> :
  DS_Real_gfx6_gfx7<op>,
  DS_Real_gfx10<op>;
1441
// Opcode table, 0x000-0x01f.
// Each line hands one opcode to every target family named in the multiclass,
// so the instruction has the same opcode on all of them. The defm name must be
// the name of an existing DS_Pseudo: DS_Real_gfx6_gfx7 resolves it with
// !cast<DS_Pseudo>(NAME).
1442defm DS_ADD_U32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x000>;
1443defm DS_SUB_U32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x001>;
1444defm DS_RSUB_U32            : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x002>;
1445defm DS_INC_U32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x003>;
1446defm DS_DEC_U32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x004>;
1447defm DS_MIN_I32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x005>;
1448defm DS_MAX_I32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x006>;
1449defm DS_MIN_U32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x007>;
1450defm DS_MAX_U32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x008>;
1451defm DS_AND_B32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x009>;
1452defm DS_OR_B32              : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00a>;
1453defm DS_XOR_B32             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00b>;
1454defm DS_MSKOR_B32           : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00c>;
1455
// These use DS_Real_gfx6_gfx7_gfx10, so no GFX11/GFX12 real is generated for
// them by these lines.
1456defm DS_WRITE_B32           : DS_Real_gfx6_gfx7_gfx10<0x00d>;
1457defm DS_WRITE2_B32          : DS_Real_gfx6_gfx7_gfx10<0x00e>;
1458defm DS_WRITE2ST64_B32      : DS_Real_gfx6_gfx7_gfx10<0x00f>;
1459defm DS_CMPST_B32           : DS_Real_gfx6_gfx7_gfx10<0x010>;
1460defm DS_CMPST_F32           : DS_Real_gfx6_gfx7_gfx10<0x011>;
1461
// Opcodes 0x015-0x018 are not assigned in this table. Of this group only
// DS_NOP also gets a GFX12 real.
1462defm DS_MIN_F32             : DS_Real_gfx6_gfx7_gfx10_gfx11<0x012>;
1463defm DS_MAX_F32             : DS_Real_gfx6_gfx7_gfx10_gfx11<0x013>;
1464defm DS_NOP                 : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x014>;
1465defm DS_GWS_INIT            : DS_Real_gfx6_gfx7_gfx10_gfx11<0x019>;
1466defm DS_GWS_SEMA_V          : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01a>;
1467defm DS_GWS_SEMA_BR         : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01b>;
1468defm DS_GWS_SEMA_P          : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01c>;
1469defm DS_GWS_BARRIER         : DS_Real_gfx6_gfx7_gfx10_gfx11<0x01d>;
1470
1471defm DS_WRITE_B8            : DS_Real_gfx6_gfx7_gfx10<0x01e>;
1472defm DS_WRITE_B16           : DS_Real_gfx6_gfx7_gfx10<0x01f>;
1473
// Opcode table, 0x020-0x03c (0x034 is not assigned here).
// Where a like-named non-RTN defm exists in this file's GFX6/GFX7 table, the
// _RTN_ opcode is that opcode plus 0x20 (e.g. DS_ADD_U32 = 0x000,
// DS_ADD_RTN_U32 = 0x020).
1474defm DS_ADD_RTN_U32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x020>;
1475defm DS_SUB_RTN_U32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x021>;
1476defm DS_RSUB_RTN_U32        : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x022>;
1477defm DS_INC_RTN_U32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x023>;
1478defm DS_DEC_RTN_U32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x024>;
1479defm DS_MIN_RTN_I32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x025>;
1480defm DS_MAX_RTN_I32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x026>;
1481defm DS_MIN_RTN_U32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x027>;
1482defm DS_MAX_RTN_U32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x028>;
1483defm DS_AND_RTN_B32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x029>;
1484defm DS_OR_RTN_B32          : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x02a>;
1485defm DS_XOR_RTN_B32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x02b>;
1486defm DS_MSKOR_RTN_B32       : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x02c>;
1487
// GFX6/GFX7 and GFX10 only (DS_Real_gfx6_gfx7_gfx10).
1488defm DS_WRXCHG_RTN_B32      : DS_Real_gfx6_gfx7_gfx10<0x02d>;
1489defm DS_WRXCHG2_RTN_B32     : DS_Real_gfx6_gfx7_gfx10<0x02e>;
1490defm DS_WRXCHG2ST64_RTN_B32 : DS_Real_gfx6_gfx7_gfx10<0x02f>;
1491defm DS_CMPST_RTN_B32       : DS_Real_gfx6_gfx7_gfx10<0x030>;
1492defm DS_CMPST_RTN_F32       : DS_Real_gfx6_gfx7_gfx10<0x031>;
1493
1494defm DS_MIN_RTN_F32         : DS_Real_gfx6_gfx7_gfx10_gfx11<0x032>;
1495defm DS_MAX_RTN_F32         : DS_Real_gfx6_gfx7_gfx10_gfx11<0x033>;
1496defm DS_SWIZZLE_B32         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x035>;
1497
// GFX6/GFX7 and GFX10 only (DS_Real_gfx6_gfx7_gfx10).
1498defm DS_READ_B32            : DS_Real_gfx6_gfx7_gfx10<0x036>;
1499defm DS_READ2_B32           : DS_Real_gfx6_gfx7_gfx10<0x037>;
1500defm DS_READ2ST64_B32       : DS_Real_gfx6_gfx7_gfx10<0x038>;
1501defm DS_READ_I8             : DS_Real_gfx6_gfx7_gfx10<0x039>;
1502defm DS_READ_U8             : DS_Real_gfx6_gfx7_gfx10<0x03a>;
1503defm DS_READ_I16            : DS_Real_gfx6_gfx7_gfx10<0x03b>;
1504defm DS_READ_U16            : DS_Real_gfx6_gfx7_gfx10<0x03c>;
1505
// Opcode table, 0x03d-0x051.
// DS_ORDERED_COUNT is the only entry in the first group that uses the
// ..._gfx11 multiclass rather than ..._gfx11_gfx12, so these lines generate no
// GFX12 real for it.
1506defm DS_CONSUME             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x03d>;
1507defm DS_APPEND              : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x03e>;
1508defm DS_ORDERED_COUNT       : DS_Real_gfx6_gfx7_gfx10_gfx11<0x03f>;
1509defm DS_ADD_U64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x040>;
1510defm DS_SUB_U64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x041>;
1511defm DS_RSUB_U64            : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x042>;
1512defm DS_INC_U64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x043>;
1513defm DS_DEC_U64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x044>;
1514defm DS_MIN_I64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x045>;
1515defm DS_MAX_I64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x046>;
1516defm DS_MIN_U64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x047>;
1517defm DS_MAX_U64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x048>;
1518defm DS_AND_B64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x049>;
1519defm DS_OR_B64              : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x04a>;
1520defm DS_XOR_B64             : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x04b>;
1521defm DS_MSKOR_B64           : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x04c>;
1522
// GFX6/GFX7 and GFX10 only (DS_Real_gfx6_gfx7_gfx10).
1523defm DS_WRITE_B64           : DS_Real_gfx6_gfx7_gfx10<0x04d>;
1524defm DS_WRITE2_B64          : DS_Real_gfx6_gfx7_gfx10<0x04e>;
1525defm DS_WRITE2ST64_B64      : DS_Real_gfx6_gfx7_gfx10<0x04f>;
1526defm DS_CMPST_B64           : DS_Real_gfx6_gfx7_gfx10<0x050>;
1527defm DS_CMPST_F64           : DS_Real_gfx6_gfx7_gfx10<0x051>;
1528
// Opcode table, 0x052-0x073 (0x054-0x05f are not assigned here).
// As with the 32-bit entries, each _RTN_ opcode is the like-named non-RTN
// opcode plus 0x20 (e.g. DS_MIN_F64 = 0x052, DS_MIN_RTN_F64 = 0x072).
1529defm DS_MIN_F64             : DS_Real_gfx6_gfx7_gfx10_gfx11<0x052>;
1530defm DS_MAX_F64             : DS_Real_gfx6_gfx7_gfx10_gfx11<0x053>;
1531defm DS_ADD_RTN_U64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x060>;
1532defm DS_SUB_RTN_U64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x061>;
1533defm DS_RSUB_RTN_U64        : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x062>;
1534defm DS_INC_RTN_U64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x063>;
1535defm DS_DEC_RTN_U64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x064>;
1536defm DS_MIN_RTN_I64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x065>;
1537defm DS_MAX_RTN_I64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x066>;
1538defm DS_MIN_RTN_U64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x067>;
1539defm DS_MAX_RTN_U64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x068>;
1540defm DS_AND_RTN_B64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x069>;
1541defm DS_OR_RTN_B64          : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x06a>;
1542defm DS_XOR_RTN_B64         : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x06b>;
1543defm DS_MSKOR_RTN_B64       : DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x06c>;
1544
// GFX6/GFX7 and GFX10 only (DS_Real_gfx6_gfx7_gfx10).
1545defm DS_WRXCHG_RTN_B64      : DS_Real_gfx6_gfx7_gfx10<0x06d>;
1546defm DS_WRXCHG2_RTN_B64     : DS_Real_gfx6_gfx7_gfx10<0x06e>;
1547defm DS_WRXCHG2ST64_RTN_B64 : DS_Real_gfx6_gfx7_gfx10<0x06f>;
1548defm DS_CMPST_RTN_B64       : DS_Real_gfx6_gfx7_gfx10<0x070>;
1549defm DS_CMPST_RTN_F64       : DS_Real_gfx6_gfx7_gfx10<0x071>;
1550
1551defm DS_MIN_RTN_F64         : DS_Real_gfx6_gfx7_gfx10_gfx11<0x072>;
1552defm DS_MAX_RTN_F64         : DS_Real_gfx6_gfx7_gfx10_gfx11<0x073>;
1553
// Opcode table, 0x076-0x0d3. Every entry uses DS_Real_gfx6_gfx7_gfx10, so
// these lines generate GFX6/GFX7 and GFX10 reals only.
1554defm DS_READ_B64            : DS_Real_gfx6_gfx7_gfx10<0x076>;
1555defm DS_READ2_B64           : DS_Real_gfx6_gfx7_gfx10<0x077>;
1556defm DS_READ2ST64_B64       : DS_Real_gfx6_gfx7_gfx10<0x078>;
// _SRC2_ entries: each opcode is the like-named non-SRC2 opcode from this
// file's GFX6/GFX7 table plus 0x80 (e.g. DS_ADD_U32 = 0x000,
// DS_ADD_SRC2_U32 = 0x080; DS_ADD_U64 = 0x040, DS_ADD_SRC2_U64 = 0x0c0).
1557defm DS_ADD_SRC2_U32        : DS_Real_gfx6_gfx7_gfx10<0x080>;
1558defm DS_SUB_SRC2_U32        : DS_Real_gfx6_gfx7_gfx10<0x081>;
1559defm DS_RSUB_SRC2_U32       : DS_Real_gfx6_gfx7_gfx10<0x082>;
1560defm DS_INC_SRC2_U32        : DS_Real_gfx6_gfx7_gfx10<0x083>;
1561defm DS_DEC_SRC2_U32        : DS_Real_gfx6_gfx7_gfx10<0x084>;
1562defm DS_MIN_SRC2_I32        : DS_Real_gfx6_gfx7_gfx10<0x085>;
1563defm DS_MAX_SRC2_I32        : DS_Real_gfx6_gfx7_gfx10<0x086>;
1564defm DS_MIN_SRC2_U32        : DS_Real_gfx6_gfx7_gfx10<0x087>;
1565defm DS_MAX_SRC2_U32        : DS_Real_gfx6_gfx7_gfx10<0x088>;
1566defm DS_AND_SRC2_B32        : DS_Real_gfx6_gfx7_gfx10<0x089>;
1567defm DS_OR_SRC2_B32         : DS_Real_gfx6_gfx7_gfx10<0x08a>;
1568defm DS_XOR_SRC2_B32        : DS_Real_gfx6_gfx7_gfx10<0x08b>;
1569defm DS_WRITE_SRC2_B32      : DS_Real_gfx6_gfx7_gfx10<0x08d>;
1570defm DS_MIN_SRC2_F32        : DS_Real_gfx6_gfx7_gfx10<0x092>;
1571defm DS_MAX_SRC2_F32        : DS_Real_gfx6_gfx7_gfx10<0x093>;
1572defm DS_ADD_SRC2_U64        : DS_Real_gfx6_gfx7_gfx10<0x0c0>;
1573defm DS_SUB_SRC2_U64        : DS_Real_gfx6_gfx7_gfx10<0x0c1>;
1574defm DS_RSUB_SRC2_U64       : DS_Real_gfx6_gfx7_gfx10<0x0c2>;
1575defm DS_INC_SRC2_U64        : DS_Real_gfx6_gfx7_gfx10<0x0c3>;
1576defm DS_DEC_SRC2_U64        : DS_Real_gfx6_gfx7_gfx10<0x0c4>;
1577defm DS_MIN_SRC2_I64        : DS_Real_gfx6_gfx7_gfx10<0x0c5>;
1578defm DS_MAX_SRC2_I64        : DS_Real_gfx6_gfx7_gfx10<0x0c6>;
1579defm DS_MIN_SRC2_U64        : DS_Real_gfx6_gfx7_gfx10<0x0c7>;
1580defm DS_MAX_SRC2_U64        : DS_Real_gfx6_gfx7_gfx10<0x0c8>;
1581defm DS_AND_SRC2_B64        : DS_Real_gfx6_gfx7_gfx10<0x0c9>;
1582defm DS_OR_SRC2_B64         : DS_Real_gfx6_gfx7_gfx10<0x0ca>;
1583defm DS_XOR_SRC2_B64        : DS_Real_gfx6_gfx7_gfx10<0x0cb>;
1584defm DS_WRITE_SRC2_B64      : DS_Real_gfx6_gfx7_gfx10<0x0cd>;
1585defm DS_MIN_SRC2_F64        : DS_Real_gfx6_gfx7_gfx10<0x0d2>;
1586defm DS_MAX_SRC2_F64        : DS_Real_gfx6_gfx7_gfx10<0x0d3>;
1587
1588//===----------------------------------------------------------------------===//
1589// GFX8, GFX9 (VI).
1590//===----------------------------------------------------------------------===//
1591
// Real (encodable) DS instruction for the VI encoding family.
//
//   op - 8-bit opcode, placed in Inst{24-17}.
//   ps - the DS_Pseudo this real corresponds to. DS_Real takes its operand
//        lists and asm string from it; its has_* flags select which fields
//        below are actually encoded.
//
// Records use the isGFX8GFX9 assembler predicate and the "GFX8" decoder
// namespace, and are registered with SIMCInstr under SIEncodingFamily.VI.
1592class DS_Real_vi <bits<8> op, DS_Pseudo ps> :
1593  DS_Real <ps>,
1594  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> {
1595  let AssemblerPredicate = isGFX8GFX9;
1596  let DecoderNamespace = "GFX8";
1597
1598  // encoding
  // 64-bit layout. A field whose has_* flag is clear is encoded as 0, except
  // gds, which falls back to the fixed ps.gdsValue.
1599  let Inst{7-0}   = !if(ps.has_offset0, offset0, 0);
1600  let Inst{15-8}  = !if(ps.has_offset1, offset1, 0);
1601  let Inst{16}    = !if(ps.has_gds, gds, ps.gdsValue);
1602  let Inst{24-17} = op;
  // NOTE(review): `acc` is not declared in this class; presumably it is a
  // field of DS_Real - confirm its meaning there.
1603  let Inst{25}    = acc;
1604  let Inst{31-26} = 0x36; // ds prefix
  // The addr field holds data0 instead when the pseudo has no addr but sets
  // has_gws_data0 ("data0 is encoded as addr" per DS_Pseudo).
1605  let Inst{39-32} = !if(ps.has_addr, addr, !if(ps.has_gws_data0, data0{7-0}, 0));
1606  let Inst{47-40} = !if(ps.has_data0, data0{7-0}, 0);
1607  let Inst{55-48} = !if(ps.has_data1, data1{7-0}, 0);
1608  let Inst{63-56} = !if(ps.has_vdst, vdst{7-0}, 0);
1609}
1610
// VI reals, opcodes 0x0-0x3f plus a few higher ones.
// Unlike the defm tables for the other targets, each record is written out
// explicitly: the record name carries the _vi suffix and the pseudo is passed
// as the second argument.
// The list is not strictly in opcode order: the GWS entries (0x99-0x9d),
// DS_READ_ADDTID_B32 (0xb6) and DS_CONSUME / DS_APPEND / DS_ORDERED_COUNT
// (0xbd-0xbf) sit among lower-numbered neighbours.
1611def DS_ADD_U32_vi         : DS_Real_vi<0x0,  DS_ADD_U32>;
1612def DS_SUB_U32_vi         : DS_Real_vi<0x1,  DS_SUB_U32>;
1613def DS_RSUB_U32_vi        : DS_Real_vi<0x2,  DS_RSUB_U32>;
1614def DS_INC_U32_vi         : DS_Real_vi<0x3,  DS_INC_U32>;
1615def DS_DEC_U32_vi         : DS_Real_vi<0x4,  DS_DEC_U32>;
1616def DS_MIN_I32_vi         : DS_Real_vi<0x5,  DS_MIN_I32>;
1617def DS_MAX_I32_vi         : DS_Real_vi<0x6,  DS_MAX_I32>;
1618def DS_MIN_U32_vi         : DS_Real_vi<0x7,  DS_MIN_U32>;
1619def DS_MAX_U32_vi         : DS_Real_vi<0x8,  DS_MAX_U32>;
1620def DS_AND_B32_vi         : DS_Real_vi<0x9,  DS_AND_B32>;
1621def DS_OR_B32_vi          : DS_Real_vi<0xa,  DS_OR_B32>;
1622def DS_XOR_B32_vi         : DS_Real_vi<0xb,  DS_XOR_B32>;
1623def DS_MSKOR_B32_vi       : DS_Real_vi<0xc,  DS_MSKOR_B32>;
1624def DS_WRITE_B32_vi       : DS_Real_vi<0xd,  DS_WRITE_B32>;
1625def DS_WRITE2_B32_vi      : DS_Real_vi<0xe,  DS_WRITE2_B32>;
1626def DS_WRITE2ST64_B32_vi  : DS_Real_vi<0xf,  DS_WRITE2ST64_B32>;
1627def DS_CMPST_B32_vi       : DS_Real_vi<0x10, DS_CMPST_B32>;
1628def DS_CMPST_F32_vi       : DS_Real_vi<0x11, DS_CMPST_F32>;
1629def DS_MIN_F32_vi         : DS_Real_vi<0x12, DS_MIN_F32>;
1630def DS_MAX_F32_vi         : DS_Real_vi<0x13, DS_MAX_F32>;
1631def DS_NOP_vi             : DS_Real_vi<0x14, DS_NOP>;
1632def DS_ADD_F32_vi         : DS_Real_vi<0x15, DS_ADD_F32>;
// The GWS opcodes here are 0x80 above the values this file uses for the same
// pseudos in the GFX6/GFX7 table (0x019-0x01d).
1633def DS_GWS_INIT_vi        : DS_Real_vi<0x99, DS_GWS_INIT>;
1634def DS_GWS_SEMA_V_vi      : DS_Real_vi<0x9a, DS_GWS_SEMA_V>;
1635def DS_GWS_SEMA_BR_vi     : DS_Real_vi<0x9b, DS_GWS_SEMA_BR>;
1636def DS_GWS_SEMA_P_vi      : DS_Real_vi<0x9c, DS_GWS_SEMA_P>;
1637def DS_GWS_BARRIER_vi     : DS_Real_vi<0x9d, DS_GWS_BARRIER>;
1638def DS_WRITE_ADDTID_B32_vi : DS_Real_vi<0x1d, DS_WRITE_ADDTID_B32>;
1639def DS_WRITE_B8_vi        : DS_Real_vi<0x1e, DS_WRITE_B8>;
1640def DS_WRITE_B16_vi       : DS_Real_vi<0x1f, DS_WRITE_B16>;
1641def DS_ADD_RTN_U32_vi     : DS_Real_vi<0x20, DS_ADD_RTN_U32>;
1642def DS_SUB_RTN_U32_vi     : DS_Real_vi<0x21, DS_SUB_RTN_U32>;
1643def DS_RSUB_RTN_U32_vi    : DS_Real_vi<0x22, DS_RSUB_RTN_U32>;
1644def DS_INC_RTN_U32_vi     : DS_Real_vi<0x23, DS_INC_RTN_U32>;
1645def DS_DEC_RTN_U32_vi     : DS_Real_vi<0x24, DS_DEC_RTN_U32>;
1646def DS_MIN_RTN_I32_vi     : DS_Real_vi<0x25, DS_MIN_RTN_I32>;
1647def DS_MAX_RTN_I32_vi     : DS_Real_vi<0x26, DS_MAX_RTN_I32>;
1648def DS_MIN_RTN_U32_vi     : DS_Real_vi<0x27, DS_MIN_RTN_U32>;
1649def DS_MAX_RTN_U32_vi     : DS_Real_vi<0x28, DS_MAX_RTN_U32>;
1650def DS_AND_RTN_B32_vi     : DS_Real_vi<0x29, DS_AND_RTN_B32>;
1651def DS_OR_RTN_B32_vi      : DS_Real_vi<0x2a, DS_OR_RTN_B32>;
1652def DS_XOR_RTN_B32_vi     : DS_Real_vi<0x2b, DS_XOR_RTN_B32>;
1653def DS_MSKOR_RTN_B32_vi   : DS_Real_vi<0x2c, DS_MSKOR_RTN_B32>;
1654def DS_WRXCHG_RTN_B32_vi  : DS_Real_vi<0x2d, DS_WRXCHG_RTN_B32>;
1655def DS_WRXCHG2_RTN_B32_vi : DS_Real_vi<0x2e, DS_WRXCHG2_RTN_B32>;
1656def DS_WRXCHG2ST64_RTN_B32_vi : DS_Real_vi<0x2f, DS_WRXCHG2ST64_RTN_B32>;
1657def DS_CMPST_RTN_B32_vi   : DS_Real_vi<0x30, DS_CMPST_RTN_B32>;
1658def DS_CMPST_RTN_F32_vi   : DS_Real_vi<0x31, DS_CMPST_RTN_F32>;
1659def DS_MIN_RTN_F32_vi     : DS_Real_vi<0x32, DS_MIN_RTN_F32>;
1660def DS_MAX_RTN_F32_vi     : DS_Real_vi<0x33, DS_MAX_RTN_F32>;
1661def DS_WRAP_RTN_B32_vi    : DS_Real_vi<0x34, DS_WRAP_RTN_B32>;
1662def DS_ADD_RTN_F32_vi     : DS_Real_vi<0x35, DS_ADD_RTN_F32>;
1663def DS_READ_B32_vi        : DS_Real_vi<0x36, DS_READ_B32>;
1664def DS_READ2_B32_vi       : DS_Real_vi<0x37, DS_READ2_B32>;
1665def DS_READ2ST64_B32_vi   : DS_Real_vi<0x38, DS_READ2ST64_B32>;
1666def DS_READ_I8_vi         : DS_Real_vi<0x39, DS_READ_I8>;
1667def DS_READ_U8_vi         : DS_Real_vi<0x3a, DS_READ_U8>;
1668def DS_READ_I16_vi        : DS_Real_vi<0x3b, DS_READ_I16>;
1669def DS_READ_U16_vi        : DS_Real_vi<0x3c, DS_READ_U16>;
// DS_CONSUME / DS_APPEND / DS_ORDERED_COUNT are likewise 0x80 above their
// GFX6/GFX7 values (0x03d-0x03f), while DS_SWIZZLE_B32 is 0x3d here versus
// 0x035 in that table.
1670def DS_READ_ADDTID_B32_vi : DS_Real_vi<0xb6, DS_READ_ADDTID_B32>;
1671def DS_CONSUME_vi         : DS_Real_vi<0xbd, DS_CONSUME>;
1672def DS_APPEND_vi          : DS_Real_vi<0xbe, DS_APPEND>;
1673def DS_ORDERED_COUNT_vi   : DS_Real_vi<0xbf, DS_ORDERED_COUNT>;
1674def DS_SWIZZLE_B32_vi     : DS_Real_vi<0x3d, DS_SWIZZLE_B32>;
1675def DS_PERMUTE_B32_vi     : DS_Real_vi<0x3e, DS_PERMUTE_B32>;
1676def DS_BPERMUTE_B32_vi    : DS_Real_vi<0x3f, DS_BPERMUTE_B32>;
1677
// VI reals, opcodes 0x40-0x53, listed in opcode order.
// NOTE(review): DS_WRITE2_B64_vi spells its opcode 0x4E with an upper-case
// digit; the value is the same as 0x4e, only the casing differs from the
// neighbouring lines.
1678def DS_ADD_U64_vi         : DS_Real_vi<0x40, DS_ADD_U64>;
1679def DS_SUB_U64_vi         : DS_Real_vi<0x41, DS_SUB_U64>;
1680def DS_RSUB_U64_vi        : DS_Real_vi<0x42, DS_RSUB_U64>;
1681def DS_INC_U64_vi         : DS_Real_vi<0x43, DS_INC_U64>;
1682def DS_DEC_U64_vi         : DS_Real_vi<0x44, DS_DEC_U64>;
1683def DS_MIN_I64_vi         : DS_Real_vi<0x45, DS_MIN_I64>;
1684def DS_MAX_I64_vi         : DS_Real_vi<0x46, DS_MAX_I64>;
1685def DS_MIN_U64_vi         : DS_Real_vi<0x47, DS_MIN_U64>;
1686def DS_MAX_U64_vi         : DS_Real_vi<0x48, DS_MAX_U64>;
1687def DS_AND_B64_vi         : DS_Real_vi<0x49, DS_AND_B64>;
1688def DS_OR_B64_vi          : DS_Real_vi<0x4a, DS_OR_B64>;
1689def DS_XOR_B64_vi         : DS_Real_vi<0x4b, DS_XOR_B64>;
1690def DS_MSKOR_B64_vi       : DS_Real_vi<0x4c, DS_MSKOR_B64>;
1691def DS_WRITE_B64_vi       : DS_Real_vi<0x4d, DS_WRITE_B64>;
1692def DS_WRITE2_B64_vi      : DS_Real_vi<0x4E, DS_WRITE2_B64>;
1693def DS_WRITE2ST64_B64_vi  : DS_Real_vi<0x4f, DS_WRITE2ST64_B64>;
1694def DS_CMPST_B64_vi       : DS_Real_vi<0x50, DS_CMPST_B64>;
1695def DS_CMPST_F64_vi       : DS_Real_vi<0x51, DS_CMPST_F64>;
1696def DS_MIN_F64_vi         : DS_Real_vi<0x52, DS_MIN_F64>;
1697def DS_MAX_F64_vi         : DS_Real_vi<0x53, DS_MAX_F64>;
1698
// VI reals for the *_D16 / *_D16_HI pseudos, opcodes 0x54-0x5b in order:
// two writes (0x54-0x55) followed by six reads (0x56-0x5b).
1699def DS_WRITE_B8_D16_HI_vi  : DS_Real_vi<0x54, DS_WRITE_B8_D16_HI>;
1700def DS_WRITE_B16_D16_HI_vi : DS_Real_vi<0x55, DS_WRITE_B16_D16_HI>;
1701
1702def DS_READ_U8_D16_vi     : DS_Real_vi<0x56, DS_READ_U8_D16>;
1703def DS_READ_U8_D16_HI_vi  : DS_Real_vi<0x57, DS_READ_U8_D16_HI>;
1704def DS_READ_I8_D16_vi     : DS_Real_vi<0x58, DS_READ_I8_D16>;
1705def DS_READ_I8_D16_HI_vi  : DS_Real_vi<0x59, DS_READ_I8_D16_HI>;
1706def DS_READ_U16_D16_vi    : DS_Real_vi<0x5a, DS_READ_U16_D16>;
1707def DS_READ_U16_D16_HI_vi : DS_Real_vi<0x5b, DS_READ_U16_D16_HI>;
1708
// VI reals, opcodes 0x60-0x78.
// Two entries are out of opcode order: DS_CONDXCHG32_RTN_B64 (0x7e) and
// DS_GWS_SEMA_RELEASE_ALL (0x98) are listed between 0x6f and 0x70.
1709def DS_ADD_RTN_U64_vi     : DS_Real_vi<0x60, DS_ADD_RTN_U64>;
1710def DS_SUB_RTN_U64_vi     : DS_Real_vi<0x61, DS_SUB_RTN_U64>;
1711def DS_RSUB_RTN_U64_vi    : DS_Real_vi<0x62, DS_RSUB_RTN_U64>;
1712def DS_INC_RTN_U64_vi     : DS_Real_vi<0x63, DS_INC_RTN_U64>;
1713def DS_DEC_RTN_U64_vi     : DS_Real_vi<0x64, DS_DEC_RTN_U64>;
1714def DS_MIN_RTN_I64_vi     : DS_Real_vi<0x65, DS_MIN_RTN_I64>;
1715def DS_MAX_RTN_I64_vi     : DS_Real_vi<0x66, DS_MAX_RTN_I64>;
1716def DS_MIN_RTN_U64_vi     : DS_Real_vi<0x67, DS_MIN_RTN_U64>;
1717def DS_MAX_RTN_U64_vi     : DS_Real_vi<0x68, DS_MAX_RTN_U64>;
1718def DS_AND_RTN_B64_vi     : DS_Real_vi<0x69, DS_AND_RTN_B64>;
1719def DS_OR_RTN_B64_vi      : DS_Real_vi<0x6a, DS_OR_RTN_B64>;
1720def DS_XOR_RTN_B64_vi     : DS_Real_vi<0x6b, DS_XOR_RTN_B64>;
1721def DS_MSKOR_RTN_B64_vi   : DS_Real_vi<0x6c, DS_MSKOR_RTN_B64>;
1722def DS_WRXCHG_RTN_B64_vi  : DS_Real_vi<0x6d, DS_WRXCHG_RTN_B64>;
1723def DS_WRXCHG2_RTN_B64_vi : DS_Real_vi<0x6e, DS_WRXCHG2_RTN_B64>;
1724def DS_WRXCHG2ST64_RTN_B64_vi : DS_Real_vi<0x6f, DS_WRXCHG2ST64_RTN_B64>;
1725def DS_CONDXCHG32_RTN_B64_vi   : DS_Real_vi<0x7e, DS_CONDXCHG32_RTN_B64>;
1726def DS_GWS_SEMA_RELEASE_ALL_vi : DS_Real_vi<0x98, DS_GWS_SEMA_RELEASE_ALL>;
1727def DS_CMPST_RTN_B64_vi   : DS_Real_vi<0x70, DS_CMPST_RTN_B64>;
1728def DS_CMPST_RTN_F64_vi   : DS_Real_vi<0x71, DS_CMPST_RTN_F64>;
1729def DS_MIN_RTN_F64_vi     : DS_Real_vi<0x72, DS_MIN_RTN_F64>;
1730def DS_MAX_RTN_F64_vi     : DS_Real_vi<0x73, DS_MAX_RTN_F64>;
1731
1732def DS_READ_B64_vi        : DS_Real_vi<0x76, DS_READ_B64>;
1733def DS_READ2_B64_vi       : DS_Real_vi<0x77, DS_READ2_B64>;
1734def DS_READ2ST64_B64_vi   : DS_Real_vi<0x78, DS_READ2ST64_B64>;
1735
// VI reals for the _SRC2_ pseudos (0x80-0xd3) and the 96/128-bit-named
// write/read pseudos (0xde, 0xdf, 0xfe, 0xff), listed in opcode order.
// The _SRC2_ opcodes match the values this file uses for the same pseudos in
// the GFX6/GFX7 table; DS_ADD_SRC2_F32 (0x95) has no entry in that table.
1736def DS_ADD_SRC2_U32_vi    : DS_Real_vi<0x80, DS_ADD_SRC2_U32>;
1737def DS_SUB_SRC2_U32_vi    : DS_Real_vi<0x81, DS_SUB_SRC2_U32>;
1738def DS_RSUB_SRC2_U32_vi   : DS_Real_vi<0x82, DS_RSUB_SRC2_U32>;
1739def DS_INC_SRC2_U32_vi    : DS_Real_vi<0x83, DS_INC_SRC2_U32>;
1740def DS_DEC_SRC2_U32_vi    : DS_Real_vi<0x84, DS_DEC_SRC2_U32>;
1741def DS_MIN_SRC2_I32_vi    : DS_Real_vi<0x85, DS_MIN_SRC2_I32>;
1742def DS_MAX_SRC2_I32_vi    : DS_Real_vi<0x86, DS_MAX_SRC2_I32>;
1743def DS_MIN_SRC2_U32_vi    : DS_Real_vi<0x87, DS_MIN_SRC2_U32>;
1744def DS_MAX_SRC2_U32_vi    : DS_Real_vi<0x88, DS_MAX_SRC2_U32>;
1745def DS_AND_SRC2_B32_vi    : DS_Real_vi<0x89, DS_AND_SRC2_B32>;
1746def DS_OR_SRC2_B32_vi     : DS_Real_vi<0x8a, DS_OR_SRC2_B32>;
1747def DS_XOR_SRC2_B32_vi    : DS_Real_vi<0x8b, DS_XOR_SRC2_B32>;
1748def DS_WRITE_SRC2_B32_vi  : DS_Real_vi<0x8d, DS_WRITE_SRC2_B32>;
1749def DS_MIN_SRC2_F32_vi    : DS_Real_vi<0x92, DS_MIN_SRC2_F32>;
1750def DS_MAX_SRC2_F32_vi    : DS_Real_vi<0x93, DS_MAX_SRC2_F32>;
1751def DS_ADD_SRC2_F32_vi    : DS_Real_vi<0x95, DS_ADD_SRC2_F32>;
1752def DS_ADD_SRC2_U64_vi    : DS_Real_vi<0xc0, DS_ADD_SRC2_U64>;
1753def DS_SUB_SRC2_U64_vi    : DS_Real_vi<0xc1, DS_SUB_SRC2_U64>;
1754def DS_RSUB_SRC2_U64_vi   : DS_Real_vi<0xc2, DS_RSUB_SRC2_U64>;
1755def DS_INC_SRC2_U64_vi    : DS_Real_vi<0xc3, DS_INC_SRC2_U64>;
1756def DS_DEC_SRC2_U64_vi    : DS_Real_vi<0xc4, DS_DEC_SRC2_U64>;
1757def DS_MIN_SRC2_I64_vi    : DS_Real_vi<0xc5, DS_MIN_SRC2_I64>;
1758def DS_MAX_SRC2_I64_vi    : DS_Real_vi<0xc6, DS_MAX_SRC2_I64>;
1759def DS_MIN_SRC2_U64_vi    : DS_Real_vi<0xc7, DS_MIN_SRC2_U64>;
1760def DS_MAX_SRC2_U64_vi    : DS_Real_vi<0xc8, DS_MAX_SRC2_U64>;
1761def DS_AND_SRC2_B64_vi    : DS_Real_vi<0xc9, DS_AND_SRC2_B64>;
1762def DS_OR_SRC2_B64_vi     : DS_Real_vi<0xca, DS_OR_SRC2_B64>;
1763def DS_XOR_SRC2_B64_vi    : DS_Real_vi<0xcb, DS_XOR_SRC2_B64>;
1764def DS_WRITE_SRC2_B64_vi  : DS_Real_vi<0xcd, DS_WRITE_SRC2_B64>;
1765def DS_MIN_SRC2_F64_vi    : DS_Real_vi<0xd2, DS_MIN_SRC2_F64>;
1766def DS_MAX_SRC2_F64_vi    : DS_Real_vi<0xd3, DS_MAX_SRC2_F64>;
1767def DS_WRITE_B96_vi       : DS_Real_vi<0xde, DS_WRITE_B96>;
1768def DS_WRITE_B128_vi      : DS_Real_vi<0xdf, DS_WRITE_B128>;
1769def DS_READ_B96_vi        : DS_Real_vi<0xfe, DS_READ_B96>;
1770def DS_READ_B128_vi       : DS_Real_vi<0xff, DS_READ_B128>;
1771
1772// GFX90A+.
// These reuse DS_Real_vi, so they share the VI bit layout; the RTN opcode is
// the non-RTN opcode plus 0x20.
// NOTE(review): DS_Real_vi itself only sets AssemblerPredicate = isGFX8GFX9.
// Any narrower subtarget restriction is not expressed on these lines and
// presumably comes from the pseudo definitions - confirm.
1773def DS_ADD_F64_vi     : DS_Real_vi<0x5c, DS_ADD_F64>;
1774def DS_ADD_RTN_F64_vi : DS_Real_vi<0x7c, DS_ADD_RTN_F64>;
1775
1776// GFX940+.
// Packed-add reals, again built on DS_Real_vi. Here each RTN opcode is the
// non-RTN opcode plus 0xa0 (0x17 -> 0xb7, 0x18 -> 0xb8), not plus 0x20 as in
// the other VI tables.
// NOTE(review): DS_Real_vi itself only sets AssemblerPredicate = isGFX8GFX9.
// Any narrower subtarget restriction presumably comes from the pseudo
// definitions - confirm.
1777def DS_PK_ADD_F16_vi     : DS_Real_vi<0x17, DS_PK_ADD_F16>;
1778def DS_PK_ADD_RTN_F16_vi : DS_Real_vi<0xb7, DS_PK_ADD_RTN_F16>;
1779def DS_PK_ADD_BF16_vi     : DS_Real_vi<0x18, DS_PK_ADD_BF16>;
1780def DS_PK_ADD_RTN_BF16_vi : DS_Real_vi<0xb8, DS_PK_ADD_RTN_BF16>;
1781
1782//===----------------------------------------------------------------------===//
1783// GFX950.
1784//===----------------------------------------------------------------------===//
// DS_READ_*_TR_* reals, opcodes 0xe0-0xe3, built on DS_Real_vi. The opcodes
// are written with three hex digits (0x0e0...) but still fit the 8-bit `op`
// parameter.
// NOTE(review): DS_Real_vi itself only sets AssemblerPredicate = isGFX8GFX9.
// Any narrower subtarget restriction presumably comes from the pseudo
// definitions - confirm.
1785def DS_READ_B64_TR_B4_vi  : DS_Real_vi<0x0e0, DS_READ_B64_TR_B4>;
1786def DS_READ_B96_TR_B6_vi  : DS_Real_vi<0x0e1, DS_READ_B96_TR_B6>;
1787def DS_READ_B64_TR_B8_vi  : DS_Real_vi<0x0e2, DS_READ_B64_TR_B8>;
1788def DS_READ_B64_TR_B16_vi : DS_Real_vi<0x0e3, DS_READ_B64_TR_B16>;
1789