xref: /llvm-project/llvm/test/tools/llvm-mca/AMDGPU/gfx950.s (revision d9c4e9ffe78c34db247b164aa46eea2625b08d3a)
1# RUN: llvm-mca -mtriple=amdgcn -mcpu=gfx950 --timeline --iterations=1 --timeline-max-cycles=0 < %s | FileCheck %s
2
3# CHECK: Iterations:        1
4# CHECK: Instructions:      133
5# CHECK: Total Cycles:      1101
6# CHECK: Total uOps:        133
7
8v_mfma_f32_16x16x32_f16 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1
9v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[0:3], a[4:7]
10v_mfma_f32_32x32x16_f16 v[0:15], v[0:3], v[0:3], v[0:15]
11v_mfma_f32_32x32x16_f16 a[0:15], a[0:3], a[0:3], a[0:15] blgp:2
12v_mfma_f32_32x32x16_bf16 v[0:15], v[0:3], v[0:3], v[0:15]
13v_mfma_f32_32x32x16_bf16 a[0:15], a[0:3], a[0:3], a[0:15] blgp:2
14v_mfma_i32_16x16x64_i8 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1
15v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[0:3], a[4:7]
16v_mfma_i32_32x32x32_i8 v[0:15], v[0:3], v[0:3], v[0:15]
17v_mfma_i32_32x32x32_i8 a[0:15], a[0:3], a[0:3], a[0:15] blgp:2
18v_mfma_f32_16x16x32_bf16 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1
19v_mfma_f32_16x16x32_bf16 a[0:3], v[0:3], v[0:3], a[4:7]
20
21v_mfma_ld_scale_b32 v0, v0
22
23;; FIXME: should have different cycle count depending on whether either matrix is f8
24;;  TODO: test vdc/adc
25v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3]
26v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] blgp:1
27v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:9], v[0:3] blgp:2
28v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:9], v[0:3] blgp:3
29v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:7], v[0:3] blgp:4
30v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] cbsz:1
31v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[4:11], v[0:3] cbsz:2
32v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[4:11], v[0:3] cbsz:3
33v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:7], v[4:11], v[0:3] cbsz:4
34v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[4:11], v[0:3] cbsz:2 blgp:1
35v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:9], v[0:3] cbsz:1 blgp:2
36
37;; FIXME: should have different cycle count depending on whether either matrix is f8
38v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15]
39v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] blgp:1
40v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:9], v[0:15] blgp:2
41v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:9], v[0:15] blgp:3
42v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:7], v[0:15] blgp:4
43v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] cbsz:1
44v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:9], v[4:11], v[0:15] cbsz:2
45v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:9], v[4:11], v[0:15] cbsz:3
46v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:7], v[4:11], v[0:15] cbsz:4
47v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:9], v[4:11], v[0:15] cbsz:2
48v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] blgp:1
49
50;; FIXME: should have different cycle count depending on whether either matrix is f8
51v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3], v5, v5
52v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3], v5, v5 blgp:1
53v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[4:9], v[0:3], v5, v5 cbsz:2 blgp:2
54
55v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15], v5, v5
56v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[4:9], v[4:11], v[0:15], v5, v5 cbsz:2 blgp:1
57v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[4:9], v[4:9], v[0:15], v5, v5 cbsz:2 blgp:2
58
59;;  TODO: These results are wrong
60v_smfmac_f32_16x16x64_f16 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1
61v_smfmac_f32_32x32x32_f16 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1
62v_smfmac_f32_16x16x64_bf16 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1
63v_smfmac_f32_32x32x32_bf16 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1
64v_smfmac_i32_16x16x128_i8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1
65v_smfmac_i32_32x32x64_i8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1
66
67v_smfmac_f32_16x16x128_bf8_bf8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1
68v_smfmac_f32_16x16x128_bf8_fp8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1
69v_smfmac_f32_16x16x128_fp8_bf8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1
70v_smfmac_f32_16x16x128_fp8_fp8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1
71
72v_smfmac_f32_32x32x64_bf8_bf8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1
73v_smfmac_f32_32x32x64_bf8_fp8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1
74v_smfmac_f32_32x32x64_fp8_bf8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1
75v_smfmac_f32_32x32x64_fp8_fp8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1
76
77v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5]
78v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5]
79
80v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33]
81v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33]
82
83v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3]
84v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], v[2:3], v[2:3]
85
86v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7]
87v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7]
88
89v_mfma_f32_16x16x16_f16 v[0:3], v[4:5], v[6:7], v[0:3]
90v_mfma_f32_16x16x16_f16 a[0:3], v[4:5], v[6:7], a[0:3]
91
92v_mfma_f32_32x32x8_f16 v[0:15], v[4:5], v[6:7], v[0:15]
93v_mfma_f32_32x32x8_f16 a[0:15], v[4:5], v[6:7], a[0:15]
94
95v_mfma_f32_16x16x16_bf16 v[0:3], v[4:5], v[6:7], v[0:3]
96v_mfma_f32_16x16x16_bf16 a[0:3], v[4:5], v[6:7], a[0:3]
97
98v_mfma_f32_32x32x8_bf16 v[0:15], v[4:5], v[6:7], v[0:15]
99v_mfma_f32_32x32x8_bf16 a[0:15], v[4:5], v[6:7], a[0:15]
100
101v_mfma_i32_16x16x32_i8 v[0:3], v[4:5], v[6:7], v[0:3]
102v_mfma_i32_16x16x32_i8 a[0:3], v[4:5], v[6:7], a[0:3]
103
104v_mfma_i32_32x32x16_i8 v[0:15], v[2:3], v[4:5], v[0:15]
105v_mfma_i32_32x32x16_i8 a[0:15], v[2:3], v[4:5], a[0:15]
106
107v_mfma_f32_4x4x4_16b_f16 v[0:3], v[0:1], v[2:3], v[2:5]
108v_mfma_f32_4x4x4_16b_f16 a[0:3], v[0:1], v[2:3], a[2:5]
109
110v_mfma_f32_16x16x4_4b_f16 v[0:15], v[2:3], v[4:5], v[18:33]
111v_mfma_f32_16x16x4_4b_f16 a[0:15], v[2:3], v[4:5], a[18:33]
112
113v_mfma_f32_32x32x4_2b_f16 v[0:31], v[0:1], v[2:3], v[34:65]
114v_mfma_f32_32x32x4_2b_f16 a[0:31], v[0:1], v[2:3], a[34:65]
115
116v_mfma_f32_4x4x4_16b_bf16 v[0:3], v[0:1], v[2:3], v[2:5]
117v_mfma_f32_4x4x4_16b_bf16 a[0:3], v[0:1], v[2:3], a[2:5]
118
119v_mfma_f32_16x16x4_4b_bf16 v[0:15], v[2:3], v[4:5], v[18:33]
120v_mfma_f32_16x16x4_4b_bf16 a[0:15], v[2:3], v[4:5], a[18:33]
121
122v_mfma_f32_32x32x4_2b_bf16 v[0:31], v[0:1], v[2:3], v[34:65]
123v_mfma_f32_32x32x4_2b_bf16 a[0:31], v[0:1], v[2:3], a[34:65]
124
125v_mfma_f32_4x4x1_16b_f32 v[0:3], v0, v1, v[2:5]
126v_mfma_f32_4x4x1_16b_f32 a[0:3], v0, v1, a[2:5]
127
128v_mfma_f32_16x16x1_4b_f32 v[0:15], v0, v1, v[18:33]
129v_mfma_f32_16x16x1_4b_f32 a[0:15], v0, v1, a[18:33]
130
131v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5]
132v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5]
133
134v_mfma_f32_32x32x1_2b_f32 v[0:31], v0, v1, v[34:65] blgp:7
135v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[34:65] blgp:7
136
137v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33]
138v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33]
139
140v_mfma_i32_4x4x4_16b_i8 v[0:3], v0, v1, v[2:5]
141v_mfma_i32_4x4x4_16b_i8 a[0:3], v0, v1, a[2:5]
142
143v_mfma_i32_16x16x4_4b_i8 v[0:15], v0, v1, v[18:33]
144v_mfma_i32_16x16x4_4b_i8 a[0:15], v0, v1, a[18:33]
145
146v_mfma_i32_32x32x4_2b_i8 v[0:31], v0, v1, v[34:65]
147v_mfma_i32_32x32x4_2b_i8 a[0:31], v0, v1, a[34:65]
148
149v_smfmac_f32_16x16x32_f16 v[10:13], a[2:3], v[4:7], v0 cbsz:3 abid:1
150v_smfmac_f32_16x16x32_f16 a[10:13], v[2:3], a[4:7], v1
151
152v_smfmac_f32_32x32x16_f16 v[10:25], a[2:3], v[4:7], v2 cbsz:3 abid:1
153v_smfmac_f32_32x32x16_f16 a[10:25], v[2:3], a[4:7], v3
154
155v_smfmac_f32_16x16x32_bf16 v[10:13], a[2:3], v[4:7], v4 cbsz:3 abid:1
156v_smfmac_f32_16x16x32_bf16 a[10:13], v[2:3], a[4:7], v5
157
158v_smfmac_i32_16x16x64_i8 v[10:13], a[2:3], v[4:7], v8 cbsz:3 abid:1
159v_smfmac_i32_16x16x64_i8 a[10:13], v[2:3], a[4:7], v9
160
161v_smfmac_i32_32x32x32_i8 v[10:25], a[2:3], v[4:7], v10 cbsz:3 abid:1
162v_smfmac_i32_32x32x32_i8 a[10:25], v[2:3], a[4:7], v11
163
164v_mfma_f32_16x16x32_bf8_bf8 v[0:3], v[2:3], v[4:5], v[0:3]
165v_mfma_f32_16x16x32_bf8_bf8 a[0:3], v[2:3], v[4:5], a[0:3]
166
167v_mfma_f32_16x16x32_bf8_fp8 v[0:3], v[2:3], v[4:5], v[0:3]
168v_mfma_f32_16x16x32_bf8_fp8 a[0:3], v[2:3], v[4:5], a[0:3]
169
170v_mfma_f32_16x16x32_fp8_bf8 v[0:3], v[2:3], v[4:5], v[0:3]
171v_mfma_f32_16x16x32_fp8_bf8 a[0:3], v[2:3], v[4:5], a[0:3]
172
173v_mfma_f32_16x16x32_fp8_fp8 v[0:3], v[2:3], v[4:5], v[0:3]
174v_mfma_f32_16x16x32_fp8_fp8 a[0:3], v[2:3], v[4:5], a[0:3]
175
176v_mfma_f32_32x32x16_bf8_bf8 v[0:15], v[2:3], v[4:5], v[0:15]
177v_mfma_f32_32x32x16_fp8_bf8 v[0:15], v[2:3], v[4:5], v[0:15]
178v_mfma_f32_32x32x16_bf8_fp8 v[0:15], v[2:3], v[4:5], v[0:15]
179v_mfma_f32_32x32x16_fp8_fp8 v[0:15], v[2:3], v[4:5], v[0:15]
180
181v_smfmac_f32_16x16x64_bf8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1
182v_smfmac_f32_16x16x64_bf8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1
183v_smfmac_f32_16x16x64_fp8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1
184v_smfmac_f32_16x16x64_fp8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1
185
186v_smfmac_f32_32x32x32_bf8_bf8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1
187v_smfmac_f32_32x32x32_bf8_fp8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1
188v_smfmac_f32_32x32x32_fp8_bf8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1
189v_smfmac_f32_32x32x32_fp8_fp8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1
190
191# CHECK:     [0]    [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
192# CHECK-NEXT: -      -      -      -      -      -     4.00  v_mfma_f32_16x16x32_f16 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1
193# CHECK-NEXT: -      -      -      -      -      -     4.00  v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[0:3], a[4:7]
194# CHECK-NEXT: -      -      -      -      -      -     8.00  v_mfma_f32_32x32x16_f16 v[0:15], v[0:3], v[0:3], v[0:15]
195# CHECK-NEXT: -      -      -      -      -      -     8.00  v_mfma_f32_32x32x16_f16 a[0:15], a[0:3], a[0:3], a[0:15] blgp:2
196# CHECK-NEXT: -      -      -      -      -      -     8.00  v_mfma_f32_32x32x16_bf16 v[0:15], v[0:3], v[0:3], v[0:15]
197# CHECK-NEXT: -      -      -      -      -      -     8.00  v_mfma_f32_32x32x16_bf16 a[0:15], a[0:3], a[0:3], a[0:15] blgp:2
198# CHECK-NEXT: -      -      -      -      -      -     4.00  v_mfma_i32_16x16x64_i8 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1
199# CHECK-NEXT: -      -      -      -      -      -     4.00  v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[0:3], a[4:7]
200# CHECK-NEXT: -      -      -      -      -      -     8.00  v_mfma_i32_32x32x32_i8 v[0:15], v[0:3], v[0:3], v[0:15]
201# CHECK-NEXT: -      -      -      -      -      -     8.00  v_mfma_i32_32x32x32_i8 a[0:15], a[0:3], a[0:3], a[0:15] blgp:2
202# CHECK-NEXT: -      -      -      -      -      -     4.00  v_mfma_f32_16x16x32_bf16 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1
203# CHECK-NEXT: -      -      -      -      -      -     4.00  v_mfma_f32_16x16x32_bf16 a[0:3], v[0:3], v[0:3], a[4:7]
204# CHECK-NEXT: -      -      -      -     1.00    -      -     v_mfma_ld_scale_b32 v0, v0
205# CHECK-NEXT: -      -      -      -      -      -     4.00  v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3]
206# CHECK-NEXT: -      -      -      -      -      -     4.00  v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] blgp:1
207# CHECK-NEXT: -      -      -      -      -      -     4.00  v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:9], v[0:3] blgp:2
208# CHECK-NEXT: -      -      -      -      -      -     4.00  v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:9], v[0:3] blgp:3
209# CHECK-NEXT: -      -      -      -      -      -     4.00  v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:7], v[0:3] blgp:4
210# CHECK-NEXT: -      -      -      -      -      -     4.00  v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] cbsz:1
211# CHECK-NEXT: -      -      -      -      -      -     4.00  v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[4:11], v[0:3] cbsz:2
212# CHECK-NEXT: -      -      -      -      -      -     4.00  v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[4:11], v[0:3] cbsz:3
213# CHECK-NEXT: -      -      -      -      -      -     4.00  v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:7], v[4:11], v[0:3] cbsz:4
214# CHECK-NEXT: -      -      -      -      -      -     4.00  v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[4:11], v[0:3] cbsz:2 blgp:1
215# CHECK-NEXT: -      -      -      -      -      -     4.00  v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:9], v[0:3] cbsz:1 blgp:2
216# CHECK-NEXT: -      -      -      -      -      -     8.00  v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15]
217# CHECK-NEXT: -      -      -      -      -      -     8.00  v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] blgp:1
218# CHECK-NEXT: -      -      -      -      -      -     8.00  v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:9], v[0:15] blgp:2
219# CHECK-NEXT: -      -      -      -      -      -     8.00  v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:9], v[0:15] blgp:3
220# CHECK-NEXT: -      -      -      -      -      -     8.00  v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:7], v[0:15] blgp:4
221# CHECK-NEXT: -      -      -      -      -      -     8.00  v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] cbsz:1
222# CHECK-NEXT: -      -      -      -      -      -     8.00  v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:9], v[4:11], v[0:15] cbsz:2
223# CHECK-NEXT: -      -      -      -      -      -     8.00  v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:9], v[4:11], v[0:15] cbsz:3
224# CHECK-NEXT: -      -      -      -      -      -     8.00  v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:7], v[4:11], v[0:15] cbsz:4
225# CHECK-NEXT: -      -      -      -      -      -     8.00  v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:9], v[4:11], v[0:15] cbsz:2
226# CHECK-NEXT: -      -      -      -      -      -     8.00  v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] blgp:1
227# CHECK-NEXT: -      -      -      -      -      -     4.00   v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3], v5, v5 op_sel_hi:[0,0,0]
228# CHECK-NEXT: -      -      -      -      -      -     4.00   v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3], v5, v5 op_sel_hi:[0,0,0] blgp:1
229# CHECK-NEXT: -      -      -      -      -      -     4.00   v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[4:9], v[0:3], v5, v5 op_sel_hi:[0,0,0] cbsz:2 blgp:2
230# CHECK-NEXT: -      -      -      -      -      -     8.00   v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15], v5, v5 op_sel_hi:[0,0,0]
231# CHECK-NEXT: -      -      -      -      -      -     8.00   v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[4:9], v[4:11], v[0:15], v5, v5 op_sel_hi:[0,0,0] cbsz:2 blgp:1
232# CHECK-NEXT: -      -      -      -      -      -     8.00   v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[4:9], v[4:9], v[0:15], v5, v5 op_sel_hi:[0,0,0] cbsz:2 blgp:2
233# CHECK-NEXT: -      -      -      -      -      -     4.00   v_smfmac_f32_16x16x64_f16 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1
234# CHECK-NEXT: -      -      -      -      -      -     8.00   v_smfmac_f32_32x32x32_f16 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1
235# CHECK-NEXT: -      -      -      -      -      -     4.00   v_smfmac_f32_16x16x64_bf16 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1
236# CHECK-NEXT: -      -      -      -      -      -     8.00   v_smfmac_f32_32x32x32_bf16 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1
237# CHECK-NEXT: -      -      -      -      -      -     4.00   v_smfmac_i32_16x16x128_i8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1
238# CHECK-NEXT: -      -      -      -      -      -     8.00   v_smfmac_i32_32x32x64_i8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1
239# CHECK-NEXT: -      -      -      -      -      -     4.00   v_smfmac_f32_16x16x128_bf8_bf8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1
240# CHECK-NEXT: -      -      -      -      -      -     4.00   v_smfmac_f32_16x16x128_bf8_fp8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1
241# CHECK-NEXT: -      -      -      -      -      -     4.00   v_smfmac_f32_16x16x128_fp8_bf8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1
242# CHECK-NEXT: -      -      -      -      -      -     4.00   v_smfmac_f32_16x16x128_fp8_fp8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1
243# CHECK-NEXT: -      -      -      -      -      -     8.00   v_smfmac_f32_32x32x64_bf8_bf8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1
244# CHECK-NEXT: -      -      -      -      -      -     8.00   v_smfmac_f32_32x32x64_bf8_fp8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1
245# CHECK-NEXT: -      -      -      -      -      -     8.00   v_smfmac_f32_32x32x64_fp8_bf8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1
246# CHECK-NEXT: -      -      -      -      -      -     8.00   v_smfmac_f32_32x32x64_fp8_fp8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1
247# CHECK-NEXT: -      -      -      -      -      -     8.00   v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5]
248# CHECK-NEXT: -      -      -      -      -      -     8.00   v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5]
249# CHECK-NEXT: -      -      -      -      -      -     16.00  v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33]
250# CHECK-NEXT: -      -      -      -      -      -     16.00  v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33]
251# CHECK-NEXT: -      -      -      -     4.00    -      -     v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3]
252# CHECK-NEXT: -      -      -      -     4.00    -      -     v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], v[2:3], v[2:3]
253# CHECK-NEXT: -      -      -      -     16.00   -      -     v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7]
254# CHECK-NEXT: -      -      -      -     16.00   -      -     v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7]
255# CHECK-NEXT: -      -      -      -      -      -     4.00   v_mfma_f32_16x16x16_f16 v[0:3], v[4:5], v[6:7], v[0:3]
256# CHECK-NEXT: -      -      -      -      -      -     4.00   v_mfma_f32_16x16x16_f16 a[0:3], v[4:5], v[6:7], a[0:3]
257# CHECK-NEXT: -      -      -      -      -      -     8.00   v_mfma_f32_32x32x8_f16 v[0:15], v[4:5], v[6:7], v[0:15]
258# CHECK-NEXT: -      -      -      -      -      -     8.00   v_mfma_f32_32x32x8_f16 a[0:15], v[4:5], v[6:7], a[0:15]
259# CHECK-NEXT: -      -      -      -      -      -     4.00   v_mfma_f32_16x16x16_bf16 v[0:3], v[4:5], v[6:7], v[0:3]
260# CHECK-NEXT: -      -      -      -      -      -     4.00   v_mfma_f32_16x16x16_bf16 a[0:3], v[4:5], v[6:7], a[0:3]
261# CHECK-NEXT: -      -      -      -      -      -     8.00   v_mfma_f32_32x32x8_bf16 v[0:15], v[4:5], v[6:7], v[0:15]
262# CHECK-NEXT: -      -      -      -      -      -     8.00   v_mfma_f32_32x32x8_bf16 a[0:15], v[4:5], v[6:7], a[0:15]
263# CHECK-NEXT: -      -      -      -      -      -     4.00   v_mfma_i32_16x16x32_i8 v[0:3], v[4:5], v[6:7], v[0:3]
264# CHECK-NEXT: -      -      -      -      -      -     4.00   v_mfma_i32_16x16x32_i8 a[0:3], v[4:5], v[6:7], a[0:3]
265# CHECK-NEXT: -      -      -      -      -      -     8.00   v_mfma_i32_32x32x16_i8 v[0:15], v[2:3], v[4:5], v[0:15]
266# CHECK-NEXT: -      -      -      -      -      -     8.00   v_mfma_i32_32x32x16_i8 a[0:15], v[2:3], v[4:5], a[0:15]
267# CHECK-NEXT: -      -      -      -      -      -     2.00   v_mfma_f32_4x4x4_16b_f16 v[0:3], v[0:1], v[2:3], v[2:5]
268# CHECK-NEXT: -      -      -      -      -      -     2.00   v_mfma_f32_4x4x4_16b_f16 a[0:3], v[0:1], v[2:3], a[2:5]
269# CHECK-NEXT: -      -      -      -      -      -     8.00   v_mfma_f32_16x16x4_4b_f16 v[0:15], v[2:3], v[4:5], v[18:33]
270# CHECK-NEXT: -      -      -      -      -      -     8.00   v_mfma_f32_16x16x4_4b_f16 a[0:15], v[2:3], v[4:5], a[18:33]
271# CHECK-NEXT: -      -      -      -      -      -     16.00  v_mfma_f32_32x32x4_2b_f16 v[0:31], v[0:1], v[2:3], v[34:65]
272# CHECK-NEXT: -      -      -      -      -      -     16.00  v_mfma_f32_32x32x4_2b_f16 a[0:31], v[0:1], v[2:3], a[34:65]
273# CHECK-NEXT: -      -      -      -      -      -     2.00   v_mfma_f32_4x4x4_16b_bf16 v[0:3], v[0:1], v[2:3], v[2:5]
274# CHECK-NEXT: -      -      -      -      -      -     2.00   v_mfma_f32_4x4x4_16b_bf16 a[0:3], v[0:1], v[2:3], a[2:5]
275# CHECK-NEXT: -      -      -      -      -      -     8.00   v_mfma_f32_16x16x4_4b_bf16 v[0:15], v[2:3], v[4:5], v[18:33]
276# CHECK-NEXT: -      -      -      -      -      -     8.00   v_mfma_f32_16x16x4_4b_bf16 a[0:15], v[2:3], v[4:5], a[18:33]
277# CHECK-NEXT: -      -      -      -      -      -     16.00  v_mfma_f32_32x32x4_2b_bf16 v[0:31], v[0:1], v[2:3], v[34:65]
278# CHECK-NEXT: -      -      -      -      -      -     16.00  v_mfma_f32_32x32x4_2b_bf16 a[0:31], v[0:1], v[2:3], a[34:65]
279# CHECK-NEXT: -      -      -      -      -      -     2.00   v_mfma_f32_4x4x1_16b_f32 v[0:3], v0, v1, v[2:5]
280# CHECK-NEXT: -      -      -      -      -      -     2.00   v_mfma_f32_4x4x1_16b_f32 a[0:3], v0, v1, a[2:5]
281# CHECK-NEXT: -      -      -      -      -      -     8.00   v_mfma_f32_16x16x1_4b_f32 v[0:15], v0, v1, v[18:33]
282# CHECK-NEXT: -      -      -      -      -      -     8.00   v_mfma_f32_16x16x1_4b_f32 a[0:15], v0, v1, a[18:33]
283# CHECK-NEXT: -      -      -      -      -      -     8.00   v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5]
284# CHECK-NEXT: -      -      -      -      -      -     8.00   v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5]
285# CHECK-NEXT: -      -      -      -      -      -     16.00  v_mfma_f32_32x32x1_2b_f32 v[0:31], v0, v1, v[34:65] blgp:7
286# CHECK-NEXT: -      -      -      -      -      -     16.00  v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[34:65] blgp:7
287# CHECK-NEXT: -      -      -      -      -      -     16.00  v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33]
288# CHECK-NEXT: -      -      -      -      -      -     16.00  v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33]
289# CHECK-NEXT: -      -      -      -      -      -     2.00   v_mfma_i32_4x4x4_16b_i8 v[0:3], v0, v1, v[2:5]
290# CHECK-NEXT: -      -      -      -      -      -     2.00   v_mfma_i32_4x4x4_16b_i8 a[0:3], v0, v1, a[2:5]
291# CHECK-NEXT: -      -      -      -      -      -     8.00   v_mfma_i32_16x16x4_4b_i8 v[0:15], v0, v1, v[18:33]
292# CHECK-NEXT: -      -      -      -      -      -     8.00   v_mfma_i32_16x16x4_4b_i8 a[0:15], v0, v1, a[18:33]
293# CHECK-NEXT: -      -      -      -      -      -     16.00  v_mfma_i32_32x32x4_2b_i8 v[0:31], v0, v1, v[34:65]
294# CHECK-NEXT: -      -      -      -      -      -     16.00  v_mfma_i32_32x32x4_2b_i8 a[0:31], v0, v1, a[34:65]
295# CHECK-NEXT: -      -      -      -      -      -     4.00   v_smfmac_f32_16x16x32_f16 v[10:13], a[2:3], v[4:7], v0 cbsz:3 abid:1
296# CHECK-NEXT: -      -      -      -      -      -     4.00   v_smfmac_f32_16x16x32_f16 a[10:13], v[2:3], a[4:7], v1
297# CHECK-NEXT: -      -      -      -      -      -     8.00   v_smfmac_f32_32x32x16_f16 v[10:25], a[2:3], v[4:7], v2 cbsz:3 abid:1
298# CHECK-NEXT: -      -      -      -      -      -     8.00   v_smfmac_f32_32x32x16_f16 a[10:25], v[2:3], a[4:7], v3
299# CHECK-NEXT: -      -      -      -      -      -     4.00   v_smfmac_f32_16x16x32_bf16 v[10:13], a[2:3], v[4:7], v4 cbsz:3 abid:1
300# CHECK-NEXT: -      -      -      -      -      -     4.00   v_smfmac_f32_16x16x32_bf16 a[10:13], v[2:3], a[4:7], v5
301# CHECK-NEXT: -      -      -      -      -      -     4.00   v_smfmac_i32_16x16x64_i8 v[10:13], a[2:3], v[4:7], v8 cbsz:3 abid:1
302# CHECK-NEXT: -      -      -      -      -      -     4.00   v_smfmac_i32_16x16x64_i8 a[10:13], v[2:3], a[4:7], v9
303# CHECK-NEXT: -      -      -      -      -      -     8.00   v_smfmac_i32_32x32x32_i8 v[10:25], a[2:3], v[4:7], v10 cbsz:3 abid:1
304# CHECK-NEXT: -      -      -      -      -      -     8.00   v_smfmac_i32_32x32x32_i8 a[10:25], v[2:3], a[4:7], v11
305# CHECK-NEXT: -      -      -      -      -      -     4.00   v_mfma_f32_16x16x32_bf8_bf8 v[0:3], v[2:3], v[4:5], v[0:3]
306# CHECK-NEXT: -      -      -      -      -      -     4.00   v_mfma_f32_16x16x32_bf8_bf8 a[0:3], v[2:3], v[4:5], a[0:3]
307# CHECK-NEXT: -      -      -      -      -      -     4.00   v_mfma_f32_16x16x32_bf8_fp8 v[0:3], v[2:3], v[4:5], v[0:3]
308# CHECK-NEXT: -      -      -      -      -      -     4.00   v_mfma_f32_16x16x32_bf8_fp8 a[0:3], v[2:3], v[4:5], a[0:3]
309# CHECK-NEXT: -      -      -      -      -      -     4.00   v_mfma_f32_16x16x32_fp8_bf8 v[0:3], v[2:3], v[4:5], v[0:3]
310# CHECK-NEXT: -      -      -      -      -      -     4.00   v_mfma_f32_16x16x32_fp8_bf8 a[0:3], v[2:3], v[4:5], a[0:3]
311# CHECK-NEXT: -      -      -      -      -      -     4.00   v_mfma_f32_16x16x32_fp8_fp8 v[0:3], v[2:3], v[4:5], v[0:3]
312# CHECK-NEXT: -      -      -      -      -      -     4.00   v_mfma_f32_16x16x32_fp8_fp8 a[0:3], v[2:3], v[4:5], a[0:3]
313# CHECK-NEXT: -      -      -      -      -      -     8.00   v_mfma_f32_32x32x16_bf8_bf8 v[0:15], v[2:3], v[4:5], v[0:15]
314# CHECK-NEXT: -      -      -      -      -      -     8.00   v_mfma_f32_32x32x16_fp8_bf8 v[0:15], v[2:3], v[4:5], v[0:15]
315# CHECK-NEXT: -      -      -      -      -      -     8.00   v_mfma_f32_32x32x16_bf8_fp8 v[0:15], v[2:3], v[4:5], v[0:15]
316# CHECK-NEXT: -      -      -      -      -      -     8.00   v_mfma_f32_32x32x16_fp8_fp8 v[0:15], v[2:3], v[4:5], v[0:15]
317# CHECK-NEXT: -      -      -      -      -      -     4.00   v_smfmac_f32_16x16x64_bf8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1
318# CHECK-NEXT: -      -      -      -      -      -     4.00   v_smfmac_f32_16x16x64_bf8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1
319# CHECK-NEXT: -      -      -      -      -      -     4.00   v_smfmac_f32_16x16x64_fp8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1
320# CHECK-NEXT: -      -      -      -      -      -     4.00   v_smfmac_f32_16x16x64_fp8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1
321# CHECK-NEXT: -      -      -      -      -      -     8.00   v_smfmac_f32_32x32x32_bf8_bf8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1
322# CHECK-NEXT: -      -      -      -      -      -     8.00   v_smfmac_f32_32x32x32_bf8_fp8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1
323# CHECK-NEXT: -      -      -      -      -      -     8.00   v_smfmac_f32_32x32x32_fp8_bf8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1
324# CHECK-NEXT: -      -      -      -      -      -     8.00   v_smfmac_f32_32x32x32_fp8_fp8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1
325