xref: /llvm-project/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll (revision f4037277bb0220cb1dece91d21d4fdc2995eae7a)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals --version 2
2; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s | FileCheck %s
3
4; Check propagation of amdgpu-waves-per-eu attribute.
5
6; Called from kernel_1_8 (waves-per-eu 1,8) and also from kernel_2_8 (2,8).
; The assertions expect the 1,8 attribute group (ATTR0).
7define internal void @default_to_1_8_a() {
8; CHECK-LABEL: define internal void @default_to_1_8_a
9; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
10; CHECK-NEXT:    ret void
11;
12  ret void
13}
14
; Kernel with waves-per-eu 1,8 (attribute group #0); its only callee is
; the unannotated @default_to_1_8_a.
15define amdgpu_kernel void @kernel_1_8() #0 {
16; CHECK-LABEL: define amdgpu_kernel void @kernel_1_8
17; CHECK-SAME: () #[[ATTR0]] {
18; CHECK-NEXT:    call void @default_to_1_8_a()
19; CHECK-NEXT:    ret void
20;
21  call void @default_to_1_8_a()
22  ret void
23}
24
25; Called only from kernel_1_2 (waves-per-eu 1,2); expected to pick up 1,2.
26define internal void @default_to_1_2() {
27; CHECK-LABEL: define internal void @default_to_1_2
28; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
29; CHECK-NEXT:    ret void
30;
31  ret void
32}
33
; Kernel with waves-per-eu 1,2 (attribute group #1). It calls one
; unannotated callee used only here (@default_to_1_2), one unannotated
; callee shared with kernel_2_8 (@default_to_1_8_b), and two callees that
; already carry their own ranges (@flat_group_1_1: 1,1, @flat_group_2_8: 2,8).
34define amdgpu_kernel void @kernel_1_2() #1 {
35; CHECK-LABEL: define amdgpu_kernel void @kernel_1_2
36; CHECK-SAME: () #[[ATTR1]] {
37; CHECK-NEXT:    call void @default_to_1_2()
38; CHECK-NEXT:    call void @flat_group_1_1()
39; CHECK-NEXT:    call void @default_to_1_8_b()
40; CHECK-NEXT:    call void @flat_group_2_8()
41; CHECK-NEXT:    ret void
42;
43  call void @default_to_1_2()
44  call void @flat_group_1_1()
45  call void @default_to_1_8_b()
46  call void @flat_group_2_8()
47  ret void
48}
49
50; Called only from kernel_1_4 (waves-per-eu 1,4); expected to pick up 1,4.
51define internal void @default_to_1_4() {
52; CHECK-LABEL: define internal void @default_to_1_4
53; CHECK-SAME: () #[[ATTR2:[0-9]+]] {
54; CHECK-NEXT:    ret void
55;
56  ret void
57}
58
; Kernel with waves-per-eu 1,4 (attribute group #2); its only callee is
; the unannotated @default_to_1_4.
59define amdgpu_kernel void @kernel_1_4() #2 {
60; CHECK-LABEL: define amdgpu_kernel void @kernel_1_4
61; CHECK-SAME: () #[[ATTR2]] {
62; CHECK-NEXT:    call void @default_to_1_4()
63; CHECK-NEXT:    ret void
64;
65  call void @default_to_1_4()
66  ret void
67}
68
69; Called from kernel_2_9 (waves-per-eu 2,9) and kernel_9_9 (9,9); the
; assertions expect the 2,9 attribute group (ATTR3).
70define internal void @default_to_2_9() {
71; CHECK-LABEL: define internal void @default_to_2_9
72; CHECK-SAME: () #[[ATTR3:[0-9]+]] {
73; CHECK-NEXT:    ret void
74;
75  ret void
76}
77
78; This already has strict bounds (1,1) but is called from kernels with
79; wider bounds (kernel_1_2 and kernel_2_9), and should not be changed.
80define internal void @flat_group_1_1() #3 {
81; CHECK-LABEL: define internal void @flat_group_1_1
82; CHECK-SAME: () #[[ATTR4:[0-9]+]] {
83; CHECK-NEXT:    ret void
84;
85  ret void
86}
87
88; 2,8 -> 2,2 was the intended refinement (only caller is kernel_1_2).
; NOTE(review): the assertions below currently expect the 2,8 group (ATTR5),
; i.e. the range is left unchanged - confirm which is intended.
89define internal void @flat_group_2_8() #4 {
90; CHECK-LABEL: define internal void @flat_group_2_8
91; CHECK-SAME: () #[[ATTR5:[0-9]+]] {
92; CHECK-NEXT:    ret void
93;
94  ret void
95}
96
97; 9,10 -> 9,9 was the intended refinement (only caller is kernel_9_9).
; NOTE(review): the assertions below currently expect the 9,10 group (ATTR6),
; i.e. the range is left unchanged - confirm which is intended.
98define internal void @flat_group_9_10() #5 {
99; CHECK-LABEL: define internal void @flat_group_9_10
100; CHECK-SAME: () #[[ATTR6:[0-9]+]] {
101; CHECK-NEXT:    ret void
102;
103  ret void
104}
105
; Kernel with waves-per-eu 2,9 (attribute group #6). Calls the unannotated
; @default_to_2_9 and @flat_group_1_1, which already has the range 1,1.
106define amdgpu_kernel void @kernel_2_9() #6 {
107; CHECK-LABEL: define amdgpu_kernel void @kernel_2_9
108; CHECK-SAME: () #[[ATTR3]] {
109; CHECK-NEXT:    call void @default_to_2_9()
110; CHECK-NEXT:    call void @flat_group_1_1()
111; CHECK-NEXT:    ret void
112;
113  call void @default_to_2_9()
114  call void @flat_group_1_1()
115  ret void
116}
117
; Kernel with waves-per-eu 9,9 (attribute group #7). Calls the unannotated
; @default_to_2_9 (shared with kernel_2_9) and @flat_group_9_10 (9,10).
118define amdgpu_kernel void @kernel_9_9() #7 {
119; CHECK-LABEL: define amdgpu_kernel void @kernel_9_9
120; CHECK-SAME: () #[[ATTR7:[0-9]+]] {
121; CHECK-NEXT:    call void @default_to_2_9()
122; CHECK-NEXT:    call void @flat_group_9_10()
123; CHECK-NEXT:    ret void
124;
125  call void @default_to_2_9()
126  call void @flat_group_9_10()
127  ret void
128}
129
130; Called from kernel_2_8 (waves-per-eu 2,8) and kernel_1_2 (1,2) => 1,8
; (the smallest minimum and the largest maximum of the two callers).
131define internal void @default_to_1_8_b() {
132; CHECK-LABEL: define internal void @default_to_1_8_b
133; CHECK-SAME: () #[[ATTR0]] {
134; CHECK-NEXT:    ret void
135;
136  ret void
137}
138
139; The kernel's lower bound is higher than the callee's lower bound, so
140; this should probably be illegal.
; Both callees (@default_to_1_8_a, @default_to_1_8_b) are expected to end up
; with 1,8 while this kernel keeps its own 2,8 (ATTR5).
141define amdgpu_kernel void @kernel_2_8() #4 {
142; CHECK-LABEL: define amdgpu_kernel void @kernel_2_8
143; CHECK-SAME: () #[[ATTR5]] {
144; CHECK-NEXT:    call void @default_to_1_8_a()
145; CHECK-NEXT:    call void @default_to_1_8_b()
146; CHECK-NEXT:    ret void
147;
148  call void @default_to_1_8_a()
149  call void @default_to_1_8_b()
150  ret void
151}
152
153; 1,2 -> 2,2 was the intended refinement. Forms a call cycle with
; @merge_cycle_1 and is entered from kernel_3_8.
; NOTE(review): the assertions below currently expect the 1,2 group (ATTR1),
; i.e. the range is left unchanged - confirm which is intended.
154define internal void @merge_cycle_0() #1 {
155; CHECK-LABEL: define internal void @merge_cycle_0
156; CHECK-SAME: () #[[ATTR1]] {
157; CHECK-NEXT:    call void @merge_cycle_1()
158; CHECK-NEXT:    ret void
159;
160  call void @merge_cycle_1()
161  ret void
162}
163
164; Called from 1,2 (@merge_cycle_0, its only direct caller) + 3,8
; (kernel_3_8, which reaches it through @merge_cycle_0).
165; 2,8 -> 2,8
166define internal void @merge_cycle_1() #4 {
167; CHECK-LABEL: define internal void @merge_cycle_1
168; CHECK-SAME: () #[[ATTR5]] {
169; CHECK-NEXT:    call void @merge_cycle_0()
170; CHECK-NEXT:    ret void
171;
172  call void @merge_cycle_0()
173  ret void
174}
175
; Kernel with waves-per-eu 3,8 (attribute group #8). Entry point for the
; merge_cycle_0/merge_cycle_1 cycle, and caller of the address-taken,
; externally visible, and mismatched-return-type callees defined below.
176define amdgpu_kernel void @kernel_3_8() #8 {
177; CHECK-LABEL: define amdgpu_kernel void @kernel_3_8
178; CHECK-SAME: () #[[ATTR8:[0-9]+]] {
179; CHECK-NEXT:    call void @merge_cycle_0()
180; CHECK-NEXT:    call void @default_captured_address()
181; CHECK-NEXT:    call void @externally_visible_default()
182; CHECK-NEXT:    [[F32:%.*]] = call float @bitcasted_function()
183; CHECK-NEXT:    ret void
184;
185  call void @merge_cycle_0()
186  call void @default_captured_address()
187  call void @externally_visible_default()
188  %f32 = call float @bitcasted_function()
189  ret void
190}
191
; Internal function that stores its own address to memory. The assertions
; expect the group without any waves-per-eu (ATTR9) even though kernel_3_8
; calls it; presumably because the address escapes - TODO confirm.
192define internal void @default_captured_address() {
193; CHECK-LABEL: define internal void @default_captured_address
194; CHECK-SAME: () #[[ATTR9:[0-9]+]] {
195; CHECK-NEXT:    store volatile ptr @default_captured_address, ptr undef, align 8
196; CHECK-NEXT:    ret void
197;
198  store volatile ptr @default_captured_address, ptr undef, align 8
199  ret void
200}
201
; Not marked internal. The assertions expect the group without any
; waves-per-eu (ATTR9) even though kernel_3_8 calls it; presumably because
; callers outside the module are possible - TODO confirm.
202define void @externally_visible_default() {
203; CHECK-LABEL: define void @externally_visible_default
204; CHECK-SAME: () #[[ATTR9]] {
205; CHECK-NEXT:    ret void
206;
207  ret void
208}
210; 1,10 -> 3,8
211define internal i32 @bitcasted_function() {
212; CHECK-LABEL: define internal i32 @bitcasted_function
213; CHECK-SAME: () #[[ATTR8]] {
214; CHECK-NEXT:    ret i32 0
215;
216  ret i32 0
217}
218
; Only caller is kernel_invalid_bounds_0_8 (requests waves-per-eu 0,8).
; The assertions expect a group with no waves-per-eu at all (ATTR10).
219define internal void @called_from_invalid_bounds_0() {
220; CHECK-LABEL: define internal void @called_from_invalid_bounds_0
221; CHECK-SAME: () #[[ATTR10:[0-9]+]] {
222; CHECK-NEXT:    ret void
223;
224  ret void
225}
226
; Only caller is kernel_invalid_bounds_1_123 (requests waves-per-eu 1,123).
; The assertions expect a group with no waves-per-eu at all (ATTR10).
227define internal void @called_from_invalid_bounds_1() {
228; CHECK-LABEL: define internal void @called_from_invalid_bounds_1
229; CHECK-SAME: () #[[ATTR10]] {
230; CHECK-NEXT:    ret void
231;
232  ret void
233}
234
235; Invalid range for amdgpu-waves-per-eu (minimum of 0 in "0,8").
; The assertions expect this kernel to end up with the 1,8 group (ATTR0)
; and its callee to get no waves-per-eu.
236define amdgpu_kernel void @kernel_invalid_bounds_0_8() #9 {
237; CHECK-LABEL: define amdgpu_kernel void @kernel_invalid_bounds_0_8
238; CHECK-SAME: () #[[ATTR0]] {
239; CHECK-NEXT:    call void @called_from_invalid_bounds_0()
240; CHECK-NEXT:    ret void
241;
242  call void @called_from_invalid_bounds_0()
243  ret void
244}
245
246; Invalid range for amdgpu-waves-per-eu (maximum of 123 in "1,123").
; The assertions expect the kernel to keep the 1,123 string as written
; (ATTR11) and its callee to get no waves-per-eu.
247define amdgpu_kernel void @kernel_invalid_bounds_1_123() #10 {
248; CHECK-LABEL: define amdgpu_kernel void @kernel_invalid_bounds_1_123
249; CHECK-SAME: () #[[ATTR11:[0-9]+]] {
250; CHECK-NEXT:    call void @called_from_invalid_bounds_1()
251; CHECK-NEXT:    ret void
252;
253  call void @called_from_invalid_bounds_1()
254  ret void
255}
256
257; XXX - Why is the maximum not 6?
258; The 512 maximum workgroup size implies a minimum occupancy of 2. The
259; implied minimum waves-per-eu should not be 3
260; -> 2,10
; NOTE(review): the assertions below currently expect ATTR12, which has
; flat-work-group-size 1,512 and no waves-per-eu at all - confirm whether
; the "-> 2,10" note above is still the intended result.
261define void @larger_group_size_implies_lower_minimum() #11 {
262; CHECK-LABEL: define void @larger_group_size_implies_lower_minimum
263; CHECK-SAME: () #[[ATTR12:[0-9]+]] {
264; CHECK-NEXT:    ret void
265;
266  ret void
267}
268
; Kernel with flat-work-group-size 1,512 and waves-per-eu 3,6 (attribute
; group #12); calls the externally visible function that only sets
; flat-work-group-size 1,512.
269define amdgpu_kernel void @kernel_3_6() #12 {
270; CHECK-LABEL: define amdgpu_kernel void @kernel_3_6
271; CHECK-SAME: () #[[ATTR13:[0-9]+]] {
272; CHECK-NEXT:    call void @larger_group_size_implies_lower_minimum()
273; CHECK-NEXT:    ret void
274;
275  call void @larger_group_size_implies_lower_minimum()
276  ret void
277}
278
279; 3,6 -> 6,9
; Called from @refine_lower_func_4_8 (4,8), kernel_foo_6_8 (6,8) and
; kernel_bar_8_9 (8,9).
; NOTE(review): the assertions below currently expect the 3,6 group
; (ATTR14), i.e. the range is left unchanged - confirm which is intended.
280define internal void @refine_upper_func_3_6() #13 {
281; CHECK-LABEL: define internal void @refine_upper_func_3_6
282; CHECK-SAME: () #[[ATTR14:[0-9]+]] {
283; CHECK-NEXT:    ret void
284;
285  ret void
286}
287
288; 4,8 -> 6,8 was the intended refinement (only caller is kernel_foo_6_8).
; NOTE(review): the assertions below currently expect the 4,8 group
; (ATTR15), i.e. the range is left unchanged - confirm which is intended.
289define internal void @refine_lower_func_4_8() #14 {
290; CHECK-LABEL: define internal void @refine_lower_func_4_8
291; CHECK-SAME: () #[[ATTR15:[0-9]+]] {
292; CHECK-NEXT:    call void @refine_upper_func_3_6()
293; CHECK-NEXT:    ret void
294;
295  call void @refine_upper_func_3_6()
296  ret void
297}
298
; Kernel with waves-per-eu 6,8 and no flat-work-group-size (attribute
; group #15). Calls callees annotated 3,6, 4,8 and 9,10.
299define amdgpu_kernel void @kernel_foo_6_8() #15 {
300; CHECK-LABEL: define amdgpu_kernel void @kernel_foo_6_8
301; CHECK-SAME: () #[[ATTR16:[0-9]+]] {
302; CHECK-NEXT:    call void @refine_upper_func_3_6()
303; CHECK-NEXT:    call void @refine_lower_func_4_8()
304; CHECK-NEXT:    call void @func_9_10_a()
305; CHECK-NEXT:    ret void
306;
307  call void @refine_upper_func_3_6()
308  call void @refine_lower_func_4_8()
309  call void @func_9_10_a()
310  ret void
311}
312
313; 5,5 -> 5,5 (only caller is kernel_bar_8_9, whose 8,9 range does not
; overlap; the function's own range is expected to be kept).
314define internal void @func_5_5() #16 {
315; CHECK-LABEL: define internal void @func_5_5
316; CHECK-SAME: () #[[ATTR17:[0-9]+]] {
317; CHECK-NEXT:    ret void
318;
319  ret void
320}
321
322; 5,8 -> 8,8 was the intended refinement (only caller is kernel_bar_8_9).
; NOTE(review): the assertions below currently expect the 5,8 group
; (ATTR18), i.e. the range is left unchanged - confirm which is intended.
323define internal void @func_5_8() #17 {
324; CHECK-LABEL: define internal void @func_5_8
325; CHECK-SAME: () #[[ATTR18:[0-9]+]] {
326; CHECK-NEXT:    ret void
327;
328  ret void
329}
330
331; 9,10 -> 9,10 (only caller is kernel_foo_6_8, whose 6,8 range does not
; overlap; the function's own range is expected to be kept).
332define internal void @func_9_10_a() #18 {
333; CHECK-LABEL: define internal void @func_9_10_a
334; CHECK-SAME: () #[[ATTR19:[0-9]+]] {
335; CHECK-NEXT:    ret void
336;
337  ret void
338}
339
340; 9,10 -> 9,9 was the intended refinement (only caller is kernel_bar_8_9).
; NOTE(review): the assertions below currently expect the 9,10 group
; (ATTR19), i.e. the range is left unchanged - confirm which is intended.
341define internal void @func_9_10_b() #18 {
342; CHECK-LABEL: define internal void @func_9_10_b
343; CHECK-SAME: () #[[ATTR19]] {
344; CHECK-NEXT:    ret void
345;
346  ret void
347}
348
; Kernel with waves-per-eu 8,9 and no flat-work-group-size (attribute
; group #19). Calls callees annotated 3,6, 5,5, 9,10 and 5,8, plus the
; unannotated, externally visible @externally_visible.
349define amdgpu_kernel void @kernel_bar_8_9() #19 {
350; CHECK-LABEL: define amdgpu_kernel void @kernel_bar_8_9
351; CHECK-SAME: () #[[ATTR20:[0-9]+]] {
352; CHECK-NEXT:    call void @refine_upper_func_3_6()
353; CHECK-NEXT:    call void @func_5_5()
354; CHECK-NEXT:    call void @func_9_10_b()
355; CHECK-NEXT:    call void @func_5_8()
356; CHECK-NEXT:    call void @externally_visible()
357; CHECK-NEXT:    ret void
358;
359  call void @refine_upper_func_3_6()
360  call void @func_5_5()
361  call void @func_9_10_b()
362  call void @func_5_8()
363  call void @externally_visible()
364  ret void
365}
366
367; This is an optimization hint based on users, so it's not strictly
368; required that all callers be visible.
; NOTE(review): the assertions below currently expect the group without
; any waves-per-eu (ATTR9), so nothing is propagated from kernel_bar_8_9
; to this non-internal function - confirm this is still the intent.
369define void @externally_visible() {
370; CHECK-LABEL: define void @externally_visible
371; CHECK-SAME: () #[[ATTR9]] {
372; CHECK-NEXT:    ret void
373;
374  ret void
375}
376
377
378; Use a 1 wave workgroup so there is no interaction by the workgroup
379; size on the implied waves per EU.
; This applies to groups #0-#10 (flat-work-group-size 1,64). Groups #11
; and #12 use 1,512 on purpose for the larger-group-size test, and groups
; #13-#19 set only amdgpu-waves-per-eu with no flat-work-group-size.
380
381attributes #0 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,8" }
382attributes #1 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,2" }
383attributes #2 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,4" }
384attributes #3 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,1" }
385attributes #4 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="2,8" }
386attributes #5 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="9,10" }
387attributes #6 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="2,9" }
388attributes #7 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="9,9" }
389attributes #8 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="3,8" }
390attributes #9 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="0,8" }
391attributes #10 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,123" }
392attributes #11 = { "amdgpu-flat-work-group-size"="1,512" }
393attributes #12 = { "amdgpu-flat-work-group-size"="1,512" "amdgpu-waves-per-eu"="3,6" }
394attributes #13 = { "amdgpu-waves-per-eu"="3,6" }
395attributes #14 = { "amdgpu-waves-per-eu"="4,8" }
396attributes #15 = { "amdgpu-waves-per-eu"="6,8" }
397attributes #16 = { "amdgpu-waves-per-eu"="5,5" }
398attributes #17 = { "amdgpu-waves-per-eu"="5,8" }
399attributes #18 = { "amdgpu-waves-per-eu"="9,10" }
400attributes #19 = { "amdgpu-waves-per-eu"="8,9" }
401;.
402; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,8" "uniform-work-group-size"="false" }
403; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,2" "uniform-work-group-size"="false" }
404; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,4" "uniform-work-group-size"="false" }
405; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,9" "uniform-work-group-size"="false" }
406; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,1" "uniform-work-group-size"="false" }
407; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,8" "uniform-work-group-size"="false" }
408; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,10" "uniform-work-group-size"="false" }
409; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,9" "uniform-work-group-size"="false" }
410; CHECK: attributes #[[ATTR8]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,8" "uniform-work-group-size"="false" }
411; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
412; CHECK: attributes #[[ATTR10]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
413; CHECK: attributes #[[ATTR11]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,123" "uniform-work-group-size"="false" }
414; CHECK: attributes #[[ATTR12]] = { "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
415; CHECK: attributes #[[ATTR13]] = { "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,6" "uniform-work-group-size"="false" }
416; CHECK: attributes #[[ATTR14]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,6" "uniform-work-group-size"="false" }
417; CHECK: attributes #[[ATTR15]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "uniform-work-group-size"="false" }
418; CHECK: attributes #[[ATTR16]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="6,8" "uniform-work-group-size"="false" }
419; CHECK: attributes #[[ATTR17]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="5,5" "uniform-work-group-size"="false" }
420; CHECK: attributes #[[ATTR18]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="5,8" "uniform-work-group-size"="false" }
421; CHECK: attributes #[[ATTR19]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,10" "uniform-work-group-size"="false" }
422; CHECK: attributes #[[ATTR20]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="8,9" "uniform-work-group-size"="false" }
423;.
424