1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals --version 2 2; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s | FileCheck %s 3 4; Check propagation of the amdgpu-waves-per-eu attribute. 5 6; Called from kernels with 1,8 and 2,8 => 1,8 7define internal void @default_to_1_8_a() { 8; CHECK-LABEL: define internal void @default_to_1_8_a 9; CHECK-SAME: () #[[ATTR0:[0-9]+]] { 10; CHECK-NEXT: ret void 11; 12 ret void 13} 14 15define amdgpu_kernel void @kernel_1_8() #0 { 16; CHECK-LABEL: define amdgpu_kernel void @kernel_1_8 17; CHECK-SAME: () #[[ATTR0]] { 18; CHECK-NEXT: call void @default_to_1_8_a() 19; CHECK-NEXT: ret void 20; 21 call void @default_to_1_8_a() 22 ret void 23} 24 25; Called from a single kernel with 1,2 26define internal void @default_to_1_2() { 27; CHECK-LABEL: define internal void @default_to_1_2 28; CHECK-SAME: () #[[ATTR1:[0-9]+]] { 29; CHECK-NEXT: ret void 30; 31 ret void 32} 33 34define amdgpu_kernel void @kernel_1_2() #1 { 35; CHECK-LABEL: define amdgpu_kernel void @kernel_1_2 36; CHECK-SAME: () #[[ATTR1]] { 37; CHECK-NEXT: call void @default_to_1_2() 38; CHECK-NEXT: call void @flat_group_1_1() 39; CHECK-NEXT: call void @default_to_1_8_b() 40; CHECK-NEXT: call void @flat_group_2_8() 41; CHECK-NEXT: ret void 42; 43 call void @default_to_1_2() 44 call void @flat_group_1_1() 45 call void @default_to_1_8_b() 46 call void @flat_group_2_8() 47 ret void 48} 49 50; Called from a single kernel with 1,4 51define internal void @default_to_1_4() { 52; CHECK-LABEL: define internal void @default_to_1_4 53; CHECK-SAME: () #[[ATTR2:[0-9]+]] { 54; CHECK-NEXT: ret void 55; 56 ret void 57} 58 59define amdgpu_kernel void @kernel_1_4() #2 { 60; CHECK-LABEL: define amdgpu_kernel void @kernel_1_4 61; CHECK-SAME: () #[[ATTR2]] { 62; CHECK-NEXT: call void @default_to_1_4() 63; CHECK-NEXT: ret void 64; 65 call void @default_to_1_4() 66 ret void 67} 68 69; Called from kernels with 2,9 
and 9,9 70define internal void @default_to_2_9() { 71; CHECK-LABEL: define internal void @default_to_2_9 72; CHECK-SAME: () #[[ATTR3:[0-9]+]] { 73; CHECK-NEXT: ret void 74; 75 ret void 76} 77 78; This already has strict bounds, but is called from kernels with wider 79; bounds, and should not be changed. 80define internal void @flat_group_1_1() #3 { 81; CHECK-LABEL: define internal void @flat_group_1_1 82; CHECK-SAME: () #[[ATTR4:[0-9]+]] { 83; CHECK-NEXT: ret void 84; 85 ret void 86} 87 88; 2,8 -> 2,2 89define internal void @flat_group_2_8() #4 { 90; CHECK-LABEL: define internal void @flat_group_2_8 91; CHECK-SAME: () #[[ATTR5:[0-9]+]] { 92; CHECK-NEXT: ret void 93; 94 ret void 95} 96 97; 9,10 -> 9,9 98define internal void @flat_group_9_10() #5 { 99; CHECK-LABEL: define internal void @flat_group_9_10 100; CHECK-SAME: () #[[ATTR6:[0-9]+]] { 101; CHECK-NEXT: ret void 102; 103 ret void 104} 105 106define amdgpu_kernel void @kernel_2_9() #6 { 107; CHECK-LABEL: define amdgpu_kernel void @kernel_2_9 108; CHECK-SAME: () #[[ATTR3]] { 109; CHECK-NEXT: call void @default_to_2_9() 110; CHECK-NEXT: call void @flat_group_1_1() 111; CHECK-NEXT: ret void 112; 113 call void @default_to_2_9() 114 call void @flat_group_1_1() 115 ret void 116} 117 118define amdgpu_kernel void @kernel_9_9() #7 { 119; CHECK-LABEL: define amdgpu_kernel void @kernel_9_9 120; CHECK-SAME: () #[[ATTR7:[0-9]+]] { 121; CHECK-NEXT: call void @default_to_2_9() 122; CHECK-NEXT: call void @flat_group_9_10() 123; CHECK-NEXT: ret void 124; 125 call void @default_to_2_9() 126 call void @flat_group_9_10() 127 ret void 128} 129 130; Called from kernels with 2,8 and 1,2 => 1,8 131define internal void @default_to_1_8_b() { 132; CHECK-LABEL: define internal void @default_to_1_8_b 133; CHECK-SAME: () #[[ATTR0]] { 134; CHECK-NEXT: ret void 135; 136 ret void 137} 138 139; The kernel's lower bound is higher than the callee's lower bound, so 140; this should probably be illegal. 
141define amdgpu_kernel void @kernel_2_8() #4 { 142; CHECK-LABEL: define amdgpu_kernel void @kernel_2_8 143; CHECK-SAME: () #[[ATTR5]] { 144; CHECK-NEXT: call void @default_to_1_8_a() 145; CHECK-NEXT: call void @default_to_1_8_b() 146; CHECK-NEXT: ret void 147; 148 call void @default_to_1_8_a() 149 call void @default_to_1_8_b() 150 ret void 151} 152 153; 1,2 -> 2,2 154define internal void @merge_cycle_0() #1 { 155; CHECK-LABEL: define internal void @merge_cycle_0 156; CHECK-SAME: () #[[ATTR1]] { 157; CHECK-NEXT: call void @merge_cycle_1() 158; CHECK-NEXT: ret void 159; 160 call void @merge_cycle_1() 161 ret void 162} 163 164; Called from 1,2 + 3,8 165; 2,8 -> 2,8 166define internal void @merge_cycle_1() #4 { 167; CHECK-LABEL: define internal void @merge_cycle_1 168; CHECK-SAME: () #[[ATTR5]] { 169; CHECK-NEXT: call void @merge_cycle_0() 170; CHECK-NEXT: ret void 171; 172 call void @merge_cycle_0() 173 ret void 174} 175 176define amdgpu_kernel void @kernel_3_8() #8 { 177; CHECK-LABEL: define amdgpu_kernel void @kernel_3_8 178; CHECK-SAME: () #[[ATTR8:[0-9]+]] { 179; CHECK-NEXT: call void @merge_cycle_0() 180; CHECK-NEXT: call void @default_captured_address() 181; CHECK-NEXT: call void @externally_visible_default() 182; CHECK-NEXT: [[F32:%.*]] = call float @bitcasted_function() 183; CHECK-NEXT: ret void 184; 185 call void @merge_cycle_0() 186 call void @default_captured_address() 187 call void @externally_visible_default() 188 %f32 = call float @bitcasted_function() 189 ret void 190} 191 192define internal void @default_captured_address() { 193; CHECK-LABEL: define internal void @default_captured_address 194; CHECK-SAME: () #[[ATTR9:[0-9]+]] { 195; CHECK-NEXT: store volatile ptr @default_captured_address, ptr undef, align 8 196; CHECK-NEXT: ret void 197; 198 store volatile ptr @default_captured_address, ptr undef, align 8 199 ret void 200} 201 202define void @externally_visible_default() { 203; CHECK-LABEL: define void @externally_visible_default 204; CHECK-SAME: () 
#[[ATTR9]] { 205; CHECK-NEXT: ret void 206; 207 ret void 208} 209 210; Called through a mismatched signature (the call returns float, the definition returns i32). 1,10 -> 3,8 211define internal i32 @bitcasted_function() { 212; CHECK-LABEL: define internal i32 @bitcasted_function 213; CHECK-SAME: () #[[ATTR8]] { 214; CHECK-NEXT: ret i32 0 215; 216 ret i32 0 217} 218 219define internal void @called_from_invalid_bounds_0() { 220; CHECK-LABEL: define internal void @called_from_invalid_bounds_0 221; CHECK-SAME: () #[[ATTR10:[0-9]+]] { 222; CHECK-NEXT: ret void 223; 224 ret void 225} 226 227define internal void @called_from_invalid_bounds_1() { 228; CHECK-LABEL: define internal void @called_from_invalid_bounds_1 229; CHECK-SAME: () #[[ATTR10]] { 230; CHECK-NEXT: ret void 231; 232 ret void 233} 234 235; Invalid range for amdgpu-waves-per-eu (requests 0,8; the autogenerated checks expect 1,8 on the kernel) 236define amdgpu_kernel void @kernel_invalid_bounds_0_8() #9 { 237; CHECK-LABEL: define amdgpu_kernel void @kernel_invalid_bounds_0_8 238; CHECK-SAME: () #[[ATTR0]] { 239; CHECK-NEXT: call void @called_from_invalid_bounds_0() 240; CHECK-NEXT: ret void 241; 242 call void @called_from_invalid_bounds_0() 243 ret void 244} 245 246; Invalid range for amdgpu-waves-per-eu (requests 1,123; the autogenerated checks expect it to stay 1,123 on the kernel) 247define amdgpu_kernel void @kernel_invalid_bounds_1_123() #10 { 248; CHECK-LABEL: define amdgpu_kernel void @kernel_invalid_bounds_1_123 249; CHECK-SAME: () #[[ATTR11:[0-9]+]] { 250; CHECK-NEXT: call void @called_from_invalid_bounds_1() 251; CHECK-NEXT: ret void 252; 253 call void @called_from_invalid_bounds_1() 254 ret void 255} 256 257; XXX - Why is the maximum not 6? 258; The 512 maximum workgroup size implies a minimum occupancy of 2. 
The 259; implied minimum waves-per-eu should not be 3 260; -> 2,10 261define void @larger_group_size_implies_lower_minimum() #11 { 262; CHECK-LABEL: define void @larger_group_size_implies_lower_minimum 263; CHECK-SAME: () #[[ATTR12:[0-9]+]] { 264; CHECK-NEXT: ret void 265; 266 ret void 267} 268 269define amdgpu_kernel void @kernel_3_6() #12 { 270; CHECK-LABEL: define amdgpu_kernel void @kernel_3_6 271; CHECK-SAME: () #[[ATTR13:[0-9]+]] { 272; CHECK-NEXT: call void @larger_group_size_implies_lower_minimum() 273; CHECK-NEXT: ret void 274; 275 call void @larger_group_size_implies_lower_minimum() 276 ret void 277} 278 279; 3,6 -> 6,9 280define internal void @refine_upper_func_3_6() #13 { 281; CHECK-LABEL: define internal void @refine_upper_func_3_6 282; CHECK-SAME: () #[[ATTR14:[0-9]+]] { 283; CHECK-NEXT: ret void 284; 285 ret void 286} 287 288; 4,8 -> 6,8 289define internal void @refine_lower_func_4_8() #14 { 290; CHECK-LABEL: define internal void @refine_lower_func_4_8 291; CHECK-SAME: () #[[ATTR15:[0-9]+]] { 292; CHECK-NEXT: call void @refine_upper_func_3_6() 293; CHECK-NEXT: ret void 294; 295 call void @refine_upper_func_3_6() 296 ret void 297} 298 299define amdgpu_kernel void @kernel_foo_6_8() #15 { 300; CHECK-LABEL: define amdgpu_kernel void @kernel_foo_6_8 301; CHECK-SAME: () #[[ATTR16:[0-9]+]] { 302; CHECK-NEXT: call void @refine_upper_func_3_6() 303; CHECK-NEXT: call void @refine_lower_func_4_8() 304; CHECK-NEXT: call void @func_9_10_a() 305; CHECK-NEXT: ret void 306; 307 call void @refine_upper_func_3_6() 308 call void @refine_lower_func_4_8() 309 call void @func_9_10_a() 310 ret void 311} 312 313; 5,5 -> 5,5 314define internal void @func_5_5() #16 { 315; CHECK-LABEL: define internal void @func_5_5 316; CHECK-SAME: () #[[ATTR17:[0-9]+]] { 317; CHECK-NEXT: ret void 318; 319 ret void 320} 321 322; 5,8 -> 8,8 323define internal void @func_5_8() #17 { 324; CHECK-LABEL: define internal void @func_5_8 325; CHECK-SAME: () #[[ATTR18:[0-9]+]] { 326; CHECK-NEXT: ret 
void 327; 328 ret void 329} 330 331; 9,10 -> 9,10 332define internal void @func_9_10_a() #18 { 333; CHECK-LABEL: define internal void @func_9_10_a 334; CHECK-SAME: () #[[ATTR19:[0-9]+]] { 335; CHECK-NEXT: ret void 336; 337 ret void 338} 339 340; 9,10 -> 9,9 341define internal void @func_9_10_b() #18 { 342; CHECK-LABEL: define internal void @func_9_10_b 343; CHECK-SAME: () #[[ATTR19]] { 344; CHECK-NEXT: ret void 345; 346 ret void 347} 348 349define amdgpu_kernel void @kernel_bar_8_9() #19 { 350; CHECK-LABEL: define amdgpu_kernel void @kernel_bar_8_9 351; CHECK-SAME: () #[[ATTR20:[0-9]+]] { 352; CHECK-NEXT: call void @refine_upper_func_3_6() 353; CHECK-NEXT: call void @func_5_5() 354; CHECK-NEXT: call void @func_9_10_b() 355; CHECK-NEXT: call void @func_5_8() 356; CHECK-NEXT: call void @externally_visible() 357; CHECK-NEXT: ret void 358; 359 call void @refine_upper_func_3_6() 360 call void @func_5_5() 361 call void @func_9_10_b() 362 call void @func_5_8() 363 call void @externally_visible() 364 ret void 365} 366 367; This is an optimization hint based on users, so it's not strictly 368; required that all callers be visible. 369define void @externally_visible() { 370; CHECK-LABEL: define void @externally_visible 371; CHECK-SAME: () #[[ATTR9]] { 372; CHECK-NEXT: ret void 373; 374 ret void 375} 376 377 378; Use a 1 wave workgroup (flat work group size 1,64 in #0-#10) so that the 379; workgroup size does not interact with the implied waves per EU. 
; Attribute groups: #0-#10 pin amdgpu-flat-work-group-size to 1,64 and vary amdgpu-waves-per-eu; #11-#12 use a 1,512 work group size (#11 has no amdgpu-waves-per-eu); #13-#19 set only amdgpu-waves-per-eu.
380 381attributes #0 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,8" } 382attributes #1 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,2" } 383attributes #2 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,4" } 384attributes #3 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,1" } 385attributes #4 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="2,8" } 386attributes #5 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="9,10" } 387attributes #6 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="2,9" } 388attributes #7 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="9,9" } 389attributes #8 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="3,8" } 390attributes #9 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="0,8" } 391attributes #10 = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-waves-per-eu"="1,123" } 392attributes #11 = { "amdgpu-flat-work-group-size"="1,512" } 393attributes #12 = { "amdgpu-flat-work-group-size"="1,512" "amdgpu-waves-per-eu"="3,6" } 394attributes #13 = { "amdgpu-waves-per-eu"="3,6" } 395attributes #14 = { "amdgpu-waves-per-eu"="4,8" } 396attributes #15 = { "amdgpu-waves-per-eu"="6,8" } 397attributes #16 = { "amdgpu-waves-per-eu"="5,5" } 398attributes #17 = { "amdgpu-waves-per-eu"="5,8" } 399attributes #18 = { "amdgpu-waves-per-eu"="9,10" } 400attributes #19 = { "amdgpu-waves-per-eu"="8,9" } 401;. 
402; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,8" "uniform-work-group-size"="false" } 403; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,2" "uniform-work-group-size"="false" } 404; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,4" "uniform-work-group-size"="false" } 405; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" 
"amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,9" "uniform-work-group-size"="false" } 406; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,1" "uniform-work-group-size"="false" } 407; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,8" "uniform-work-group-size"="false" } 408; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" 
"amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,10" "uniform-work-group-size"="false" } 409; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,9" "uniform-work-group-size"="false" } 410; CHECK: attributes #[[ATTR8]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,8" "uniform-work-group-size"="false" } 411; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } 412; 
CHECK: attributes #[[ATTR10]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } 413; CHECK: attributes #[[ATTR11]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,123" "uniform-work-group-size"="false" } 414; CHECK: attributes #[[ATTR12]] = { "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } 415; CHECK: attributes #[[ATTR13]] = { "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" 
"amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,6" "uniform-work-group-size"="false" } 416; CHECK: attributes #[[ATTR14]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,6" "uniform-work-group-size"="false" } 417; CHECK: attributes #[[ATTR15]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "uniform-work-group-size"="false" } 418; CHECK: attributes #[[ATTR16]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" 
"amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="6,8" "uniform-work-group-size"="false" } 419; CHECK: attributes #[[ATTR17]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="5,5" "uniform-work-group-size"="false" } 420; CHECK: attributes #[[ATTR18]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="5,8" "uniform-work-group-size"="false" } 421; CHECK: attributes #[[ATTR19]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,10" "uniform-work-group-size"="false" } 422; CHECK: attributes #[[ATTR20]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" 
"amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="8,9" "uniform-work-group-size"="false" } 423;. 424