; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s | FileCheck %s

; Check propagation of amdgpu-flat-work-group-size attribute.

; Called from a single kernel with 1,256
define internal void @default_to_1_256() {
; CHECK-LABEL: define {{[^@]+}}@default_to_1_256
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: ret void
;
  ret void
}

; Kernel carrying attribute group #0; sole caller of @default_to_1_256.
define amdgpu_kernel void @kernel_1_256() #0 {
; CHECK-LABEL: define {{[^@]+}}@kernel_1_256
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: call void @default_to_1_256()
; CHECK-NEXT: ret void
;
  call void @default_to_1_256()
  ret void
}

; Called from a single kernel with 64,128
define internal void @default_to_64_128() {
; CHECK-LABEL: define {{[^@]+}}@default_to_64_128
; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: ret void
;
  ret void
}

; Kernel carrying attribute group #1. It calls one callee without an explicit
; range (@default_to_64_128), one shared with another kernel
; (@default_to_64_256), and two callees that already carry their own range.
define amdgpu_kernel void @kernel_64_128() #1 {
; CHECK-LABEL: define {{[^@]+}}@kernel_64_128
; CHECK-SAME: () #[[ATTR1]] {
; CHECK-NEXT: call void @default_to_64_128()
; CHECK-NEXT: call void @flat_group_64_64()
; CHECK-NEXT: call void @default_to_64_256()
; CHECK-NEXT: call void @flat_group_128_256()
; CHECK-NEXT: ret void
;
  call void @default_to_64_128()
  call void @flat_group_64_64()
  call void @default_to_64_256()
  call void @flat_group_128_256()
  ret void
}

; Called from kernels with 128,512 and 512,512
define internal void @default_to_128_512() {
; CHECK-LABEL: define {{[^@]+}}@default_to_128_512
; CHECK-SAME: () #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: ret void
;
  ret void
}

; This already has strict bounds but is called from kernels with wider
; bounds, so it should not be changed.
define internal void @flat_group_64_64() #2 {
; CHECK-LABEL: define {{[^@]+}}@flat_group_64_64
; CHECK-SAME: () #[[ATTR3:[0-9]+]] {
; CHECK-NEXT: ret void
;
  ret void
}

; 128,256 -> 128,128
; NOTE(review): the generated assertions bind this function to the same
; attribute group as @kernel_128_256 (128,256), not 128,128 -- confirm whether
; the annotation above is stale.
define internal void @flat_group_128_256() #3 {
; CHECK-LABEL: define {{[^@]+}}@flat_group_128_256
; CHECK-SAME: () #[[ATTR4:[0-9]+]] {
; CHECK-NEXT: ret void
;
  ret void
}

; Starts with 512,1024 (attribute group #4); its only caller in this file is
; @kernel_512_512.
define internal void @flat_group_512_1024() #4 {
; CHECK-LABEL: define {{[^@]+}}@flat_group_512_1024
; CHECK-SAME: () #[[ATTR5:[0-9]+]] {
; CHECK-NEXT: ret void
;
  ret void
}

; Kernel carrying attribute group #5; one of the two callers of
; @default_to_128_512.
define amdgpu_kernel void @kernel_128_512() #5 {
; CHECK-LABEL: define {{[^@]+}}@kernel_128_512
; CHECK-SAME: () #[[ATTR2]] {
; CHECK-NEXT: call void @default_to_128_512()
; CHECK-NEXT: call void @flat_group_64_64()
; CHECK-NEXT: ret void
;
  call void @default_to_128_512()
  call void @flat_group_64_64()
  ret void
}

; Kernel carrying attribute group #6; the other caller of @default_to_128_512.
define amdgpu_kernel void @kernel_512_512() #6 {
; CHECK-LABEL: define {{[^@]+}}@kernel_512_512
; CHECK-SAME: () #[[ATTR6:[0-9]+]] {
; CHECK-NEXT: call void @default_to_128_512()
; CHECK-NEXT: call void @flat_group_512_1024()
; CHECK-NEXT: ret void
;
  call void @default_to_128_512()
  call void @flat_group_512_1024()
  ret void
}

; Called from kernels with 128,256 and 64,128 => 64,256
define internal void @default_to_64_256() {
; CHECK-LABEL: define {{[^@]+}}@default_to_64_256
; CHECK-SAME: () #[[ATTR7:[0-9]+]] {
; CHECK-NEXT: ret void
;
  ret void
}

; The kernel's lower bound is higher than the callee's lower bound, so
; this should probably be illegal.
define amdgpu_kernel void @kernel_128_256() #3 {
; CHECK-LABEL: define {{[^@]+}}@kernel_128_256
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: call void @default_to_64_256()
; CHECK-NEXT: ret void
;
  call void @default_to_64_256()
  ret void
}

; 64,128 -> 64,128
define internal void @merge_cycle_0() #1 {
; CHECK-LABEL: define {{[^@]+}}@merge_cycle_0
; CHECK-SAME: () #[[ATTR1]] {
; CHECK-NEXT: call void @merge_cycle_1()
; CHECK-NEXT: ret void
;
  call void @merge_cycle_1()
  ret void
}

; 128,256 -> 128,128
; NOTE(review): the generated assertions bind this function to the same
; attribute group as @kernel_128_256 (128,256), not 128,128 -- confirm whether
; the annotation above is stale.
define internal void @merge_cycle_1() #3 {
; CHECK-LABEL: define {{[^@]+}}@merge_cycle_1
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: call void @merge_cycle_0()
; CHECK-NEXT: ret void
;
  call void @merge_cycle_0()
  ret void
}

; Kernel carrying attribute group #7. It enters the merge_cycle_0/1 call cycle
; and calls an address-captured function, an externally visible function, and
; a function through a mismatched return type.
define amdgpu_kernel void @kernel_64_256() #7 {
; CHECK-LABEL: define {{[^@]+}}@kernel_64_256
; CHECK-SAME: () #[[ATTR7]] {
; CHECK-NEXT: call void @merge_cycle_0()
; CHECK-NEXT: call void @default_captured_address()
; CHECK-NEXT: call void @externally_visible_default()
; CHECK-NEXT: [[F32:%.*]] = call float @bitcasted_function()
; CHECK-NEXT: ret void
;
  call void @merge_cycle_0()
  call void @default_captured_address()
  call void @externally_visible_default()
  %f32 = call float @bitcasted_function()
  ret void
}

; Internal, but stores its own address; the expected attribute group has no
; amdgpu-flat-work-group-size entry.
define internal void @default_captured_address() {
; CHECK-LABEL: define {{[^@]+}}@default_captured_address
; CHECK-SAME: () #[[ATTR8:[0-9]+]] {
; CHECK-NEXT: store volatile ptr @default_captured_address, ptr undef, align 8
; CHECK-NEXT: ret void
;
  store volatile ptr @default_captured_address, ptr undef, align 8
  ret void
}

; Not internal; expected to share the attribute group of
; @default_captured_address, which has no amdgpu-flat-work-group-size entry.
define void @externally_visible_default() {
; CHECK-LABEL: define {{[^@]+}}@externally_visible_default
; CHECK-SAME: () #[[ATTR8]] {
; CHECK-NEXT: ret void
;
  ret void
}

; 1,1024 -> 64,256
; Defined as returning i32 but called from @kernel_64_256 as returning float.
define internal i32 @bitcasted_function() {
; CHECK-LABEL: define {{[^@]+}}@bitcasted_function
; CHECK-SAME: () #[[ATTR7]] {
; CHECK-NEXT: ret i32 0
;
  ret i32 0
}

attributes #0 = { "amdgpu-flat-work-group-size"="1,256" }
attributes #1 = { "amdgpu-flat-work-group-size"="64,128" }
attributes #2 = { "amdgpu-flat-work-group-size"="64,64" }
attributes #3 = { "amdgpu-flat-work-group-size"="128,256" }
attributes #4 = { "amdgpu-flat-work-group-size"="512,1024" }
attributes #5 = { "amdgpu-flat-work-group-size"="128,512" }
attributes #6 = { "amdgpu-flat-work-group-size"="512,512" }
attributes #7 = { "amdgpu-flat-work-group-size"="64,256" }
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="64,128" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="128,512" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,10" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="64,64" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="128,256" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="512,1024" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,10" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="64,256" "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.