; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,CI %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN-V5 %s
; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN-V5 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,VI,VI-BUG %s

; Checks the SGPR count, VGPR count and scratch size that llc reports for
; functions and kernels, including usage that comes from callees, from calls to
; external declarations and from recursion. The code object version in the
; module flag at the bottom of the file is substituted by the sed step of each
; run line above.

; Make sure to run on a GPU with the SGPR allocation bug (the -mcpu=iceland run
; above is the one checked with the VI-BUG prefix).

; Leaf function whose only register use is an inline-asm clobber of vcc.
; GCN-LABEL: {{^}}use_vcc:
; GCN: ; TotalNumSgprs: 34
; GCN: ; NumVgprs: 0
define void @use_vcc() #1 {
  call void asm sideeffect "", "~{vcc}" () #0
  ret void
}

; Non-leaf function: checks the save/restore sequence around the call as well
; as the reported register counts.
; GCN-LABEL: {{^}}indirect_use_vcc:
; GCN: s_mov_b32 s4, s33
; GCN: v_writelane_b32 v40, s4, 2
; GCN: v_writelane_b32 v40, s30, 0
; GCN: v_writelane_b32 v40, s31, 1
; GCN: s_swappc_b64
; GCN: v_readlane_b32 s31, v40, 1
; GCN: v_readlane_b32 s30, v40, 0
; GCN: v_readlane_b32 s4, v40, 2
; GCN: s_mov_b32 s33, s4
; GCN: s_setpc_b64 s[30:31]
; GCN: ; TotalNumSgprs: 36
; GCN: ; NumVgprs: 41
define void @indirect_use_vcc() #1 {
  call void @use_vcc()
  ret void
}

; Kernel that reaches use_vcc through two levels of calls.
; GCN-LABEL: {{^}}indirect_2level_use_vcc_kernel:
; CI: ; TotalNumSgprs: 38
; VI-NOBUG: ; TotalNumSgprs: 40
; VI-BUG: ; TotalNumSgprs: 96
; GCN: ; NumVgprs: 41
define amdgpu_kernel void @indirect_2level_use_vcc_kernel(ptr addrspace(1) %out) #0 {
  call void @indirect_use_vcc()
  ret void
}

; Leaf function whose only register use is an inline-asm clobber of
; flat_scratch.
; GCN-LABEL: {{^}}use_flat_scratch:
; CI: ; TotalNumSgprs: 36
; VI: ; TotalNumSgprs: 38
; GCN: ; NumVgprs: 0
define void @use_flat_scratch() #1 {
  call void asm sideeffect "", "~{flat_scratch}" () #0
  ret void
}

; GCN-LABEL: {{^}}indirect_use_flat_scratch:
; CI: ; TotalNumSgprs: 38
; VI: ; TotalNumSgprs: 40
; GCN: ; NumVgprs: 41
define void @indirect_use_flat_scratch() #1 {
  call void @use_flat_scratch()
  ret void
}

; GCN-LABEL: {{^}}indirect_2level_use_flat_scratch_kernel:
; CI: ; TotalNumSgprs: 38
; VI-NOBUG: ; TotalNumSgprs: 40
; VI-BUG: ; TotalNumSgprs: 96
; GCN: ; NumVgprs: 41
define amdgpu_kernel void @indirect_2level_use_flat_scratch_kernel(ptr addrspace(1) %out) #0 {
  call void @indirect_use_flat_scratch()
  ret void
}

; Clobbers v0 through v9 with two inline-asm statements.
; GCN-LABEL: {{^}}use_10_vgpr:
; GCN: ; NumVgprs: 10
define void @use_10_vgpr() #1 {
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4}"() #0
  call void asm sideeffect "", "~{v5},~{v6},~{v7},~{v8},~{v9}"() #0
  ret void
}

; GCN-LABEL: {{^}}indirect_use_10_vgpr:
; GCN: ; NumVgprs: 41
define void @indirect_use_10_vgpr() #0 {
  call void @use_10_vgpr()
  ret void
}

; GCN-LABEL: {{^}}indirect_2_level_use_10_vgpr:
; GCN: ; NumVgprs: 41
define amdgpu_kernel void @indirect_2_level_use_10_vgpr() #0 {
  call void @indirect_use_10_vgpr()
  ret void
}

; Clobbers only v49; the reported count is 50.
; GCN-LABEL: {{^}}use_50_vgpr:
; GCN: ; NumVgprs: 50
define void @use_50_vgpr() #1 {
  call void asm sideeffect "", "~{v49}"() #0
  ret void
}

; GCN-LABEL: {{^}}indirect_use_50_vgpr:
; GCN: ; NumVgprs: 50
define void @indirect_use_50_vgpr() #0 {
  call void @use_50_vgpr()
  ret void
}

; Clobbers only s79; the reported count is 80.
; GCN-LABEL: {{^}}use_80_sgpr:
; GCN: ; TotalNumSgprs: 80
define void @use_80_sgpr() #1 {
  call void asm sideeffect "", "~{s79}"() #0
  ret void
}

; GCN-LABEL: {{^}}indirect_use_80_sgpr:
; GCN: ; TotalNumSgprs: 82
define void @indirect_use_80_sgpr() #1 {
  call void @use_80_sgpr()
  ret void
}

; GCN-LABEL: {{^}}indirect_2_level_use_80_sgpr:
; CI: ; TotalNumSgprs: 84
; VI-NOBUG: ; TotalNumSgprs: 86
; VI-BUG: ; TotalNumSgprs: 96
define amdgpu_kernel void @indirect_2_level_use_80_sgpr() #0 {
  call void @indirect_use_80_sgpr()
  ret void
}


; Uses a 512 x i32 private (addrspace(5)) alloca.
; GCN-LABEL: {{^}}use_stack0:
; GCN: ScratchSize: 2052
define void @use_stack0() #1 {
  %alloca = alloca [512 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca) #0
  ret void
}

; Uses a 100 x i32 private (addrspace(5)) alloca.
; GCN-LABEL: {{^}}use_stack1:
; GCN: ScratchSize: 404
define void @use_stack1() #1 {
  %alloca = alloca [100 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca) #0
  ret void
}

; Has its own 16 x i32 alloca and also calls use_stack0.
; GCN-LABEL: {{^}}indirect_use_stack:
; GCN: ScratchSize: 2132
define void @indirect_use_stack() #1 {
  %alloca = alloca [16 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca) #0
  call void @use_stack0()
  ret void
}

; GCN-LABEL: {{^}}indirect_2_level_use_stack:
; GCN: ScratchSize: 2132
define amdgpu_kernel void @indirect_2_level_use_stack() #0 {
  call void @indirect_use_stack()
  ret void
}


; Should be maximum of callee usage
; GCN-LABEL: {{^}}multi_call_use_use_stack:
; GCN: ScratchSize: 2052
define amdgpu_kernel void @multi_call_use_use_stack() #0 {
  call void @use_stack0()
  call void @use_stack1()
  ret void
}


declare void @external() #0

; Kernel calling an external declaration.
; NOTE: the next two lines carry no check prefix, so FileCheck ignores them.
; GCN-LABEL: {{^}}usage_external:
; TotalNumSgprs: 48
; NumVgprs: 24
; GCN: ScratchSize: 16384
;
; GCN-V5-LABEL: {{^}}usage_external:
; GCN-V5: ScratchSize: 0
define amdgpu_kernel void @usage_external() #0 {
  call void @external()
  ret void
}

declare void @external_recurse() #2

; Kernel calling an external declaration that is not marked norecurse.
; NOTE: the next two lines carry no check prefix, so FileCheck ignores them.
; GCN-LABEL: {{^}}usage_external_recurse:
; TotalNumSgprs: 48
; NumVgprs: 24
; GCN: ScratchSize: 16384
;
; GCN-V5-LABEL: {{^}}usage_external_recurse:
; GCN-V5: ScratchSize: 0
define amdgpu_kernel void @usage_external_recurse() #0 {
  call void @external_recurse()
  ret void
}

; Self-recursive function with a 512 x i32 private alloca; it calls itself with
; %val - 1 until %val is 0.
; GCN-LABEL: {{^}}direct_recursion_use_stack:
; GCN: ScratchSize: 18448{{$}}
;
; GCN-V5-LABEL: {{^}}direct_recursion_use_stack:
; GCN-V5: ScratchSize: 2064{{$}}
define void @direct_recursion_use_stack(i32 %val) #2 {
  %alloca = alloca [512 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"(ptr addrspace(5) %alloca) #0
  %cmp = icmp eq i32 %val, 0
  br i1 %cmp, label %ret, label %call

call:
  %val.sub1 = sub i32 %val, 1
  call void @direct_recursion_use_stack(i32 %val.sub1)
  br label %ret

ret:
  ret void
}

; GCN-LABEL: {{^}}usage_direct_recursion:
; GCN: .amdhsa_private_segment_fixed_size 18448
;
; GCN-V5-LABEL: {{^}}usage_direct_recursion:
; GCN-V5: .amdhsa_private_segment_fixed_size 2064{{$}}
define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
  call void @direct_recursion_use_stack(i32 %n)
  ret void
}

; Make sure there's no assert when a sgpr96 is used.
; GCN-LABEL: {{^}}count_use_sgpr96_external_call:
; GCN: ; sgpr96 s[{{[0-9]+}}:{{[0-9]+}}]
; GCN: .set count_use_sgpr96_external_call.num_vgpr, max(0, amdgpu.max_num_vgpr)
; GCN: .set count_use_sgpr96_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
; CI: TotalNumSgprs: count_use_sgpr96_external_call.numbered_sgpr+4
; VI-BUG: TotalNumSgprs: 96
; GCN: NumVgprs: count_use_sgpr96_external_call.num_vgpr
define amdgpu_kernel void @count_use_sgpr96_external_call() {
entry:
  tail call void asm sideeffect "; sgpr96 $0", "s"(<3 x i32> <i32 10, i32 11, i32 12>) #1
  call void @external()
  ret void
}

; Make sure there's no assert when a sgpr160 is used.
; GCN-LABEL: {{^}}count_use_sgpr160_external_call:
; GCN: ; sgpr160 s[{{[0-9]+}}:{{[0-9]+}}]
; GCN: .set count_use_sgpr160_external_call.num_vgpr, max(0, amdgpu.max_num_vgpr)
; GCN: .set count_use_sgpr160_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
; CI: TotalNumSgprs: count_use_sgpr160_external_call.numbered_sgpr+4
; VI-BUG: TotalNumSgprs: 96
; GCN: NumVgprs: count_use_sgpr160_external_call.num_vgpr
define amdgpu_kernel void @count_use_sgpr160_external_call() {
entry:
  tail call void asm sideeffect "; sgpr160 $0", "s"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) #1
  call void @external()
  ret void
}

; Make sure there's no assert when a vgpr160 is used.
; GCN-LABEL: {{^}}count_use_vgpr160_external_call:
; GCN: ; vgpr160 v[{{[0-9]+}}:{{[0-9]+}}]
; GCN: .set count_use_vgpr160_external_call.num_vgpr, max(5, amdgpu.max_num_vgpr)
; GCN: .set count_use_vgpr160_external_call.numbered_sgpr, max(33, amdgpu.max_num_sgpr)
; CI: TotalNumSgprs: count_use_vgpr160_external_call.numbered_sgpr+4
; VI-BUG: TotalNumSgprs: 96
; GCN: NumVgprs: count_use_vgpr160_external_call.num_vgpr
define amdgpu_kernel void @count_use_vgpr160_external_call() {
entry:
  tail call void asm sideeffect "; vgpr160 $0", "v"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) #1
  call void @external()
  ret void
}

; Module-wide maxima referenced by the .set expressions checked above.
; GCN: .set amdgpu.max_num_vgpr, 50
; GCN: .set amdgpu.max_num_agpr, 0
; GCN: .set amdgpu.max_num_sgpr, 80

; Resolved register counts in the kernel metadata for the three kernels that
; call @external.
; GCN-LABEL: amdhsa.kernels:
; GCN: .name: count_use_sgpr96_external_call
; CI: .sgpr_count: 84
; VI-NOBUG: .sgpr_count: 86
; VI-BUG: .sgpr_count: 96
; GCN: .vgpr_count: 50
; GCN: .name: count_use_sgpr160_external_call
; CI: .sgpr_count: 84
; VI-NOBUG: .sgpr_count: 86
; VI-BUG: .sgpr_count: 96
; GCN: .vgpr_count: 50
; GCN: .name: count_use_vgpr160_external_call
; CI: .sgpr_count: 84
; VI-NOBUG: .sgpr_count: 86
; VI-BUG: .sgpr_count: 96
; GCN: .vgpr_count: 50

; NOTE: attribute groups #0 and #1 currently list exactly the same attributes;
; both are kept because the functions above reference each of them.
attributes #0 = { nounwind noinline norecurse "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
attributes #1 = { nounwind noinline norecurse "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
attributes #2 = { nounwind noinline }

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 CODE_OBJECT_VERSION}