; Note: uses a randomly selected assumed external call stack size so that the
; test assertions are unlikely to succeed by accident.

; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX7 %s
; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mattr=-xnack -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX8 %s
; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mattr=-xnack -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX9 %s
; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mattr=-xnack -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX10 %s

; CHECK-LABEL: amdhsa.kernels

; test a kernel without an external call that occurs before its callee in the module
; CHECK-LABEL: test1
; CHECK: .private_segment_fixed_size: 20

; GFX7: .sgpr_count: 37
; GFX7: .sgpr_spill_count: 0
; GFX7: .vgpr_count: 4
; GFX7: .vgpr_spill_count: 0

; GFX8: .sgpr_count: 39
; GFX8: .sgpr_spill_count: 0
; GFX8: .vgpr_count: 4
; GFX8: .vgpr_spill_count: 0

; GFX9: .sgpr_count: 39
; GFX9: .sgpr_spill_count: 0
; GFX9: .vgpr_count: 4
; GFX9: .vgpr_spill_count: 0

; GFX10: .sgpr_count: 33
; GFX10: .sgpr_spill_count: 0
; GFX10: .vgpr_count: 4
; GFX10: .vgpr_spill_count: 0

; Kernel defined ahead of @f: loads a float through %x, passes it to the
; internal function @f, and stores the result back through %x. The accesses
; are volatile so they are not optimized away.
define amdgpu_kernel void @test1(ptr %x) #1 {
  %1 = load volatile float, ptr %x
  %2 = call float @f(float %1)
  store volatile float %2, ptr %x
  ret void
}

; Internal callee with its own stack object: a four-float addrspace(5) alloca
; that is written with 3.0 and read back (both volatile) before the value is
; added to %arg0 and returned.
define internal float @f(float %arg0) #1 {
  %stack = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 3.0, ptr addrspace(5) %stack
  %val = load volatile float, ptr addrspace(5) %stack
  %add = fadd float %arg0, %val
  ret float %add
}

; test a kernel without an external call that occurs after its callee in the module
; CHECK-LABEL: test2
; CHECK: .private_segment_fixed_size: 20

; GFX7: .sgpr_count: 37
; GFX7: .sgpr_spill_count: 0
; GFX7: .vgpr_count: 4
; GFX7: .vgpr_spill_count: 0

; GFX8: .sgpr_count: 39
; GFX8: .sgpr_spill_count: 0
; GFX8: .vgpr_count: 4
; GFX8: .vgpr_spill_count: 0

; GFX9: .sgpr_count: 39
; GFX9: .sgpr_spill_count: 0
; GFX9: .vgpr_count: 4
; GFX9: .vgpr_spill_count: 0

; GFX10: .sgpr_count: 33
; GFX10: .sgpr_spill_count: 0
; GFX10: .vgpr_count: 4
; GFX10: .vgpr_spill_count: 0

; Same body as @test1, but defined after @f and without attribute group #1.
define amdgpu_kernel void @test2(ptr %x) {
  %1 = load volatile float, ptr %x
  %2 = call float @f(float %1)
  store volatile float %2, ptr %x
  ret void
}

; test a kernel with an external call that occurs before its callee in the module
; The expected private segment size below equals the value passed to
; -amdgpu-assume-external-call-stack-size on the llc command lines.
; CHECK-LABEL: test3
; CHECK: .private_segment_fixed_size: 5310

; GFX7: .sgpr_count: 37
; GFX7: .sgpr_spill_count: 0
; GFX7: .vgpr_count: 32
; GFX7: .vgpr_spill_count: 0

; GFX8: .sgpr_count: 39
; GFX8: .sgpr_spill_count: 0
; GFX8: .vgpr_count: 32
; GFX8: .vgpr_spill_count: 0

; GFX9: .sgpr_count: 39
; GFX9: .sgpr_spill_count: 0
; GFX9: .vgpr_count: 32
; GFX9: .vgpr_spill_count: 0

; GFX10: .sgpr_count: 35
; GFX10: .sgpr_spill_count: 0
; GFX10: .vgpr_count: 32
; GFX10: .vgpr_spill_count: 0

; Kernel whose only callee, @g, has no body in this module; it is declared
; further down.
define amdgpu_kernel void @test3() {
  call void @g()
  ret void
}

; External function: declaration only, no definition in this module.
declare void @g() #0

; test a kernel with an external call that occurs after its callee in the module
; The expected values match test3; only the position relative to the
; declaration of @g differs.
; CHECK-LABEL: test4
; CHECK: .private_segment_fixed_size: 5310

; GFX7: .sgpr_count: 37
; GFX7: .sgpr_spill_count: 0
; GFX7: .vgpr_count: 32
; GFX7: .vgpr_spill_count: 0

; GFX8: .sgpr_count: 39
; GFX8: .sgpr_spill_count: 0
; GFX8: .vgpr_count: 32
; GFX8: .vgpr_spill_count: 0

; GFX9: .sgpr_count: 39
; GFX9: .sgpr_spill_count: 0
; GFX9: .vgpr_count: 32
; GFX9: .vgpr_spill_count: 0

; GFX10: .sgpr_count: 35
; GFX10: .sgpr_spill_count: 0
; GFX10: .vgpr_count: 32
; GFX10: .vgpr_spill_count: 0

; Same body as @test3, but placed after the declaration of @g.
define amdgpu_kernel void @test4() {
  call void @g()
  ret void
}

; #0 is applied to the external declaration @g; #1 is applied to @test1 and @f.
attributes #0 = { norecurse }
attributes #1 = { norecurse "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}