xref: /llvm-project/llvm/test/CodeGen/AMDGPU/hsa-metadata-resource-usage-function-ordering.ll (revision b1bcb7ca460fcd317bbc8309e14c8761bf8394e0)
1; Note: uses a randomly selected assumed external call stack size so that the
2; test assertions are unlikely to succeed by accident.
3
4; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX7 %s
5; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mattr=-xnack -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX8 %s
6; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mattr=-xnack -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX9 %s
7; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mattr=-xnack -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX10 %s
8
9; CHECK-LABEL: amdhsa.kernels
10
11; test a kernel without an external call that occurs before its callee in the module
12; CHECK-LABEL: test1
13; CHECK:     .private_segment_fixed_size: 20
14
15; GFX7: .sgpr_count:     37
16; GFX7: .sgpr_spill_count: 0
17; GFX7: .vgpr_count:     4
18; GFX7: .vgpr_spill_count: 0
19
20; GFX8:     .sgpr_count:     39
21; GFX8:     .sgpr_spill_count: 0
22; GFX8:     .vgpr_count:     4
23; GFX8:     .vgpr_spill_count: 0
24
25; GFX9:     .sgpr_count:     39
26; GFX9:     .sgpr_spill_count: 0
27; GFX9:     .vgpr_count:     4
28; GFX9:     .vgpr_spill_count: 0
29
30; GFX10:     .sgpr_count:     33
31; GFX10:     .sgpr_spill_count: 0
32; GFX10:     .vgpr_count:     4
33; GFX10:     .vgpr_spill_count: 0
34define amdgpu_kernel void @test1(ptr %x) #1 { ; kernel placed before its callee @f in the module; it only calls the internal function @f
35  %1 = load volatile float, ptr %x ; volatile read of the input float through %x
36  %2 = call float @f(float %1) ; @f is defined further down in this module
37  store volatile float %2, ptr %x ; volatile write of the call result back through %x
38  ret void
39}
40
41define internal float @f(float %arg0) #1 { ; internal callee of @test1 and @test2; adds a value round-tripped through a stack slot to %arg0
42  %stack = alloca float, i32 4, align 4, addrspace(5) ; room for four floats (16 bytes) in address space 5
43  store volatile float 3.0, ptr addrspace(5) %stack ; volatile store of 3.0 to the first element
44  %val = load volatile float, ptr addrspace(5) %stack ; volatile reload from the same address
45  %add = fadd float %arg0, %val
46  ret float %add
47}
48
49; test a kernel without an external call that occurs after its callee in the module
50; CHECK-LABEL: test2
51; CHECK:     .private_segment_fixed_size: 20
52
53; GFX7:     .sgpr_count:     37
54; GFX7:     .sgpr_spill_count: 0
55; GFX7:     .vgpr_count:     4
56; GFX7:     .vgpr_spill_count: 0
57
58; GFX8:     .sgpr_count:     39
59; GFX8:     .sgpr_spill_count: 0
60; GFX8:     .vgpr_count:     4
61; GFX8:     .vgpr_spill_count: 0
62
63; GFX9:     .sgpr_count:     39
64; GFX9:     .sgpr_spill_count: 0
65; GFX9:     .vgpr_count:     4
66; GFX9:     .vgpr_spill_count: 0
67
68; GFX10:     .sgpr_count:     33
69; GFX10:     .sgpr_spill_count: 0
70; GFX10:     .vgpr_count:     4
71; GFX10:     .vgpr_spill_count: 0
72define amdgpu_kernel void @test2(ptr %x) { ; same body as @test1, but placed after its callee @f; note it carries no attribute group
73  %1 = load volatile float, ptr %x ; volatile read of the input float through %x
74  %2 = call float @f(float %1) ; @f is already defined earlier in this module
75  store volatile float %2, ptr %x ; volatile write of the call result back through %x
76  ret void
77}
78
79; test a kernel with an external call that occurs before its callee in the module
80; CHECK-LABEL: test3
81; CHECK:     .private_segment_fixed_size: 5310
82
83; GFX7:     .sgpr_count:     37
84; GFX7:     .sgpr_spill_count: 0
85; GFX7:     .vgpr_count:     32
86; GFX7:     .vgpr_spill_count: 0
87
88; GFX8:     .sgpr_count:     39
89; GFX8:     .sgpr_spill_count: 0
90; GFX8:     .vgpr_count:     32
91; GFX8:     .vgpr_spill_count: 0
92
93; GFX9:     .sgpr_count:     39
94; GFX9:     .sgpr_spill_count: 0
95; GFX9:     .vgpr_count:     32
96; GFX9:     .vgpr_spill_count: 0
97
98; GFX10:     .sgpr_count:     35
99; GFX10:     .sgpr_spill_count: 0
100; GFX10:     .vgpr_count:     32
101; GFX10:     .vgpr_spill_count: 0
102define amdgpu_kernel void @test3() { ; kernel placed before the declaration of its external callee @g
103  call void @g() ; @g has no body in this module, so its resource usage is unknown here
104  ret void
105}
106
107declare void @g() #0 ; external function (declaration only); called by @test3 and @test4
108
109; test a kernel with an external call that occurs after its callee in the module
110; CHECK-LABEL: test4
111; CHECK:     .private_segment_fixed_size: 5310
112
113; GFX7:     .sgpr_count:     37
114; GFX7:     .sgpr_spill_count: 0
115; GFX7:     .vgpr_count:     32
116; GFX7:     .vgpr_spill_count: 0
117
118; GFX8:     .sgpr_count:     39
119; GFX8:     .sgpr_spill_count: 0
120; GFX8:     .vgpr_count:     32
121; GFX8:     .vgpr_spill_count: 0
122
123; GFX9:     .sgpr_count:     39
124; GFX9:     .sgpr_spill_count: 0
125; GFX9:     .vgpr_count:     32
126; GFX9:     .vgpr_spill_count: 0
127
128; GFX10:     .sgpr_count:     35
129; GFX10:     .sgpr_spill_count: 0
130; GFX10:     .vgpr_count:     32
131; GFX10:     .vgpr_spill_count: 0
132define amdgpu_kernel void @test4() { ; same body as @test3, but placed after the declaration of its external callee @g
133  call void @g() ; @g has no body in this module, so its resource usage is unknown here
134  ret void
135}
136
137attributes #0 = { norecurse } ; used by the declaration of @g
138attributes #1 = { norecurse "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" } ; used by @test1 and @f
139
140!llvm.module.flags = !{!0}
141!0 = !{i32 1, !"amdhsa_code_object_version", i32 400} ; module flag naming the HSA code object version; 400 presumably selects v4 (confirm against the AMDGPU docs)
142