; xref: /llvm-project/llvm/test/CodeGen/AMDGPU/ipra.ll (revision c897c13dde3bb413e723317c0579781fb6045a8b)
; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -enable-ipra < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Kernels are not called, so there is no call preserved mask.
; GCN-LABEL: {{^}}kernel:
; GCN: flat_store_dword
; Trivial uncalled kernel: stores 0 through %out; serves as the no-callee baseline.
define amdgpu_kernel void @kernel(ptr addrspace(1) %out) #0 {
entry:
  store i32 0, ptr addrspace(1) %out
  ret void
}
12
; GCN-LABEL: {{^}}func:
; GCN: ; NumVgprs: 8
; Callee that clobbers v0-v7 via inline asm; with IPRA the callers below can
; see this exact clobber set instead of assuming the full call-preserved mask.
define hidden void @func() #1 {
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0
  ret void
}
19
; GCN-LABEL: {{^}}kernel_call:
; GCN-NOT: buffer_store
; GCN-NOT: buffer_load
; GCN-NOT: readlane
; GCN-NOT: writelane
; GCN: flat_load_dword v8
; GCN: s_swappc_b64
; GCN-NOT: buffer_store
; GCN-NOT: buffer_load
; GCN-NOT: readlane
; GCN-NOT: writelane
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v8

; GCN: ; TotalNumSgprs: 37
; GCN: ; NumVgprs: 9
; The live value must survive the call in v8 (first VGPR not clobbered by
; @func) without any stack spill or readlane/writelane traffic.
define amdgpu_kernel void @kernel_call() #0 {
  ; Dummy address: poison (undef is deprecated) — only the volatile load/store
  ; matters, not where it points.
  %vgpr = load volatile i32, ptr addrspace(1) poison
  tail call void @func()
  store volatile i32 %vgpr, ptr addrspace(1) poison
  ret void
}
41
; GCN-LABEL: {{^}}func_regular_call:
; GCN-NOT: buffer_load
; GCN-NOT: readlane
; GCN: flat_load_dword v8
; GCN: s_swappc_b64
; GCN-NOT: buffer_load
; GCN-NOT: readlane
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v8

; GCN: ; TotalNumSgprs: 34
; GCN: ; NumVgprs: 10
; Same as kernel_call but from a regular function: the value still stays in v8
; across the call with no spill code.
define void @func_regular_call() #1 {
  ; Dummy address: poison (undef is deprecated) — only the volatile load/store
  ; matters, not where it points.
  %vgpr = load volatile i32, ptr addrspace(1) poison
  tail call void @func()
  store volatile i32 %vgpr, ptr addrspace(1) poison
  ret void
}
59
; GCN-LABEL: {{^}}func_tail_call:
; GCN: s_waitcnt
; GCN-NEXT: s_getpc_b64 s[16:17]
; GCN-NEXT: s_add_u32 s16,
; GCN-NEXT: s_addc_u32 s17,
; GCN-NEXT: s_setpc_b64 s[16:17]

; GCN: ; TotalNumSgprs: 32
; GCN: ; NumVgprs: 8
; Pure tail call: lowered to a direct jump (s_setpc_b64), no frame setup.
define void @func_tail_call() #1 {
  tail call void @func()
  ret void
}
73
; GCN-LABEL: {{^}}func_call_tail_call:
; GCN: flat_load_dword v8
; GCN: s_swappc_b64
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v8
; GCN: s_setpc_b64

; GCN: ; TotalNumSgprs: 34
; GCN: ; NumVgprs: 10
; First call is a regular call (value live across it in v8); the second is
; tail-called.
define void @func_call_tail_call() #1 {
  ; Dummy address: poison (undef is deprecated) — only the volatile load/store
  ; matters, not where it points.
  %vgpr = load volatile i32, ptr addrspace(1) poison
  tail call void @func()
  store volatile i32 %vgpr, ptr addrspace(1) poison
  tail call void @func()
  ret void
}
89
; GCN-LABEL: {{^}}void_func_void:
; Empty noinline callee used by test_funcx2 below.
define void @void_func_void() noinline {
  ret void
}
94
; Make sure we don't get save/restore of FP between calls.
; GCN-LABEL: {{^}}test_funcx2:
; GCN: s_getpc_b64
; GCN-NOT: s32
; GCN: s_swappc_b64
; GCN-NOT: s32
; GCN: s_swappc_b64
; Two back-to-back calls; the NOT checks ensure the stack pointer (s32) is not
; touched between them.
define void @test_funcx2() #0 {
  call void @void_func_void()
  call void @void_func_void()
  ret void
}
107
; Make sure we save/restore the return address around the call.
; Function Attrs: norecurse
; Calls an external (opaque) function, so the return address in s[30:31] must
; be spilled to a CSR VGPR lane before s_swappc_b64 clobbers it and restored
; before the final s_setpc_b64.
define internal void @hoge() #2 {
bb:
; GCN-LABEL: {{^}}hoge:
; GCN-DAG: v_writelane_b32 [[CSR_VGPR:v[0-9]+]], s30,
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31,
; GCN: s_swappc_b64 s[30:31]
; GCN-DAG: v_readlane_b32 s30, [[CSR_VGPR]],
; GCN-DAG: v_readlane_b32 s31, [[CSR_VGPR]],
; GCN: s_waitcnt vmcnt(0)
; GCN: s_setpc_b64 s[30:31]
  call void @eggs()
  ret void
}
123
; GCN-LABEL: {{^}}wombat:
; Kernel entry point that calls @hoge, making it reachable so it gets compiled.
define weak amdgpu_kernel void @wombat(ptr %arg, ptr %arg2) {
bb:
  call void @hoge() #0
  ret void
}
130
; External callee with no visible definition; forces @hoge to assume the full
; call-clobbered set.
declare dso_local void @eggs()


; #0: basic nounwind.
attributes #0 = { nounwind }
; #1: non-inlinable callees/callers that do not use workitem IDs.
attributes #1 = { nounwind noinline "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
; #2: norecurse, used by @hoge.
attributes #2 = { norecurse }
137