xref: /llvm-project/llvm/test/CodeGen/AMDGPU/nested-calls.ll (revision cd57c9530b915aafac251b9f2757eca15027dc10)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,FIJI %s
3; RUN: llc -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,HAWAII %s
4; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
5
6; Test calls made from other callable (non-kernel) functions rather than
7; from kernels.
8
9declare void @external_void_func_i32(i32) #0 ; external callee of both tests; the checks reach it through a gotpcrel32 load followed by s_swappc_b64
10
11; Spill CSR VGPR used for SGPR spilling: s30, s31 and the incoming s33 are
; written to lanes 0-2 of v40 around the call, so v40 itself is stored to the
; stack in the prologue and reloaded in the epilogue.
12define void @test_func_call_external_void_func_i32_imm() #0 {
13; GCN-LABEL: test_func_call_external_void_func_i32_imm:
14; GCN:       ; %bb.0:
15; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16; GCN-NEXT:    s_mov_b32 s16, s33
17; GCN-NEXT:    s_mov_b32 s33, s32
18; GCN-NEXT:    s_or_saveexec_b64 s[18:19], -1
19; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
20; GCN-NEXT:    s_mov_b64 exec, s[18:19]
21; GCN-NEXT:    s_addk_i32 s32, 0x400
22; GCN-NEXT:    v_writelane_b32 v40, s16, 2
23; GCN-NEXT:    s_getpc_b64 s[16:17]
24; GCN-NEXT:    s_add_u32 s16, s16, external_void_func_i32@gotpcrel32@lo+4
25; GCN-NEXT:    s_addc_u32 s17, s17, external_void_func_i32@gotpcrel32@hi+12
26; GCN-NEXT:    s_load_dwordx2 s[16:17], s[16:17], 0x0
27; GCN-NEXT:    v_writelane_b32 v40, s30, 0
28; GCN-NEXT:    v_mov_b32_e32 v0, 42
29; GCN-NEXT:    v_writelane_b32 v40, s31, 1
30; GCN-NEXT:    s_waitcnt lgkmcnt(0)
31; GCN-NEXT:    s_swappc_b64 s[30:31], s[16:17]
32; GCN-NEXT:    v_readlane_b32 s31, v40, 1
33; GCN-NEXT:    v_readlane_b32 s30, v40, 0
34; GCN-NEXT:    s_mov_b32 s32, s33
35; GCN-NEXT:    v_readlane_b32 s4, v40, 2
36; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
37; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
38; GCN-NEXT:    s_mov_b64 exec, s[6:7]
39; GCN-NEXT:    s_mov_b32 s33, s4
40; GCN-NEXT:    s_waitcnt vmcnt(0)
41; GCN-NEXT:    s_setpc_b64 s[30:31]
42  call void @external_void_func_i32(i32 42) ; the immediate argument appears as v0 = 42 in the checks above
43  ret void
44}
45
; Same call as the previous test, but the caller also owns a 16 x i32 private
; alloca and does volatile stores to its first and last elements, so the v40
; spill slot is placed after the 64-byte array (offset:64) and the frame grows
; to 0x1400.
46define void @test_func_call_external_void_func_i32_imm_stack_use() #0 {
47; GCN-LABEL: test_func_call_external_void_func_i32_imm_stack_use:
48; GCN:       ; %bb.0:
49; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
50; GCN-NEXT:    s_mov_b32 s16, s33
51; GCN-NEXT:    s_mov_b32 s33, s32
52; GCN-NEXT:    s_or_saveexec_b64 s[18:19], -1
53; GCN-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill
54; GCN-NEXT:    s_mov_b64 exec, s[18:19]
55; GCN-NEXT:    s_addk_i32 s32, 0x1400
56; GCN-NEXT:    v_writelane_b32 v40, s16, 2
57; GCN-NEXT:    s_getpc_b64 s[16:17]
58; GCN-NEXT:    s_add_u32 s16, s16, external_void_func_i32@gotpcrel32@lo+4
59; GCN-NEXT:    s_addc_u32 s17, s17, external_void_func_i32@gotpcrel32@hi+12
60; GCN-NEXT:    s_load_dwordx2 s[16:17], s[16:17], 0x0
61; GCN-NEXT:    v_mov_b32_e32 v0, 0
62; GCN-NEXT:    v_writelane_b32 v40, s30, 0
63; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], s33
64; GCN-NEXT:    s_waitcnt vmcnt(0)
65; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:60
66; GCN-NEXT:    s_waitcnt vmcnt(0)
67; GCN-NEXT:    v_mov_b32_e32 v0, 42
68; GCN-NEXT:    v_writelane_b32 v40, s31, 1
69; GCN-NEXT:    s_waitcnt lgkmcnt(0)
70; GCN-NEXT:    s_swappc_b64 s[30:31], s[16:17]
71; GCN-NEXT:    v_readlane_b32 s31, v40, 1
72; GCN-NEXT:    v_readlane_b32 s30, v40, 0
73; GCN-NEXT:    s_mov_b32 s32, s33
74; GCN-NEXT:    v_readlane_b32 s4, v40, 2
75; GCN-NEXT:    s_or_saveexec_b64 s[6:7], -1
76; GCN-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload
77; GCN-NEXT:    s_mov_b64 exec, s[6:7]
78; GCN-NEXT:    s_mov_b32 s33, s4
79; GCN-NEXT:    s_waitcnt vmcnt(0)
80; GCN-NEXT:    s_setpc_b64 s[30:31]
81  %alloca = alloca [16 x i32], align 4, addrspace(5)
82  %gep15 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 15 ; last valid element (byte offset 60); index 16 would be one past the end and alias the v40 spill slot
83  store volatile i32 0, ptr addrspace(5) %alloca ; touch the first element so the alloca is really used
84  store volatile i32 0, ptr addrspace(5) %gep15 ; touch the last element, staying inside the object
85  call void @external_void_func_i32(i32 42)
86  ret void
87}
88
89attributes #0 = { nounwind } ; attached to the declaration and to both definitions visible above
90attributes #1 = { nounwind readnone } ; not referenced by any function visible in this file
91attributes #2 = { nounwind noinline } ; not referenced by any function visible in this file
92;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
93; FIJI: {{.*}}
94; GFX9: {{.*}}
95; HAWAII: {{.*}}
96