xref: /llvm-project/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 --mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GCN,FLAT_SCR_OPT %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 --mattr=+architected-flat-scratch < %s | FileCheck -check-prefixes=GCN,FLAT_SCR_ARCH %s
4
; External function with no body in this file; it is the callee of the call in
; @kernel_calls_no_stack. It carries attribute group #0.
5declare void @extern_func() #0
6
; Kernel with a single i32 stack slot (addrspace(5)) that is accessed only through
; a generic pointer obtained by addrspacecast. The expected store is therefore a
; flat_store_dword whose high address half (v1) is copied from src_private_base.
; Under the FLAT_SCR_OPT prefix (+enable-flat-scratch) the checks expect the
; s_add_u32/s_addc_u32 + s_setreg_b32 HW_REG_FLAT_SCR_LO/HI sequence first;
; under the FLAT_SCR_ARCH prefix (+architected-flat-scratch) they expect none.
7define amdgpu_kernel void @stack_object_addrspacecast_in_kernel_no_calls() {
8; FLAT_SCR_OPT-LABEL: stack_object_addrspacecast_in_kernel_no_calls:
9; FLAT_SCR_OPT:       ; %bb.0:
10; FLAT_SCR_OPT-NEXT:    s_add_u32 s8, s8, s13
11; FLAT_SCR_OPT-NEXT:    s_addc_u32 s9, s9, 0
12; FLAT_SCR_OPT-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8
13; FLAT_SCR_OPT-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
14; FLAT_SCR_OPT-NEXT:    s_mov_b64 s[0:1], src_private_base
15; FLAT_SCR_OPT-NEXT:    v_mov_b32_e32 v0, 0
16; FLAT_SCR_OPT-NEXT:    v_mov_b32_e32 v1, s1
17; FLAT_SCR_OPT-NEXT:    v_mov_b32_e32 v2, 0
18; FLAT_SCR_OPT-NEXT:    flat_store_dword v[0:1], v2
19; FLAT_SCR_OPT-NEXT:    s_waitcnt_vscnt null, 0x0
20; FLAT_SCR_OPT-NEXT:    s_endpgm
21;
22; FLAT_SCR_ARCH-LABEL: stack_object_addrspacecast_in_kernel_no_calls:
23; FLAT_SCR_ARCH:       ; %bb.0:
24; FLAT_SCR_ARCH-NEXT:    s_mov_b64 s[0:1], src_private_base
25; FLAT_SCR_ARCH-NEXT:    v_mov_b32_e32 v0, 0
26; FLAT_SCR_ARCH-NEXT:    v_mov_b32_e32 v1, s1
27; FLAT_SCR_ARCH-NEXT:    v_mov_b32_e32 v2, 0
28; FLAT_SCR_ARCH-NEXT:    flat_store_dword v[0:1], v2
29; FLAT_SCR_ARCH-NEXT:    s_waitcnt_vscnt null, 0x0
30; FLAT_SCR_ARCH-NEXT:    s_endpgm
; The stack object lives in addrspace(5).
31  %alloca = alloca i32, addrspace(5)
; Cast the addrspace(5) pointer to a generic pointer so the access below is
; made through the generic address space.
32  %cast = addrspacecast ptr addrspace(5) %alloca to ptr
; Volatile store (presumably so the otherwise unused store is not removed).
33  store volatile i32 0, ptr %cast
34  ret void
35}
36
; Kernel with a single i32 stack slot (addrspace(5)) that is stored to directly,
; with no addrspacecast. Both configurations expect a scratch_store_dword.
; Under the FLAT_SCR_OPT prefix the checks additionally expect the
; s_add_u32/s_addc_u32 + s_setreg_b32 HW_REG_FLAT_SCR_LO/HI sequence at the
; start of the kernel; under the FLAT_SCR_ARCH prefix that sequence is absent.
37define amdgpu_kernel void @stack_object_in_kernel_no_calls() {
38; FLAT_SCR_OPT-LABEL: stack_object_in_kernel_no_calls:
39; FLAT_SCR_OPT:       ; %bb.0:
40; FLAT_SCR_OPT-NEXT:    s_add_u32 s8, s8, s13
41; FLAT_SCR_OPT-NEXT:    s_addc_u32 s9, s9, 0
42; FLAT_SCR_OPT-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8
43; FLAT_SCR_OPT-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
44; FLAT_SCR_OPT-NEXT:    v_mov_b32_e32 v0, 0
45; FLAT_SCR_OPT-NEXT:    s_mov_b32 s0, 0
46; FLAT_SCR_OPT-NEXT:    scratch_store_dword off, v0, s0
47; FLAT_SCR_OPT-NEXT:    s_waitcnt_vscnt null, 0x0
48; FLAT_SCR_OPT-NEXT:    s_endpgm
49;
50; FLAT_SCR_ARCH-LABEL: stack_object_in_kernel_no_calls:
51; FLAT_SCR_ARCH:       ; %bb.0:
52; FLAT_SCR_ARCH-NEXT:    v_mov_b32_e32 v0, 0
53; FLAT_SCR_ARCH-NEXT:    s_mov_b32 s0, 0
54; FLAT_SCR_ARCH-NEXT:    scratch_store_dword off, v0, s0
55; FLAT_SCR_ARCH-NEXT:    s_waitcnt_vscnt null, 0x0
56; FLAT_SCR_ARCH-NEXT:    s_endpgm
; The stack object lives in addrspace(5) and is written through its own pointer.
57  %alloca = alloca i32, addrspace(5)
; Volatile store (presumably so the otherwise unused store is not removed).
58  store volatile i32 0, ptr addrspace(5) %alloca
59  ret void
60}
61
; Kernel with no stack objects of its own whose only action is a call to the
; external @extern_func. In the expected code the callee address is loaded via
; gotpcrel32 relocations and the call is made with s_swappc_b64.
; Under the FLAT_SCR_OPT prefix the checks still expect the
; s_add_u32/s_addc_u32 + s_setreg_b32 HW_REG_FLAT_SCR_LO/HI sequence even though
; this kernel has no alloca; under the FLAT_SCR_ARCH prefix it is absent.
; Both variants expect s32 to be set to 0 before the call
; (NOTE(review): s32 is presumably the stack pointer register - confirm), and
; v31 to be formed from v0, v1 << 10 and v2 << 20 with v_or3_b32.
62define amdgpu_kernel void @kernel_calls_no_stack() {
63; FLAT_SCR_OPT-LABEL: kernel_calls_no_stack:
64; FLAT_SCR_OPT:       ; %bb.0:
65; FLAT_SCR_OPT-NEXT:    s_add_u32 s8, s8, s13
66; FLAT_SCR_OPT-NEXT:    s_mov_b32 s32, 0
67; FLAT_SCR_OPT-NEXT:    s_addc_u32 s9, s9, 0
68; FLAT_SCR_OPT-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8
69; FLAT_SCR_OPT-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
70; FLAT_SCR_OPT-NEXT:    s_mov_b64 s[8:9], s[4:5]
71; FLAT_SCR_OPT-NEXT:    s_getpc_b64 s[4:5]
72; FLAT_SCR_OPT-NEXT:    s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4
73; FLAT_SCR_OPT-NEXT:    s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12
74; FLAT_SCR_OPT-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
75; FLAT_SCR_OPT-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
76; FLAT_SCR_OPT-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
77; FLAT_SCR_OPT-NEXT:    s_mov_b32 s14, s12
78; FLAT_SCR_OPT-NEXT:    s_mov_b32 s13, s11
79; FLAT_SCR_OPT-NEXT:    s_mov_b32 s12, s10
80; FLAT_SCR_OPT-NEXT:    s_mov_b64 s[10:11], s[6:7]
81; FLAT_SCR_OPT-NEXT:    v_or3_b32 v31, v0, v1, v2
82; FLAT_SCR_OPT-NEXT:    s_mov_b64 s[4:5], s[0:1]
83; FLAT_SCR_OPT-NEXT:    s_mov_b64 s[6:7], s[2:3]
84; FLAT_SCR_OPT-NEXT:    s_waitcnt lgkmcnt(0)
85; FLAT_SCR_OPT-NEXT:    s_swappc_b64 s[30:31], s[16:17]
86; FLAT_SCR_OPT-NEXT:    s_endpgm
87;
88; FLAT_SCR_ARCH-LABEL: kernel_calls_no_stack:
89; FLAT_SCR_ARCH:       ; %bb.0:
90; FLAT_SCR_ARCH-NEXT:    s_mov_b32 s13, s9
91; FLAT_SCR_ARCH-NEXT:    s_mov_b32 s12, s8
92; FLAT_SCR_ARCH-NEXT:    s_mov_b64 s[8:9], s[4:5]
93; FLAT_SCR_ARCH-NEXT:    s_getpc_b64 s[4:5]
94; FLAT_SCR_ARCH-NEXT:    s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4
95; FLAT_SCR_ARCH-NEXT:    s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12
96; FLAT_SCR_ARCH-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
97; FLAT_SCR_ARCH-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
98; FLAT_SCR_ARCH-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
99; FLAT_SCR_ARCH-NEXT:    s_mov_b32 s14, s10
100; FLAT_SCR_ARCH-NEXT:    s_mov_b64 s[10:11], s[6:7]
101; FLAT_SCR_ARCH-NEXT:    s_mov_b64 s[4:5], s[0:1]
102; FLAT_SCR_ARCH-NEXT:    s_mov_b64 s[6:7], s[2:3]
103; FLAT_SCR_ARCH-NEXT:    v_or3_b32 v31, v0, v1, v2
104; FLAT_SCR_ARCH-NEXT:    s_mov_b32 s32, 0
105; FLAT_SCR_ARCH-NEXT:    s_waitcnt lgkmcnt(0)
106; FLAT_SCR_ARCH-NEXT:    s_swappc_b64 s[30:31], s[16:17]
107; FLAT_SCR_ARCH-NEXT:    s_endpgm
; The only instruction besides the return: a call to a function whose body is
; not visible in this module.
108  call void @extern_func()
109  ret void
110}
111
; Kernel that stores %in to %out while a series of empty inline asm statements
; clobber s[0:105] and v[1:255] (every VGPR except v0). The checks show s0/s1
; (the loaded %out pointer, later used as the global_store_dword address) being
; saved to lanes 0 and 1 of v0 with v_writelane_b32 and restored with
; v_readlane_b32, i.e. the SGPR spill goes to a VGPR lane.
; All checks use the shared GCN prefix, so both llc configurations are expected
; to produce the same code, and that code contains no flat scratch setup
; (no s_setreg_b32 to HW_REG_FLAT_SCR_LO/HI).
112define amdgpu_kernel void @test(ptr addrspace(1) %out, i32 %in) {
113; GCN-LABEL: test:
114; GCN:       ; %bb.0:
115; GCN-NEXT:    s_clause 0x1
116; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
117; GCN-NEXT:    s_load_dword vcc_lo, s[4:5], 0x8
118; GCN-NEXT:    ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
119; GCN-NEXT:    ; kill: killed $sgpr4_sgpr5
120; GCN-NEXT:    s_waitcnt lgkmcnt(0)
121; GCN-NEXT:    v_writelane_b32 v0, s0, 0
122; GCN-NEXT:    v_writelane_b32 v0, s1, 1
123; GCN-NEXT:    ;;#ASMSTART
124; GCN-NEXT:    ;;#ASMEND
125; GCN-NEXT:    ;;#ASMSTART
126; GCN-NEXT:    ;;#ASMEND
127; GCN-NEXT:    ;;#ASMSTART
128; GCN-NEXT:    ;;#ASMEND
129; GCN-NEXT:    ;;#ASMSTART
130; GCN-NEXT:    ;;#ASMEND
131; GCN-NEXT:    ;;#ASMSTART
132; GCN-NEXT:    ;;#ASMEND
133; GCN-NEXT:    ;;#ASMSTART
134; GCN-NEXT:    ;;#ASMEND
135; GCN-NEXT:    ;;#ASMSTART
136; GCN-NEXT:    ;;#ASMEND
137; GCN-NEXT:    ;;#ASMSTART
138; GCN-NEXT:    ;;#ASMEND
139; GCN-NEXT:    ;;#ASMSTART
140; GCN-NEXT:    ;;#ASMEND
141; GCN-NEXT:    ;;#ASMSTART
142; GCN-NEXT:    ;;#ASMEND
143; GCN-NEXT:    ;;#ASMSTART
144; GCN-NEXT:    ;;#ASMEND
145; GCN-NEXT:    ;;#ASMSTART
146; GCN-NEXT:    ;;#ASMEND
147; GCN-NEXT:    ;;#ASMSTART
148; GCN-NEXT:    ;;#ASMEND
149; GCN-NEXT:    ;;#ASMSTART
150; GCN-NEXT:    ;;#ASMEND
151; GCN-NEXT:    ;;#ASMSTART
152; GCN-NEXT:    ;;#ASMEND
153; GCN-NEXT:    v_readlane_b32 s0, v0, 0
154; GCN-NEXT:    v_mov_b32_e32 v1, vcc_lo
155; GCN-NEXT:    v_readlane_b32 s1, v0, 1
156; GCN-NEXT:    v_mov_b32_e32 v2, 0
157; GCN-NEXT:    ;;#ASMSTART
158; GCN-NEXT:    ;;#ASMEND
159; GCN-NEXT:    ;;#ASMSTART
160; GCN-NEXT:    ;;#ASMEND
161; GCN-NEXT:    ;;#ASMSTART
162; GCN-NEXT:    ;;#ASMEND
163; GCN-NEXT:    ;;#ASMSTART
164; GCN-NEXT:    ;;#ASMEND
165; GCN-NEXT:    ;;#ASMSTART
166; GCN-NEXT:    ;;#ASMEND
167; GCN-NEXT:    ;;#ASMSTART
168; GCN-NEXT:    ;;#ASMEND
169; GCN-NEXT:    ;;#ASMSTART
170; GCN-NEXT:    ;;#ASMEND
171; GCN-NEXT:    ;;#ASMSTART
172; GCN-NEXT:    ;;#ASMEND
173; GCN-NEXT:    ;;#ASMSTART
174; GCN-NEXT:    ;;#ASMEND
175; GCN-NEXT:    ;;#ASMSTART
176; GCN-NEXT:    ;;#ASMEND
177; GCN-NEXT:    ;;#ASMSTART
178; GCN-NEXT:    ;;#ASMEND
179; GCN-NEXT:    ;;#ASMSTART
180; GCN-NEXT:    ;;#ASMEND
181; GCN-NEXT:    ;;#ASMSTART
182; GCN-NEXT:    ;;#ASMEND
183; GCN-NEXT:    ;;#ASMSTART
184; GCN-NEXT:    ;;#ASMEND
185; GCN-NEXT:    ;;#ASMSTART
186; GCN-NEXT:    ;;#ASMEND
187; GCN-NEXT:    ;;#ASMSTART
188; GCN-NEXT:    ;;#ASMEND
189; GCN-NEXT:    ;;#ASMSTART
190; GCN-NEXT:    ;;#ASMEND
191; GCN-NEXT:    ;;#ASMSTART
192; GCN-NEXT:    ;;#ASMEND
193; GCN-NEXT:    ;;#ASMSTART
194; GCN-NEXT:    ;;#ASMEND
195; GCN-NEXT:    ;;#ASMSTART
196; GCN-NEXT:    ;;#ASMEND
197; GCN-NEXT:    ;;#ASMSTART
198; GCN-NEXT:    ;;#ASMEND
199; GCN-NEXT:    ;;#ASMSTART
200; GCN-NEXT:    ;;#ASMEND
201; GCN-NEXT:    ;;#ASMSTART
202; GCN-NEXT:    ;;#ASMEND
203; GCN-NEXT:    ;;#ASMSTART
204; GCN-NEXT:    ;;#ASMEND
205; GCN-NEXT:    ;;#ASMSTART
206; GCN-NEXT:    ;;#ASMEND
207; GCN-NEXT:    ;;#ASMSTART
208; GCN-NEXT:    ;;#ASMEND
209; GCN-NEXT:    ;;#ASMSTART
210; GCN-NEXT:    ;;#ASMEND
211; GCN-NEXT:    ;;#ASMSTART
212; GCN-NEXT:    ;;#ASMEND
213; GCN-NEXT:    ;;#ASMSTART
214; GCN-NEXT:    ;;#ASMEND
215; GCN-NEXT:    ;;#ASMSTART
216; GCN-NEXT:    ;;#ASMEND
217; GCN-NEXT:    ;;#ASMSTART
218; GCN-NEXT:    ;;#ASMEND
219; GCN-NEXT:    global_store_dword v2, v1, s[0:1]
220; GCN-NEXT:    s_endpgm
; Clobber SGPRs s0 through s105 with empty asm statements, eight registers at a
; time (the final group is the two-register range s[104:105]).
221  call void asm sideeffect "", "~{s[0:7]}" ()
222  call void asm sideeffect "", "~{s[8:15]}" ()
223  call void asm sideeffect "", "~{s[16:23]}" ()
224  call void asm sideeffect "", "~{s[24:31]}" ()
225  call void asm sideeffect "", "~{s[32:39]}" ()
226  call void asm sideeffect "", "~{s[40:47]}" ()
227  call void asm sideeffect "", "~{s[48:55]}" ()
228  call void asm sideeffect "", "~{s[56:63]}" ()
229  call void asm sideeffect "", "~{s[64:71]}" ()
230  call void asm sideeffect "", "~{s[72:79]}" ()
231  call void asm sideeffect "", "~{s[80:87]}" ()
232  call void asm sideeffect "", "~{s[88:95]}" ()
233  call void asm sideeffect "", "~{s[96:103]}" ()
234  call void asm sideeffect "", "~{s[104:105]}" ()
; Clobber VGPRs v1 through v255. v0 is deliberately not listed (presumably so
; that it stays available to hold the spilled SGPR lanes - confirm).
235  call void asm sideeffect "", "~{v[1:7]}" ()
236  call void asm sideeffect "", "~{v[8:15]}" ()
237  call void asm sideeffect "", "~{v[16:23]}" ()
238  call void asm sideeffect "", "~{v[24:31]}" ()
239  call void asm sideeffect "", "~{v[32:39]}" ()
240  call void asm sideeffect "", "~{v[40:47]}" ()
241  call void asm sideeffect "", "~{v[48:55]}" ()
242  call void asm sideeffect "", "~{v[56:63]}" ()
243  call void asm sideeffect "", "~{v[64:71]}" ()
244  call void asm sideeffect "", "~{v[72:79]}" ()
245  call void asm sideeffect "", "~{v[80:87]}" ()
246  call void asm sideeffect "", "~{v[88:95]}" ()
247  call void asm sideeffect "", "~{v[96:103]}" ()
248  call void asm sideeffect "", "~{v[104:111]}" ()
249  call void asm sideeffect "", "~{v[112:119]}" ()
250  call void asm sideeffect "", "~{v[120:127]}" ()
251  call void asm sideeffect "", "~{v[128:135]}" ()
252  call void asm sideeffect "", "~{v[136:143]}" ()
253  call void asm sideeffect "", "~{v[144:151]}" ()
254  call void asm sideeffect "", "~{v[152:159]}" ()
255  call void asm sideeffect "", "~{v[160:167]}" ()
256  call void asm sideeffect "", "~{v[168:175]}" ()
257  call void asm sideeffect "", "~{v[176:183]}" ()
258  call void asm sideeffect "", "~{v[184:191]}" ()
259  call void asm sideeffect "", "~{v[192:199]}" ()
260  call void asm sideeffect "", "~{v[200:207]}" ()
261  call void asm sideeffect "", "~{v[208:215]}" ()
262  call void asm sideeffect "", "~{v[216:223]}" ()
263  call void asm sideeffect "", "~{v[224:231]}" ()
264  call void asm sideeffect "", "~{v[232:239]}" ()
265  call void asm sideeffect "", "~{v[240:247]}" ()
266  call void asm sideeffect "", "~{v[248:255]}" ()
267
; The kernel arguments are used only here, after all of the clobbers above.
268  store i32 %in, ptr addrspace(1) %out
269  ret void
270}
271
; Empty kernel: no stack objects and no calls. The checks use the shared GCN
; prefix, so both llc configurations are expected to emit only s_endpgm, with
; no flat scratch setup instructions.
272define amdgpu_kernel void @kernel_no_calls_no_stack() {
273; GCN-LABEL: kernel_no_calls_no_stack:
274; GCN:       ; %bb.0:
275; GCN-NEXT:    s_endpgm
276  ret void
277}
278
; Attribute group #0 (nounwind) is referenced by the @extern_func declaration.
279attributes #0 = { nounwind }
280
; Module flag "amdhsa_code_object_version" is set to 500
; (NOTE(review): presumably selects AMDHSA code object v5 - confirm).
281!llvm.module.flags = !{!0}
282!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
283