xref: /llvm-project/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll (revision 11b040192640ef3b1f481124c440f464ed6ec86a)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=FIXEDABI,FIXEDABI-SDAG %s
3; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=FIXEDABI,FIXEDABI-GISEL %s
4
5; Test with gfx803 so that
6; addrspacecast/llvm.amdgcn.is.shared/llvm.amdgcn.is.private require
7; the queue ptr.  Use code object v3 or above to test
8; llvm.trap/llvm.debugtrap, which require the queue ptr.
9
10
; External callee used by the parent_* tests below. It is declared without any
; of the hint attributes from group #0.
11declare hidden void @requires_all_inputs()
12
13; This function is incorrectly marked with the hints that the callee
14; does not require the implicit arguments to the function. Make sure
15; we do not crash.
16define void @parent_func_missing_inputs() #0 {
17; FIXEDABI-LABEL: parent_func_missing_inputs:
18; FIXEDABI:       ; %bb.0:
19; FIXEDABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20; FIXEDABI-NEXT:    s_mov_b32 s16, s33
21; FIXEDABI-NEXT:    s_mov_b32 s33, s32
22; FIXEDABI-NEXT:    s_or_saveexec_b64 s[18:19], -1
23; FIXEDABI-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
24; FIXEDABI-NEXT:    s_mov_b64 exec, s[18:19]
25; FIXEDABI-NEXT:    v_writelane_b32 v40, s16, 2
26; FIXEDABI-NEXT:    s_addk_i32 s32, 0x400
27; FIXEDABI-NEXT:    v_writelane_b32 v40, s30, 0
28; FIXEDABI-NEXT:    v_writelane_b32 v40, s31, 1
29; FIXEDABI-NEXT:    s_getpc_b64 s[16:17]
30; FIXEDABI-NEXT:    s_add_u32 s16, s16, requires_all_inputs@rel32@lo+4
31; FIXEDABI-NEXT:    s_addc_u32 s17, s17, requires_all_inputs@rel32@hi+12
32; FIXEDABI-NEXT:    s_swappc_b64 s[30:31], s[16:17]
33; FIXEDABI-NEXT:    v_readlane_b32 s31, v40, 1
34; FIXEDABI-NEXT:    v_readlane_b32 s30, v40, 0
35; FIXEDABI-NEXT:    s_mov_b32 s32, s33
36; FIXEDABI-NEXT:    v_readlane_b32 s4, v40, 2
37; FIXEDABI-NEXT:    s_or_saveexec_b64 s[6:7], -1
38; FIXEDABI-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
39; FIXEDABI-NEXT:    s_mov_b64 exec, s[6:7]
40; FIXEDABI-NEXT:    s_mov_b32 s33, s4
41; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
42; FIXEDABI-NEXT:    s_setpc_b64 s[30:31]
43  call void @requires_all_inputs()
44  ret void
45}
46
; Kernel entry point that carries the hint attributes in group #0 and still
; calls @requires_all_inputs. The SelectionDAG and GlobalISel runs are checked
; separately for this kernel.
47define amdgpu_kernel void @parent_kernel_missing_inputs() #0 {
48; FIXEDABI-SDAG-LABEL: parent_kernel_missing_inputs:
49; FIXEDABI-SDAG:       ; %bb.0:
50; FIXEDABI-SDAG-NEXT:    s_add_i32 s4, s4, s9
51; FIXEDABI-SDAG-NEXT:    s_lshr_b32 flat_scratch_hi, s4, 8
52; FIXEDABI-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
53; FIXEDABI-SDAG-NEXT:    s_add_u32 s0, s0, s9
54; FIXEDABI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
55; FIXEDABI-SDAG-NEXT:    v_or_b32_e32 v0, v0, v1
56; FIXEDABI-SDAG-NEXT:    s_addc_u32 s1, s1, 0
57; FIXEDABI-SDAG-NEXT:    s_mov_b32 s14, s8
58; FIXEDABI-SDAG-NEXT:    v_or_b32_e32 v31, v0, v2
59; FIXEDABI-SDAG-NEXT:    s_mov_b64 s[8:9], 0
60; FIXEDABI-SDAG-NEXT:    s_mov_b32 s12, s6
61; FIXEDABI-SDAG-NEXT:    s_mov_b32 s13, s7
62; FIXEDABI-SDAG-NEXT:    s_mov_b32 s32, 0
63; FIXEDABI-SDAG-NEXT:    s_mov_b32 flat_scratch_lo, s5
64; FIXEDABI-SDAG-NEXT:    s_getpc_b64 s[4:5]
65; FIXEDABI-SDAG-NEXT:    s_add_u32 s4, s4, requires_all_inputs@rel32@lo+4
66; FIXEDABI-SDAG-NEXT:    s_addc_u32 s5, s5, requires_all_inputs@rel32@hi+12
67; FIXEDABI-SDAG-NEXT:    s_swappc_b64 s[30:31], s[4:5]
68; FIXEDABI-SDAG-NEXT:    s_endpgm
69;
70; FIXEDABI-GISEL-LABEL: parent_kernel_missing_inputs:
71; FIXEDABI-GISEL:       ; %bb.0:
72; FIXEDABI-GISEL-NEXT:    s_add_i32 s4, s4, s9
73; FIXEDABI-GISEL-NEXT:    s_lshr_b32 flat_scratch_hi, s4, 8
74; FIXEDABI-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
75; FIXEDABI-GISEL-NEXT:    s_add_u32 s0, s0, s9
76; FIXEDABI-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
77; FIXEDABI-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 20, v2
78; FIXEDABI-GISEL-NEXT:    s_addc_u32 s1, s1, 0
79; FIXEDABI-GISEL-NEXT:    s_mov_b32 s14, s8
80; FIXEDABI-GISEL-NEXT:    v_or_b32_e32 v31, v0, v1
81; FIXEDABI-GISEL-NEXT:    s_mov_b64 s[8:9], 0
82; FIXEDABI-GISEL-NEXT:    s_mov_b32 s12, s6
83; FIXEDABI-GISEL-NEXT:    s_mov_b32 s13, s7
84; FIXEDABI-GISEL-NEXT:    s_mov_b32 s32, 0
85; FIXEDABI-GISEL-NEXT:    s_mov_b32 flat_scratch_lo, s5
86; FIXEDABI-GISEL-NEXT:    s_getpc_b64 s[4:5]
87; FIXEDABI-GISEL-NEXT:    s_add_u32 s4, s4, requires_all_inputs@rel32@lo+4
88; FIXEDABI-GISEL-NEXT:    s_addc_u32 s5, s5, requires_all_inputs@rel32@hi+12
89; FIXEDABI-GISEL-NEXT:    s_swappc_b64 s[30:31], s[4:5]
90; FIXEDABI-GISEL-NEXT:    s_endpgm
91  call void @requires_all_inputs()
92  ret void
93}
94
95; Function is marked with amdgpu-no-workitem-id-* but uses them anyway.
; It reads the x, y and z workitem ID intrinsics and writes each one to %ptr
; with a volatile store. The checks expect the three IDs to be extracted from
; v31 as 10-bit fields at bit offsets 0, 10 and 20.
96define void @marked_func_use_workitem_id(ptr addrspace(1) %ptr) #0 {
97; FIXEDABI-SDAG-LABEL: marked_func_use_workitem_id:
98; FIXEDABI-SDAG:       ; %bb.0:
99; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
100; FIXEDABI-SDAG-NEXT:    v_and_b32_e32 v2, 0x3ff, v31
101; FIXEDABI-SDAG-NEXT:    flat_store_dword v[0:1], v2
102; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0)
103; FIXEDABI-SDAG-NEXT:    v_bfe_u32 v2, v31, 10, 10
104; FIXEDABI-SDAG-NEXT:    flat_store_dword v[0:1], v2
105; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0)
106; FIXEDABI-SDAG-NEXT:    v_bfe_u32 v2, v31, 20, 10
107; FIXEDABI-SDAG-NEXT:    flat_store_dword v[0:1], v2
108; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0)
109; FIXEDABI-SDAG-NEXT:    s_setpc_b64 s[30:31]
110;
111; FIXEDABI-GISEL-LABEL: marked_func_use_workitem_id:
112; FIXEDABI-GISEL:       ; %bb.0:
113; FIXEDABI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
114; FIXEDABI-GISEL-NEXT:    v_and_b32_e32 v2, 0x3ff, v31
115; FIXEDABI-GISEL-NEXT:    v_bfe_u32 v3, v31, 10, 10
116; FIXEDABI-GISEL-NEXT:    v_bfe_u32 v4, v31, 20, 10
117; FIXEDABI-GISEL-NEXT:    flat_store_dword v[0:1], v2
118; FIXEDABI-GISEL-NEXT:    s_waitcnt vmcnt(0)
119; FIXEDABI-GISEL-NEXT:    flat_store_dword v[0:1], v3
120; FIXEDABI-GISEL-NEXT:    s_waitcnt vmcnt(0)
121; FIXEDABI-GISEL-NEXT:    flat_store_dword v[0:1], v4
122; FIXEDABI-GISEL-NEXT:    s_waitcnt vmcnt(0)
123; FIXEDABI-GISEL-NEXT:    s_setpc_b64 s[30:31]
124  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
125  %id.y = call i32 @llvm.amdgcn.workitem.id.y()
126  %id.z = call i32 @llvm.amdgcn.workitem.id.z()
127  store volatile i32 %id.x, ptr addrspace(1) %ptr
128  store volatile i32 %id.y, ptr addrspace(1) %ptr
129  store volatile i32 %id.z, ptr addrspace(1) %ptr
130  ret void
131}
132
133; Kernel is marked with amdgpu-no-workitem-id-* but uses them anyway.
; It reads the x, y and z workitem ID intrinsics and writes each one to %ptr
; with a volatile store. The checks expect v0, v1 and v2 to be stored as-is.
134define amdgpu_kernel void @marked_kernel_use_workitem_id(ptr addrspace(1) %ptr) #0 {
135; FIXEDABI-LABEL: marked_kernel_use_workitem_id:
136; FIXEDABI:       ; %bb.0:
137; FIXEDABI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
138; FIXEDABI-NEXT:    s_waitcnt lgkmcnt(0)
139; FIXEDABI-NEXT:    v_mov_b32_e32 v4, s1
140; FIXEDABI-NEXT:    v_mov_b32_e32 v3, s0
141; FIXEDABI-NEXT:    flat_store_dword v[3:4], v0
142; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
143; FIXEDABI-NEXT:    flat_store_dword v[3:4], v1
144; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
145; FIXEDABI-NEXT:    flat_store_dword v[3:4], v2
146; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
147; FIXEDABI-NEXT:    s_endpgm
148  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
149  %id.y = call i32 @llvm.amdgcn.workitem.id.y()
150  %id.z = call i32 @llvm.amdgcn.workitem.id.z()
151  store volatile i32 %id.x, ptr addrspace(1) %ptr
152  store volatile i32 %id.y, ptr addrspace(1) %ptr
153  store volatile i32 %id.z, ptr addrspace(1) %ptr
154  ret void
155}
156
; Function carries the hint attributes in group #0 but reads the x, y and z
; workgroup ID intrinsics anyway, writing each one to %ptr with a volatile
; store. The checks expect the values to be copied from s12, s13 and s14.
157define void @marked_func_use_workgroup_id(ptr addrspace(1) %ptr) #0 {
158; FIXEDABI-LABEL: marked_func_use_workgroup_id:
159; FIXEDABI:       ; %bb.0:
160; FIXEDABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
161; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s12
162; FIXEDABI-NEXT:    flat_store_dword v[0:1], v2
163; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
164; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s13
165; FIXEDABI-NEXT:    flat_store_dword v[0:1], v2
166; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
167; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s14
168; FIXEDABI-NEXT:    flat_store_dword v[0:1], v2
169; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
170; FIXEDABI-NEXT:    s_setpc_b64 s[30:31]
171  %id.x = call i32 @llvm.amdgcn.workgroup.id.x()
172  %id.y = call i32 @llvm.amdgcn.workgroup.id.y()
173  %id.z = call i32 @llvm.amdgcn.workgroup.id.z()
174  store volatile i32 %id.x, ptr addrspace(1) %ptr
175  store volatile i32 %id.y, ptr addrspace(1) %ptr
176  store volatile i32 %id.z, ptr addrspace(1) %ptr
177  ret void
178}
179
; Kernel carries the hint attributes in group #0 but reads the x, y and z
; workgroup ID intrinsics anyway, writing each one to %ptr with a volatile
; store. The checks expect the values to be copied from s6, s7 and s8.
180define amdgpu_kernel void @marked_kernel_use_workgroup_id(ptr addrspace(1) %ptr) #0 {
181; FIXEDABI-LABEL: marked_kernel_use_workgroup_id:
182; FIXEDABI:       ; %bb.0:
183; FIXEDABI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
184; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s6
185; FIXEDABI-NEXT:    s_waitcnt lgkmcnt(0)
186; FIXEDABI-NEXT:    v_mov_b32_e32 v0, s0
187; FIXEDABI-NEXT:    v_mov_b32_e32 v1, s1
188; FIXEDABI-NEXT:    flat_store_dword v[0:1], v2
189; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
190; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s7
191; FIXEDABI-NEXT:    flat_store_dword v[0:1], v2
192; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
193; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s8
194; FIXEDABI-NEXT:    flat_store_dword v[0:1], v2
195; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
196; FIXEDABI-NEXT:    s_endpgm
197  %id.x = call i32 @llvm.amdgcn.workgroup.id.x()
198  %id.y = call i32 @llvm.amdgcn.workgroup.id.y()
199  %id.z = call i32 @llvm.amdgcn.workgroup.id.z()
200  store volatile i32 %id.x, ptr addrspace(1) %ptr
201  store volatile i32 %id.y, ptr addrspace(1) %ptr
202  store volatile i32 %id.z, ptr addrspace(1) %ptr
203  ret void
204}
205
; Function carries the hint attributes in group #0 but still queries the queue
; ptr, implicitarg ptr, dispatch ptr and dispatch id. Each pointer is read with
; a volatile i8 load and the dispatch id is written to %ptr with a volatile
; store, so none of the uses can be dropped.
206define void @marked_func_use_other_sgpr(ptr addrspace(1) %ptr) #0 {
207; FIXEDABI-LABEL: marked_func_use_other_sgpr:
208; FIXEDABI:       ; %bb.0:
209; FIXEDABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
210; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s6
211; FIXEDABI-NEXT:    v_mov_b32_e32 v3, s7
212; FIXEDABI-NEXT:    flat_load_ubyte v2, v[2:3] glc
213; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
214; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s8
215; FIXEDABI-NEXT:    v_mov_b32_e32 v3, s9
216; FIXEDABI-NEXT:    flat_load_ubyte v2, v[2:3] glc
217; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
218; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s4
219; FIXEDABI-NEXT:    v_mov_b32_e32 v3, s5
220; FIXEDABI-NEXT:    flat_load_ubyte v2, v[2:3] glc
221; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
222; FIXEDABI-NEXT:    v_mov_b32_e32 v2, s10
223; FIXEDABI-NEXT:    v_mov_b32_e32 v3, s11
224; FIXEDABI-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
225; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
226; FIXEDABI-NEXT:    s_setpc_b64 s[30:31]
227  %queue.ptr = call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
228  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
229  %dispatch.ptr = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
230  %dispatch.id = call i64 @llvm.amdgcn.dispatch.id()
231  %queue.load = load volatile i8, ptr addrspace(4) %queue.ptr
232  %implicitarg.load = load volatile i8, ptr addrspace(4) %implicitarg.ptr
233  %dispatch.load = load volatile i8, ptr addrspace(4) %dispatch.ptr
234  store volatile i64 %dispatch.id, ptr addrspace(1) %ptr
235  ret void
236}
237
; Kernel carries the hint attributes in group #0 but still queries the queue
; ptr, implicitarg ptr, dispatch ptr and dispatch id, and reads each pointer
; with a volatile i8 load.
; NOTE(review): %dispatch.id is computed but never used in this kernel, and
; %ptr is never written, unlike in @marked_func_use_other_sgpr. Confirm this
; is intentional.
238define amdgpu_kernel void @marked_kernel_use_other_sgpr(ptr addrspace(1) %ptr) #0 {
239; FIXEDABI-LABEL: marked_kernel_use_other_sgpr:
240; FIXEDABI:       ; %bb.0:
241; FIXEDABI-NEXT:    s_add_u32 s0, s4, 8
242; FIXEDABI-NEXT:    flat_load_ubyte v0, v[0:1] glc
243; FIXEDABI-NEXT:    s_addc_u32 s1, s5, 0
244; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
245; FIXEDABI-NEXT:    v_mov_b32_e32 v0, s0
246; FIXEDABI-NEXT:    v_mov_b32_e32 v1, s1
247; FIXEDABI-NEXT:    flat_load_ubyte v0, v[0:1] glc
248; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
249; FIXEDABI-NEXT:    flat_load_ubyte v0, v[0:1] glc
250; FIXEDABI-NEXT:    s_endpgm
251  %queue.ptr = call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
252  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
253  %dispatch.ptr = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
254  %dispatch.id = call i64 @llvm.amdgcn.dispatch.id()
255  %queue.load = load volatile i8, ptr addrspace(4) %queue.ptr
256  %implicitarg.load = load volatile i8, ptr addrspace(4) %implicitarg.ptr
257  %dispatch.load = load volatile i8, ptr addrspace(4) %dispatch.ptr
258  ret void
259}
260
; Kernel with no explicit arguments that carries the hint attributes in group
; #0 but still does a volatile i8 load through the implicitarg ptr. The checks
; expect the load address to be materialized as zero.
261define amdgpu_kernel void @marked_kernel_nokernargs_implicitarg_ptr() #0 {
262; FIXEDABI-LABEL: marked_kernel_nokernargs_implicitarg_ptr:
263; FIXEDABI:       ; %bb.0:
264; FIXEDABI-NEXT:    v_mov_b32_e32 v0, 0
265; FIXEDABI-NEXT:    v_mov_b32_e32 v1, 0
266; FIXEDABI-NEXT:    flat_load_ubyte v0, v[0:1] glc
267; FIXEDABI-NEXT:    s_endpgm
268  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
269  %implicitarg.load = load volatile i8, ptr addrspace(4) %implicitarg.ptr
270  ret void
271}
272
273; On gfx8, the queue ptr is required for this addrspacecast.
; Casts a private (addrspace 5) and a local (addrspace 3) pointer to flat
; pointers and does a volatile store through each. The checks expect the high
; half of each flat pointer to come from a dword loaded at a constant address
; (0xc0 for the private cast, 0xc4 for the local cast), with a compare against
; -1 selecting zero for both halves instead.
274define void @addrspacecast_requires_queue_ptr(ptr addrspace(5) %ptr.private, ptr addrspace(3) %ptr.local) #0 {
275; FIXEDABI-SDAG-LABEL: addrspacecast_requires_queue_ptr:
276; FIXEDABI-SDAG:       ; %bb.0:
277; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
278; FIXEDABI-SDAG-NEXT:    s_mov_b64 s[4:5], 0xc0
279; FIXEDABI-SDAG-NEXT:    s_load_dword s6, s[4:5], 0x0
280; FIXEDABI-SDAG-NEXT:    s_mov_b64 s[4:5], 0xc4
281; FIXEDABI-SDAG-NEXT:    s_load_dword s4, s[4:5], 0x0
282; FIXEDABI-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
283; FIXEDABI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
284; FIXEDABI-SDAG-NEXT:    v_mov_b32_e32 v2, s6
285; FIXEDABI-SDAG-NEXT:    v_cndmask_b32_e32 v3, 0, v2, vcc
286; FIXEDABI-SDAG-NEXT:    v_cndmask_b32_e32 v2, 0, v0, vcc
287; FIXEDABI-SDAG-NEXT:    v_mov_b32_e32 v0, s4
288; FIXEDABI-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
289; FIXEDABI-SDAG-NEXT:    v_cndmask_b32_e32 v5, 0, v0, vcc
290; FIXEDABI-SDAG-NEXT:    v_mov_b32_e32 v0, 1
291; FIXEDABI-SDAG-NEXT:    v_cndmask_b32_e32 v4, 0, v1, vcc
292; FIXEDABI-SDAG-NEXT:    flat_store_dword v[2:3], v0
293; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0)
294; FIXEDABI-SDAG-NEXT:    v_mov_b32_e32 v0, 2
295; FIXEDABI-SDAG-NEXT:    flat_store_dword v[4:5], v0
296; FIXEDABI-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
297; FIXEDABI-SDAG-NEXT:    s_setpc_b64 s[30:31]
298;
299; FIXEDABI-GISEL-LABEL: addrspacecast_requires_queue_ptr:
300; FIXEDABI-GISEL:       ; %bb.0:
301; FIXEDABI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
302; FIXEDABI-GISEL-NEXT:    s_mov_b64 s[4:5], 0xc0
303; FIXEDABI-GISEL-NEXT:    s_load_dword s6, s[4:5], 0x0
304; FIXEDABI-GISEL-NEXT:    s_mov_b64 s[4:5], 0xc4
305; FIXEDABI-GISEL-NEXT:    s_load_dword s4, s[4:5], 0x0
306; FIXEDABI-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
307; FIXEDABI-GISEL-NEXT:    v_cndmask_b32_e32 v2, 0, v0, vcc
308; FIXEDABI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
309; FIXEDABI-GISEL-NEXT:    v_mov_b32_e32 v3, s6
310; FIXEDABI-GISEL-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
311; FIXEDABI-GISEL-NEXT:    v_mov_b32_e32 v4, s4
312; FIXEDABI-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
313; FIXEDABI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
314; FIXEDABI-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v4, vcc
315; FIXEDABI-GISEL-NEXT:    v_mov_b32_e32 v4, 1
316; FIXEDABI-GISEL-NEXT:    flat_store_dword v[2:3], v4
317; FIXEDABI-GISEL-NEXT:    s_waitcnt vmcnt(0)
318; FIXEDABI-GISEL-NEXT:    v_mov_b32_e32 v2, 2
319; FIXEDABI-GISEL-NEXT:    flat_store_dword v[0:1], v2
320; FIXEDABI-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
321; FIXEDABI-GISEL-NEXT:    s_setpc_b64 s[30:31]
322  %flat.private = addrspacecast ptr addrspace(5) %ptr.private to ptr
323  %flat.local = addrspacecast ptr addrspace(3) %ptr.local to ptr
324  store volatile i32 1, ptr %flat.private
325  store volatile i32 2, ptr %flat.local
326  ret void
327}
328
; Calls llvm.amdgcn.is.shared on a flat pointer from a function that carries
; the hint attributes in group #0, then zero-extends the result and stores it
; with a volatile store. The checks expect the high half of %ptr (v1) to be
; compared against a dword loaded from constant address 0xc4.
; The store address is undef; only the stored value matters to the test.
329define void @is_shared_requires_queue_ptr(ptr %ptr) #0 {
330; FIXEDABI-LABEL: is_shared_requires_queue_ptr:
331; FIXEDABI:       ; %bb.0:
332; FIXEDABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
333; FIXEDABI-NEXT:    s_mov_b64 s[4:5], 0xc4
334; FIXEDABI-NEXT:    s_load_dword s4, s[4:5], 0x0
335; FIXEDABI-NEXT:    s_waitcnt lgkmcnt(0)
336; FIXEDABI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v1
337; FIXEDABI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
338; FIXEDABI-NEXT:    flat_store_dword v[0:1], v0
339; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
340; FIXEDABI-NEXT:    s_setpc_b64 s[30:31]
341  %is.shared = call i1 @llvm.amdgcn.is.shared(ptr %ptr)
342  %zext = zext i1 %is.shared to i32
343  store volatile i32 %zext, ptr addrspace(1) undef
344  ret void
345}
346
; Calls llvm.amdgcn.is.private on a flat pointer from a function that carries
; the hint attributes in group #0, then zero-extends the result and stores it
; with a volatile store. The checks expect the high half of %ptr (v1) to be
; compared against a dword loaded from constant address 0xc0.
; The store address is undef; only the stored value matters to the test.
347define void @is_private_requires_queue_ptr(ptr %ptr) #0 {
348; FIXEDABI-LABEL: is_private_requires_queue_ptr:
349; FIXEDABI:       ; %bb.0:
350; FIXEDABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
351; FIXEDABI-NEXT:    s_mov_b64 s[4:5], 0xc0
352; FIXEDABI-NEXT:    s_load_dword s4, s[4:5], 0x0
353; FIXEDABI-NEXT:    s_waitcnt lgkmcnt(0)
354; FIXEDABI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v1
355; FIXEDABI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
356; FIXEDABI-NEXT:    flat_store_dword v[0:1], v0
357; FIXEDABI-NEXT:    s_waitcnt vmcnt(0)
358; FIXEDABI-NEXT:    s_setpc_b64 s[30:31]
359  %is.private = call i1 @llvm.amdgcn.is.private(ptr %ptr)
360  %zext = zext i1 %is.private to i32
361  store volatile i32 %zext, ptr addrspace(1) undef
362  ret void
363}
364
; Calls llvm.trap from a function that carries the hint attributes in group
; #0. The checks expect a 64-bit load from constant address 0xc8 into s[0:1]
; before the s_trap 2 instruction.
365define void @trap_requires_queue() #0 {
366; FIXEDABI-LABEL: trap_requires_queue:
367; FIXEDABI:       ; %bb.0:
368; FIXEDABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
369; FIXEDABI-NEXT:    s_mov_b64 s[4:5], 0xc8
370; FIXEDABI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
371; FIXEDABI-NEXT:    s_waitcnt lgkmcnt(0)
372; FIXEDABI-NEXT:    s_trap 2
373  call void @llvm.trap()
374  unreachable
375}
376
; Calls llvm.debugtrap from a function that carries the hint attributes in
; group #0. The checks expect only the entry wait followed by s_trap 3, with
; no load preceding the trap.
377define void @debugtrap_requires_queue() #0 {
378; FIXEDABI-LABEL: debugtrap_requires_queue:
379; FIXEDABI:       ; %bb.0:
380; FIXEDABI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
381; FIXEDABI-NEXT:    s_trap 3
382  call void @llvm.debugtrap()
383  unreachable
384}
385
; Declarations of the intrinsics called by the test functions in this file.
386declare i32 @llvm.amdgcn.workitem.id.x()
387declare i32 @llvm.amdgcn.workitem.id.y()
388declare i32 @llvm.amdgcn.workitem.id.z()
389declare i32 @llvm.amdgcn.workgroup.id.x()
390declare i32 @llvm.amdgcn.workgroup.id.y()
391declare i32 @llvm.amdgcn.workgroup.id.z()
392declare noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr()
393declare noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
394declare i64 @llvm.amdgcn.dispatch.id()
395declare noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
396declare i1 @llvm.amdgcn.is.shared(ptr)
397declare i1 @llvm.amdgcn.is.private(ptr)
398declare void @llvm.trap()
399declare void @llvm.debugtrap()
400
; Hint attribute group applied to every function defined in this file.
; NOTE(review): the work-group and work-item strings below are spelled with an
; extra hyphen ("work-group", "work-item"), whereas the comments in this file
; refer to amdgpu-no-workitem-id-*. Confirm which spelling the backend
; recognizes. Changing the strings would require regenerating the assertions.
401attributes #0 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" }
402
; Module flag that sets amdhsa_code_object_version to 500 for this test.
403!llvm.module.flags = !{!0}
404!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
405