xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti < %s | FileCheck -check-prefixes=SI,SI-SDAG %s
3; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=CI,CI-SDAG %s
4; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
5; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=CI,CI-GISEL %s
6; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
7; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
8; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s
9
; is_local_vgpr checks llvm.amdgcn.is.shared on a pointer that differs per lane.
; Each work-item does a volatile load of a flat pointer from
; %ptr.ptr[workitem.id.x], queries the intrinsic on it and stores the
; zero-extended i1 result.
; In every checked output the query is a single 32-bit equality compare of the
; loaded pointer's high dword (v1) against a reference value. On the tahiti and
; hawaii runs that value comes from an s_load_dword at offset 0x33 off the same
; SGPR pair used to load the kernel argument; on gfx900 and newer it is the
; high half of src_shared_base.
; Check blocks for prefixes that no RUN line enables were dropped, since
; FileCheck never evaluates them.
10define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) {
44; SI-LABEL: is_local_vgpr:
45; SI:       ; %bb.0:
46; SI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
47; SI-NEXT:    s_load_dword s4, s[8:9], 0x33
48; SI-NEXT:    s_mov_b32 s2, 0
49; SI-NEXT:    s_mov_b32 s3, 0x100f000
50; SI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
51; SI-NEXT:    v_mov_b32_e32 v1, 0
52; SI-NEXT:    s_waitcnt lgkmcnt(0)
53; SI-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[0:3], 0 addr64 glc
54; SI-NEXT:    s_waitcnt vmcnt(0)
55; SI-NEXT:    s_mov_b32 s2, -1
56; SI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v1
57; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
58; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
59; SI-NEXT:    s_endpgm
60;
61; CI-SDAG-LABEL: is_local_vgpr:
62; CI-SDAG:       ; %bb.0:
63; CI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
64; CI-SDAG-NEXT:    s_load_dword s2, s[8:9], 0x33
65; CI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
66; CI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
67; CI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
68; CI-SDAG-NEXT:    v_add_i32_e32 v0, vcc, s0, v0
69; CI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
70; CI-SDAG-NEXT:    flat_load_dwordx2 v[0:1], v[0:1] glc
71; CI-SDAG-NEXT:    s_waitcnt vmcnt(0)
72; CI-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, s2, v1
73; CI-SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
74; CI-SDAG-NEXT:    flat_store_dword v[0:1], v0
75; CI-SDAG-NEXT:    s_endpgm
76;
77; GFX9-LABEL: is_local_vgpr:
78; GFX9:       ; %bb.0:
79; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
80; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
81; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
82; GFX9-NEXT:    global_load_dwordx2 v[0:1], v0, s[0:1] glc
83; GFX9-NEXT:    s_waitcnt vmcnt(0)
84; GFX9-NEXT:    s_mov_b64 s[0:1], src_shared_base
85; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, s1, v1
86; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
87; GFX9-NEXT:    global_store_dword v[0:1], v0, off
88; GFX9-NEXT:    s_endpgm
89;
90; CI-GISEL-LABEL: is_local_vgpr:
91; CI-GISEL:       ; %bb.0:
92; CI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
93; CI-GISEL-NEXT:    s_load_dword s2, s[8:9], 0x33
94; CI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 3, v0
95; CI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
96; CI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
97; CI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
98; CI-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
99; CI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
100; CI-GISEL-NEXT:    flat_load_dwordx2 v[0:1], v[0:1] glc
101; CI-GISEL-NEXT:    s_waitcnt vmcnt(0)
102; CI-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, s2, v1
103; CI-GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
104; CI-GISEL-NEXT:    flat_store_dword v[0:1], v0
105; CI-GISEL-NEXT:    s_endpgm
106;
107; GFX10-LABEL: is_local_vgpr:
108; GFX10:       ; %bb.0:
109; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
110; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
111; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
112; GFX10-NEXT:    global_load_dwordx2 v[0:1], v0, s[0:1] glc dlc
113; GFX10-NEXT:    s_waitcnt vmcnt(0)
114; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
115; GFX10-NEXT:    s_mov_b64 s[0:1], src_shared_base
116; GFX10-NEXT:    v_cmp_eq_u32_e32 vcc_lo, s1, v1
117; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
118; GFX10-NEXT:    global_store_dword v[0:1], v0, off
119; GFX10-NEXT:    s_endpgm
120;
121; GFX11-LABEL: is_local_vgpr:
122; GFX11:       ; %bb.0:
123; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
124; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
125; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
126; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
127; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
128; GFX11-NEXT:    global_load_b64 v[0:1], v0, s[0:1] glc dlc
129; GFX11-NEXT:    s_waitcnt vmcnt(0)
130; GFX11-NEXT:    s_mov_b64 s[0:1], src_shared_base
131; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, s1, v1
132; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
133; GFX11-NEXT:    global_store_b32 v[0:1], v0, off
134; GFX11-NEXT:    s_endpgm
135  %id = call i32 @llvm.amdgcn.workitem.id.x()
136  %gep = getelementptr inbounds ptr, ptr addrspace(1) %ptr.ptr, i32 %id
137  %ptr = load volatile ptr, ptr addrspace(1) %gep
138  %val = call i1 @llvm.amdgcn.is.shared(ptr %ptr)
139  %ext = zext i1 %val to i32
140  store i32 %ext, ptr addrspace(1) undef
141  ret void
142}
143
144; FIXME: (setcc (zero_extend (setcc)), 1) not folded out, resulting in
145; select and vcc branch.
; is_local_sgpr checks llvm.amdgcn.is.shared on a kernel-argument pointer whose
; i1 result is used directly as the branch condition. %bb0 does a volatile
; store of 0 and is entered only when the intrinsic returns true.
; The FIXME pattern is visible in the -global-isel=0 outputs below, where
; s_cmp_eq_u32 is followed by s_cselect_b64, s_andn2_b64 into vcc and
; s_cbranch_vccnz. The -global-isel=1 outputs branch on SCC straight after
; s_cmp_lg_u32.
; Check blocks for prefixes that no RUN line enables were dropped, since
; FileCheck never evaluates them.
146define amdgpu_kernel void @is_local_sgpr(ptr %ptr) {
181; SI-LABEL: is_local_sgpr:
182; SI:       ; %bb.0:
183; SI-NEXT:    s_load_dword s0, s[8:9], 0x1
184; SI-NEXT:    s_load_dword s1, s[8:9], 0x33
185; SI-NEXT:    s_waitcnt lgkmcnt(0)
186; SI-NEXT:    s_cmp_eq_u32 s0, s1
187; SI-NEXT:    s_cselect_b64 s[0:1], -1, 0
188; SI-NEXT:    s_andn2_b64 vcc, exec, s[0:1]
189; SI-NEXT:    s_cbranch_vccnz .LBB1_2
190; SI-NEXT:  ; %bb.1: ; %bb0
191; SI-NEXT:    s_mov_b32 s3, 0x100f000
192; SI-NEXT:    s_mov_b32 s2, -1
193; SI-NEXT:    v_mov_b32_e32 v0, 0
194; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
195; SI-NEXT:    s_waitcnt vmcnt(0)
196; SI-NEXT:  .LBB1_2: ; %bb1
197; SI-NEXT:    s_endpgm
198;
199; CI-SDAG-LABEL: is_local_sgpr:
200; CI-SDAG:       ; %bb.0:
201; CI-SDAG-NEXT:    s_load_dword s0, s[8:9], 0x1
202; CI-SDAG-NEXT:    s_load_dword s1, s[8:9], 0x33
203; CI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
204; CI-SDAG-NEXT:    s_cmp_eq_u32 s0, s1
205; CI-SDAG-NEXT:    s_cselect_b64 s[0:1], -1, 0
206; CI-SDAG-NEXT:    s_andn2_b64 vcc, exec, s[0:1]
207; CI-SDAG-NEXT:    s_cbranch_vccnz .LBB1_2
208; CI-SDAG-NEXT:  ; %bb.1: ; %bb0
209; CI-SDAG-NEXT:    v_mov_b32_e32 v0, 0
210; CI-SDAG-NEXT:    flat_store_dword v[0:1], v0
211; CI-SDAG-NEXT:    s_waitcnt vmcnt(0)
212; CI-SDAG-NEXT:  .LBB1_2: ; %bb1
213; CI-SDAG-NEXT:    s_endpgm
214;
215; GFX9-SDAG-LABEL: is_local_sgpr:
216; GFX9-SDAG:       ; %bb.0:
217; GFX9-SDAG-NEXT:    s_load_dword s2, s[8:9], 0x4
218; GFX9-SDAG-NEXT:    s_mov_b64 s[0:1], src_shared_base
219; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
220; GFX9-SDAG-NEXT:    s_cmp_eq_u32 s2, s1
221; GFX9-SDAG-NEXT:    s_cselect_b64 s[0:1], -1, 0
222; GFX9-SDAG-NEXT:    s_andn2_b64 vcc, exec, s[0:1]
223; GFX9-SDAG-NEXT:    s_cbranch_vccnz .LBB1_2
224; GFX9-SDAG-NEXT:  ; %bb.1: ; %bb0
225; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 0
226; GFX9-SDAG-NEXT:    global_store_dword v[0:1], v0, off
227; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
228; GFX9-SDAG-NEXT:  .LBB1_2: ; %bb1
229; GFX9-SDAG-NEXT:    s_endpgm
230;
231; CI-GISEL-LABEL: is_local_sgpr:
232; CI-GISEL:       ; %bb.0:
233; CI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
234; CI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
235; CI-GISEL-NEXT:    s_load_dword s0, s[8:9], 0x33
236; CI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
237; CI-GISEL-NEXT:    s_cmp_lg_u32 s1, s0
238; CI-GISEL-NEXT:    s_cbranch_scc1 .LBB1_2
239; CI-GISEL-NEXT:  ; %bb.1: ; %bb0
240; CI-GISEL-NEXT:    v_mov_b32_e32 v0, 0
241; CI-GISEL-NEXT:    flat_store_dword v[0:1], v0
242; CI-GISEL-NEXT:    s_waitcnt vmcnt(0)
243; CI-GISEL-NEXT:  .LBB1_2: ; %bb1
244; CI-GISEL-NEXT:    s_endpgm
245;
246; GFX9-GISEL-LABEL: is_local_sgpr:
247; GFX9-GISEL:       ; %bb.0:
248; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
249; GFX9-GISEL-NEXT:    s_mov_b64 s[2:3], src_shared_base
250; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
251; GFX9-GISEL-NEXT:    s_cmp_lg_u32 s1, s3
252; GFX9-GISEL-NEXT:    s_cbranch_scc1 .LBB1_2
253; GFX9-GISEL-NEXT:  ; %bb.1: ; %bb0
254; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0
255; GFX9-GISEL-NEXT:    global_store_dword v[0:1], v0, off
256; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
257; GFX9-GISEL-NEXT:  .LBB1_2: ; %bb1
258; GFX9-GISEL-NEXT:    s_endpgm
259;
260; GFX10-LABEL: is_local_sgpr:
261; GFX10:       ; %bb.0:
262; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
263; GFX10-NEXT:    s_mov_b64 s[2:3], src_shared_base
264; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
265; GFX10-NEXT:    s_cmp_lg_u32 s1, s3
266; GFX10-NEXT:    s_cbranch_scc1 .LBB1_2
267; GFX10-NEXT:  ; %bb.1: ; %bb0
268; GFX10-NEXT:    v_mov_b32_e32 v0, 0
269; GFX10-NEXT:    global_store_dword v[0:1], v0, off
270; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
271; GFX10-NEXT:  .LBB1_2: ; %bb1
272; GFX10-NEXT:    s_endpgm
273;
274; GFX11-LABEL: is_local_sgpr:
275; GFX11:       ; %bb.0:
276; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
277; GFX11-NEXT:    s_mov_b64 s[2:3], src_shared_base
278; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
279; GFX11-NEXT:    s_cmp_lg_u32 s1, s3
280; GFX11-NEXT:    s_cbranch_scc1 .LBB1_2
281; GFX11-NEXT:  ; %bb.1: ; %bb0
282; GFX11-NEXT:    v_mov_b32_e32 v0, 0
283; GFX11-NEXT:    global_store_b32 v[0:1], v0, off dlc
284; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
285; GFX11-NEXT:  .LBB1_2: ; %bb1
286; GFX11-NEXT:    s_endpgm
287  %val = call i1 @llvm.amdgcn.is.shared(ptr %ptr)
288  br i1 %val, label %bb0, label %bb1
289
290bb0:
291  store volatile i32 0, ptr addrspace(1) undef
292  br label %bb1
293
294bb1:
295  ret void
296}
297
; Module-level flag setting "amdhsa_code_object_version" to 500 for this test module.
298!llvm.module.flags = !{!0}
299!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
300;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
301; CI: {{.*}}
302; GFX10-GISEL: {{.*}}
303; GFX11-GISEL: {{.*}}
304; SI-SDAG: {{.*}}
305